Merge pull request #258 from sbokov/AddingConfidence

Added confidence support to disparity filtering
10 years ago · c05a7e0182
parent 8a05bdbcc9 7c421ba2b3
commit c05a7e0182
19 changed files with 2101 additions and 602 deletions
--- a/modules/ximgproc/doc/ximgproc.bib
+++ b/modules/ximgproc/doc/ximgproc.bib
@ -55,3 +55,25 @@
  year={2013},
  organization={IEEE}
 }
+
+@article{Min2014,
+  title={Fast global image smoothing based on weighted least squares},
+  author={Min, Dongbo and Choi, Sunghwan and Lu, Jiangbo and Ham, Bumsub and Sohn, Kwanghoon and Do, Minh N},
+  journal={Image Processing, IEEE Transactions on},
+  volume={23},
+  number={12},
+  pages={5638--5653},
+  year={2014},
+  publisher={IEEE}
+}
+
+@inproceedings{Farbman2008,
+  title={Edge-preserving decompositions for multi-scale tone and detail manipulation},
+  author={Farbman, Zeev and Fattal, Raanan and Lischinski, Dani and Szeliski, Richard},
+  booktitle={ACM Transactions on Graphics (TOG)},
+  volume={27},
+  number={3},
+  pages={67},
+  year={2008},
+  organization={ACM}
+}
--- a/modules/ximgproc/include/opencv2/ximgproc.hpp
+++ b/modules/ximgproc/include/opencv2/ximgproc.hpp
@ -38,6 +38,7 @@
 #define __OPENCV_XIMGPROC_HPP__

 #include "ximgproc/edge_filter.hpp"
+#include "ximgproc/disparity_filter.hpp"
 #include "ximgproc/structured_edge_detection.hpp"
 #include "ximgproc/seeds.hpp"

--- a/modules/ximgproc/include/opencv2/ximgproc/disparity_filter.hpp
+++ b/modules/ximgproc/include/opencv2/ximgproc/disparity_filter.hpp
@ -43,49 +43,147 @@
 namespace cv {
 namespace ximgproc {

+//! @addtogroup ximgproc_filters
+//! @{
+
+/** @brief Main interface for all disparity map filters.
+ */
 class CV_EXPORTS_W DisparityFilter : public Algorithm
 {
 public:
-    CV_WRAP virtual void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI) = 0;
-};

-/////////////////////////////////////////////////////////////////////////////////////////////////
+    /** @brief Apply filtering to the disparity map.

-class CV_EXPORTS_W DisparityDTFilter : public DisparityFilter
-{
-public:
-    CV_WRAP virtual double getSigmaSpatial() = 0;
-    CV_WRAP virtual void setSigmaSpatial(double _sigmaSpatial) = 0;
-    CV_WRAP virtual double getSigmaColor() = 0;
-    CV_WRAP virtual void setSigmaColor(double _sigmaColor) = 0;
-};
+    @param disparity_map_left disparity map of the left view, 1 channel, CV_16S type. Implicitly assumes that disparity
+    values are scaled by 16 (one-pixel disparity corresponds to the value of 16 in the disparity map). Disparity map
+    can have any resolution, it will be automatically resized to fit left_view resolution.

-CV_EXPORTS_W
-Ptr<DisparityDTFilter> createDisparityDTFilter();
+    @param left_view left view of the original stereo-pair to guide the filtering process, 8-bit single-channel
+    or three-channel image.

-class CV_EXPORTS_W DisparityGuidedFilter : public DisparityFilter
-{
-public:
-    CV_WRAP virtual double getEps() = 0;
-    CV_WRAP virtual void setEps(double _eps) = 0;
-    CV_WRAP virtual int getRadius() = 0;
-    CV_WRAP virtual void setRadius(int _radius) = 0;
-};
+    @param filtered_disparity_map output disparity map.

-CV_EXPORTS_W
-Ptr<DisparityGuidedFilter> createDisparityGuidedFilter();
+    @param ROI region of the disparity map to filter.

+    @param disparity_map_right optional argument, some implementations might also use the disparity map
+    of the right view to compute confidence maps, for instance.
+
+    @param right_view optional argument, some implementations might also use the right view of the original
+    stereo-pair.
+     */
+    CV_WRAP virtual void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right = Mat(), InputArray right_view = Mat()) = 0;
+};
+
+/** @brief Disparity map filter based on Weighted Least Squares filter (in form of Fast Global Smoother that
+is a lot faster than traditional Weighted Least Squares filter implementations) and optional use of
+left-right-consistency-based confidence to refine the results in half-occlusions and uniform areas.
+ */
 class CV_EXPORTS_W DisparityWLSFilter : public DisparityFilter
 {
+public:
+    /** filter parameters */
+
+    /** @brief Lambda is a parameter defining the amount of regularization during filtering. Larger values force
+    filtered disparity map edges to adhere more to source image edges. Typical value is 8000.
+     */
    CV_WRAP virtual double getLambda() = 0;
+    /** @see getLambda */
    CV_WRAP virtual void setLambda(double _lambda) = 0;
+    /** @brief SigmaColor is a parameter defining how sensitive the filtering process is to source image edges.
+    Large values can lead to disparity leakage through low-contrast edges. Small values can make the filter too
+    sensitive to noise and textures in the source image. Typical values range from 0.8 to 2.0.
+     */
    CV_WRAP virtual double getSigmaColor() = 0;
+    /** @see getSigmaColor */
    CV_WRAP virtual void setSigmaColor(double _sigma_color) = 0;
+
+    /** confidence-related parameters */
+
+    /** @brief LRCthresh is a threshold of disparity difference used in left-right-consistency check during
+    confidence map computation. The default value of 24 (1.5 pixels) is virtually always good enough.
+     */
+    CV_WRAP virtual int getLRCthresh() = 0;
+    /** @see getLRCthresh */
+    CV_WRAP virtual void setLRCthresh(int _LRC_thresh) = 0;
+    /** @brief DepthDiscontinuityRadius is a parameter used in confidence computation. It defines the size of
+    low-confidence regions around depth discontinuities. For typical window sizes used in stereo matching the
+    optimal value is around 5.
+     */
+    CV_WRAP virtual int getDepthDiscontinuityRadius() = 0;
+    /** @see getDepthDiscontinuityRadius */
+    CV_WRAP virtual void setDepthDiscontinuityRadius(int _disc_radius) = 0;
+    /** @brief Get the confidence map that was used in the last filter call. It is a CV_32F one-channel image
+    with values ranging from 0.0 (totally untrusted regions of the raw disparity map) to 255.0 (regions containing
+    correct disparity values with a high degree of confidence).
+     */
+    CV_WRAP virtual Mat getConfidenceMap() = 0;
+
 };

+/** @brief Factory method, create instance of DisparityWLSFilter and execute the initialization routines.
+
+@param use_confidence filtering with confidence requires two disparity maps (for the left and right views) and is
+approximately two times slower. However, quality is typically significantly better.
+*/
 CV_EXPORTS_W
-Ptr<DisparityWLSFilter> createDisparityWLSFilter();
+Ptr<DisparityWLSFilter> createDisparityWLSFilter(bool use_confidence);
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+
+/** @brief Function for reading ground truth disparity maps. Supports basic Middlebury
+and MPI-Sintel formats. Note that the resulting disparity map is scaled by 16.
+
+@param src_path path to the image, containing ground-truth disparity map
+
+@param dst output disparity map, CV_16S depth
+
+@result returns zero if successfully read the ground truth
+ */
+CV_EXPORTS
+int readGT(String src_path,OutputArray dst);
+
+/** @brief Function for computing mean square error for disparity maps
+
+@param GT ground truth disparity map
+
+@param src disparity map to evaluate
+
+@param ROI region of interest
+
+@result returns mean square error between GT and src
+ */
+CV_EXPORTS
+double computeMSE(InputArray GT, InputArray src, Rect ROI);
+
+/** @brief Function for computing the percent of "bad" pixels in the disparity map
+(pixels where error is higher than a specified threshold)
+
+@param GT ground truth disparity map
+
+@param src disparity map to evaluate
+
+@param ROI region of interest
+
+@param thresh threshold used to determine "bad" pixels
+
+@result returns mean square error between GT and src
+ */
+CV_EXPORTS
+double computeBadPixelPercent(InputArray GT, InputArray src, Rect ROI, int thresh=24/*1.5 pixels*/);
+
+/** @brief Function for creating a disparity map visualization (clamped CV_8U image)
+
+@param src input disparity map (CV_16S depth)
+
+@param dst output visualization
+
+@param scale disparity map will be multiplied by this value for visualization
+ */
+CV_EXPORTS
+void getDisparityVis(InputArray src,OutputArray dst,double scale=1.0);

+//! @}
 }
 }
 #endif
--- a/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp
+++ b/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp
@ -316,19 +316,69 @@ proportional to sigmaSpace .
 CV_EXPORTS_W
 void jointBilateralFilter(InputArray joint, InputArray src, OutputArray dst, int d, double sigmaColor, double sigmaSpace, int borderType = BORDER_DEFAULT);

-//! @}
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+

-class CV_EXPORTS_W WeightedLeastSquaresFilter : public Algorithm
+/** @brief Interface for implementations of Fast Global Smoother filter.
+
+For more details about this filter see @cite Min2014 and @cite Farbman2008 .
+*/
+class CV_EXPORTS_W FastGlobalSmootherFilter : public Algorithm
 {
 public:
+    /** @brief Apply smoothing operation to the source image.
+
+    @param src source image for filtering with unsigned 8-bit or signed 16-bit or floating-point 32-bit depth and up to 4 channels.
+
+    @param dst destination image.
+    */
    CV_WRAP virtual void filter(InputArray src, OutputArray dst) = 0;
-    virtual ~WeightedLeastSquaresFilter();
 };

-CV_EXPORTS_W Ptr<WeightedLeastSquaresFilter> createWeightedLeastSquaresFilter(InputArray guide, double lambda, double sigma_color, int num_iter=3);
+/** @brief Factory method, create instance of FastGlobalSmootherFilter and execute the initialization routines.
+
+@param guide image serving as guide for filtering. It should have 8-bit depth and either 1 or 3 channels.
+
+@param lambda parameter defining the amount of regularization
+
+@param sigma_color parameter, that is similar to color space sigma in bilateralFilter.
+
+@param lambda_attenuation internal parameter, defining how much lambda decreases after each iteration. Normally,
+it should be 0.25. Setting it to 1.0 may lead to streaking artifacts.

-CV_EXPORTS_W void weightedLeastSquaresFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, int num_iter=3);
+@param num_iter number of iterations used for filtering, 3 is usually enough.

+For more details about Fast Global Smoother parameters, see the original paper @cite Min2014. However, please note that
+there are several differences. Lambda attenuation described in the paper is implemented a bit differently so do not
+expect the results to be identical to those from the paper; sigma_color values from the paper should be multiplied by 255.0 to
+achieve the same effect. Also, in case of image filtering where source and guide image are the same, authors
+propose to dynamically update the guide image after each iteration. To maximize the performance this feature
+was not implemented here.
+*/
+CV_EXPORTS_W Ptr<FastGlobalSmootherFilter> createFastGlobalSmootherFilter(InputArray guide, double lambda, double sigma_color, double lambda_attenuation=0.25, int num_iter=3);
+
+/** @brief Simple one-line Fast Global Smoother filter call. If you have multiple images to filter with the same
+guide then use FastGlobalSmootherFilter interface to avoid extra computations.
+
+@param guide image serving as guide for filtering. It should have 8-bit depth and either 1 or 3 channels.
+
+@param src source image for filtering with unsigned 8-bit or signed 16-bit or floating-point 32-bit depth and up to 4 channels.
+
+@param dst destination image.
+
+@param lambda parameter defining the amount of regularization
+
+@param sigma_color parameter, that is similar to color space sigma in bilateralFilter.
+
+@param lambda_attenuation internal parameter, defining how much lambda decreases after each iteration. Normally,
+it should be 0.25. Setting it to 1.0 may lead to streaking artifacts.
+
+@param num_iter number of iterations used for filtering, 3 is usually enough.
+*/
+CV_EXPORTS_W void fastGlobalSmootherFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, double lambda_attenuation=0.25, int num_iter=3);
+
+//! @}
 }
 }
 #endif
--- a/modules/ximgproc/perf/perf_disparity_wls_filter.cpp
+++ b/modules/ximgproc/perf/perf_disparity_wls_filter.cpp
@ -0,0 +1,169 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  *Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#include "perf_precomp.hpp"
+#include "opencv2/ximgproc/disparity_filter.hpp"
+
+namespace cvtest
+{
+
+using std::tr1::tuple;
+using std::tr1::get;
+using namespace perf;
+using namespace testing;
+using namespace cv;
+using namespace cv::ximgproc;
+
+void MakeArtificialExample(RNG rng, Mat& dst_left_view, Mat& dst_left_disparity_map, Mat& dst_right_disparity_map, Rect& dst_ROI);
+
+CV_ENUM(GuideTypes, CV_8UC3);
+CV_ENUM(SrcTypes, CV_16S);
+typedef tuple<GuideTypes, SrcTypes, Size, bool, bool> DisparityWLSParams;
+
+typedef TestBaseWithParam<DisparityWLSParams> DisparityWLSFilterPerfTest;
+
+PERF_TEST_P( DisparityWLSFilterPerfTest, perf, Combine(GuideTypes::all(), SrcTypes::all(), Values(sz720p), Values(true,false), Values(true,false)) )
+{
+    RNG rng(0);
+
+    DisparityWLSParams params = GetParam();
+    int guideType        = get<0>(params);
+    int srcType          = get<1>(params);
+    Size sz              = get<2>(params);
+    bool use_conf        = get<3>(params);
+    bool use_downscale   = get<4>(params);
+
+    Mat guide(sz, guideType);
+    Mat disp_left(sz, srcType);
+    Mat disp_right(sz, srcType);
+    Mat dst(sz, srcType);
+    Rect ROI;
+
+    MakeArtificialExample(rng,guide,disp_left,disp_right,ROI);
+    if(use_downscale)
+    {
+        resize(disp_left,disp_left,Size(),0.5,0.5);
+        disp_left/=2;
+        resize(disp_right,disp_right,Size(),0.5,0.5);
+        disp_right/=2;
+        ROI = Rect(ROI.x/2,ROI.y/2,ROI.width/2,ROI.height/2);
+    }
+
+    cv::setNumThreads(cv::getNumberOfCPUs());
+    TEST_CYCLE_N(10)
+    {
+        Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(use_conf);
+        wls_filter->filter(disp_left,guide,dst,ROI,disp_right);
+    }
+
+    SANITY_CHECK(dst);
+}
+
+void MakeArtificialExample(RNG rng, Mat& dst_left_view, Mat& dst_left_disparity_map, Mat& dst_right_disparity_map, Rect& dst_ROI)
+{
+    int w = dst_left_view.cols;
+    int h = dst_left_view.rows;
+
+    //params:
+    unsigned char bg_level = (unsigned char)rng.uniform(0.0,255.0);
+    unsigned char fg_level = (unsigned char)rng.uniform(0.0,255.0);
+    int rect_width  = (int)rng.uniform(w/16,w/2);
+    int rect_height = (int)rng.uniform(h/16,h/2);
+    int rect_disparity = (int)(0.15*w); //typical maximum disparity value
+    double sigma = 6.0;
+
+    int rect_x_offset = (w-rect_width) /2;
+    int rect_y_offset = (h-rect_height)/2;
+
+    if(dst_left_view.channels()==3)
+        dst_left_view = Scalar(Vec3b(bg_level,bg_level,bg_level));
+    else
+        dst_left_view = Scalar(bg_level);
+    dst_left_disparity_map  = Scalar(0);
+    dst_right_disparity_map = Scalar(0);
+    Mat dst_left_view_rect =          Mat(dst_left_view,         Rect(rect_x_offset,rect_y_offset,rect_width,rect_height));
+    Mat dst_left_disparity_map_rect = Mat(dst_left_disparity_map,Rect(rect_x_offset,rect_y_offset,rect_width,rect_height));
+    if(dst_left_view.channels()==3)
+        dst_left_view_rect = Scalar(Vec3b(fg_level,fg_level,fg_level));
+    else
+        dst_left_view_rect = Scalar(fg_level);
+    dst_left_disparity_map_rect = Scalar(16*rect_disparity);
+
+    rect_x_offset-=rect_disparity;
+    Mat dst_right_disparity_map_rect = Mat(dst_right_disparity_map,Rect(rect_x_offset,rect_y_offset,rect_width,rect_height));
+    dst_right_disparity_map_rect = Scalar(-16*rect_disparity);
+
+    //add some gaussian noise:
+    unsigned char *l;
+    short *ldisp, *rdisp;
+    for(int i=0;i<h;i++)
+    {
+        l = dst_left_view.ptr(i);
+        ldisp = (short*)dst_left_disparity_map.ptr(i);
+        rdisp = (short*)dst_right_disparity_map.ptr(i);
+
+        if(dst_left_view.channels()==3)
+        {
+            for(int j=0;j<w;j++)
+            {
+                l[0] = saturate_cast<unsigned char>(l[0] + rng.gaussian(sigma));
+                l[1] = saturate_cast<unsigned char>(l[1] + rng.gaussian(sigma));
+                l[2] = saturate_cast<unsigned char>(l[2] + rng.gaussian(sigma));
+                l+=3;
+                ldisp[0] = saturate_cast<short>(ldisp[0] + rng.gaussian(sigma));
+                ldisp++;
+                rdisp[0] = saturate_cast<short>(rdisp[0] + rng.gaussian(sigma));
+                rdisp++;
+            }
+        }
+        else
+        {
+            for(int j=0;j<w;j++)
+            {
+                l[0] = saturate_cast<unsigned char>(l[0] + rng.gaussian(sigma));
+                l++;
+                ldisp[0] = saturate_cast<short>(ldisp[0] + rng.gaussian(sigma));
+                ldisp++;
+                rdisp[0] = saturate_cast<short>(rdisp[0] + rng.gaussian(sigma));
+                rdisp++;
+            }
+        }
+    }
+
+    dst_ROI = Rect(rect_disparity,0,w-rect_disparity,h);
+}
+
+}
--- a/modules/ximgproc/perf/perf_fgs_filter.cpp
+++ b/modules/ximgproc/perf/perf_fgs_filter.cpp
@ -0,0 +1,81 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  *Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#include "perf_precomp.hpp"
+
+namespace cvtest
+{
+
+using std::tr1::tuple;
+using std::tr1::get;
+using namespace perf;
+using namespace testing;
+using namespace cv;
+using namespace cv::ximgproc;
+
+CV_ENUM(GuideTypes, CV_8UC1, CV_8UC3);
+CV_ENUM(SrcTypes, CV_8UC1, CV_8UC3, CV_16SC1, CV_16SC3, CV_32FC1, CV_32FC3);
+typedef tuple<GuideTypes, SrcTypes, Size> FGSParams;
+
+typedef TestBaseWithParam<FGSParams> FGSFilterPerfTest;
+
+PERF_TEST_P( FGSFilterPerfTest, perf, Combine(GuideTypes::all(), SrcTypes::all(), Values(sz720p)) )
+{
+    RNG rng(0);
+
+    FGSParams params = GetParam();
+    int guideType   = get<0>(params);
+    int srcType     = get<1>(params);
+    Size sz         = get<2>(params);
+
+    Mat guide(sz, guideType);
+    Mat src(sz, srcType);
+    Mat dst(sz, srcType);
+
+    declare.in(guide, src, WARMUP_RNG).out(dst).tbb_threads(cv::getNumberOfCPUs());
+
+    cv::setNumThreads(cv::getNumberOfCPUs());
+    TEST_CYCLE_N(10)
+    {
+        double lambda = rng.uniform(500.0, 10000.0);
+        double sigma  = rng.uniform(1.0, 100.0);
+        fastGlobalSmootherFilter(guide,src,dst,lambda,sigma);
+    }
+
+    SANITY_CHECK(dst);
+}
+
+}
--- a/modules/ximgproc/samples/disparity_filtering.cpp
+++ b/modules/ximgproc/samples/disparity_filtering.cpp
@ -4,215 +4,306 @@
 #include "opencv2/highgui.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/ximgproc/disparity_filter.hpp"
-#include <stdio.h>
+#include <iostream>
 #include <string>
-#include <vector>
-#include <map>
-
-#if defined(_WIN32)
-#include <direct.h>
-#else 
-#include <sys/stat.h>
-#endif

 using namespace cv;
 using namespace cv::ximgproc;
 using namespace std;

-#define UNKNOWN_DISPARITY 16320
- 
-static void print_help()
-{
-    printf("\nDemo for disparity filtering, evaluating speed and performance of different filters\n");
-    printf("\nUsage: disparity_filtering.exe <path_to_dataset_folder> <path_to_results_folder>\n");
-}
+Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance);
+
+const String keys =
+    "{help h usage ? |                  | print this message                                                }"
+    "{@left          |../data/aloeL.jpg | left view of the stereopair                                       }"
+    "{@right         |../data/aloeR.jpg | right view of the stereopair                                      }"
+    "{GT             |../data/aloeGT.png| optional ground-truth disparity (MPI-Sintel or Middlebury format) }"
+    "{dst_path       |None              | optional path to save the resulting filtered disparity map        }"
+    "{dst_raw_path   |None              | optional path to save raw disparity map before filtering          }"
+    "{algorithm      |bm                | stereo matching method (bm or sgbm)                               }"
+    "{filter         |wls_conf          | used post-filtering (wls_conf or wls_no_conf)                     }"
+    "{no-display     |                  | don't display results                                             }"
+    "{no-downscale   |                  | prevent stereo matching on downscaled views                       }"
+    "{dst_conf_path  |None              | optional path to save the confidence map used in filtering        }"
+    "{vis_mult       |1.0               | coefficient used to scale disparity map visualizations            }"
+    "{max_disparity  |160               | parameter of stereo matching                                      }"
+    "{window_size    |19                | parameter of stereo matching                                      }"
+    "{wls_lambda     |8000.0            | parameter of post-filtering                                       }"
+    "{wls_sigma      |1.0               | parameter of post-filtering                                       }"
+    ;

-struct dataset_entry
+int main(int argc, char** argv)
 {
-    string name;
-    string dataset_folder;
-    string left_file,right_file,GT_file;
-    dataset_entry(string _dataset_folder): dataset_folder(_dataset_folder){}
-    void readEntry(Mat& dst_left,Mat& dst_right,Mat& dst_GT)
+    CommandLineParser parser(argc,argv,keys);
+    parser.about("Disparity Filtering Demo");
+    if (parser.has("help"))
    {
-        dst_left  = imread(dataset_folder+"/"+left_file,  IMREAD_COLOR);
-        dst_right = imread(dataset_folder+"/"+right_file, IMREAD_COLOR);
-        Mat raw_disp = imread(dataset_folder+"/"+GT_file, IMREAD_COLOR);
-        dst_GT = Mat(raw_disp.rows,raw_disp.cols,CV_16S);
-        for(int i=0;i<raw_disp.rows;i++)
-            for(int j=0;j<raw_disp.cols;j++)
-            {
-                Vec3b bgrPixel = raw_disp.at<Vec3b>(i, j);
-                dst_GT.at<short>(i,j) = 64*bgrPixel.val[2]+bgrPixel.val[1]/4; //16-multiplied disparity
-            }
+        parser.printMessage();
+        return 0;
    }
-};

-struct config
-{
-    Ptr<StereoMatcher> matcher_instance;
-    Ptr<DisparityFilter> filter_instance;
-    config(Ptr<StereoMatcher> _matcher_instance,Ptr<DisparityFilter> _filter_instance)
+    String left_im = parser.get<String>(0);
+    String right_im = parser.get<String>(1);
+    String GT_path = parser.get<String>("GT");
+
+    String dst_path = parser.get<String>("dst_path");
+    String dst_raw_path = parser.get<String>("dst_raw_path");
+    String dst_conf_path = parser.get<String>("dst_conf_path");
+    String algo = parser.get<String>("algorithm");
+    String filter = parser.get<String>("filter");
+    bool no_display = parser.has("no-display");
+    bool no_downscale = parser.has("no-downscale");
+    int max_disp = parser.get<int>("max_disparity");
+    int wsize = parser.get<int>("window_size");
+    double lambda = parser.get<double>("wls_lambda");
+    double sigma  = parser.get<double>("wls_sigma");
+    double vis_mult = parser.get<double>("vis_mult");
+
+    if (!parser.check())
    {
-        matcher_instance = _matcher_instance;
-        filter_instance = _filter_instance;
+        parser.printErrors();
+        return -1;
    }
-    config() {}
-};

-void operator>>(const FileNode& node,dataset_entry& entry);
-double computeMSE(Mat& GT, Mat& src, Rect ROI);
-double computeBadPixelPercent(Mat& GT, Mat& src, Rect ROI, int thresh=24/*1.5 pixels*/);
-void getDisparityVis(Mat& disparity_map,Mat& dst);
-Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance);
-void setConfigsForTesting(map<string,config>& cfgs);
-void CreateDir(string path);
-
-int main(int argc, char** argv)
-{
-    if(argc < 3)
+    Mat left  = imread(left_im ,IMREAD_COLOR);
+    if ( left.empty() )
    {
-        print_help();
-        return 0;
+        cout<<"Cannot read image file: "<<left_im;
+        return -1;
    }
-    string dataset_folder(argv[1]);
-    string res_folder(argv[2]);

-    map<string,config> configs_for_testing;
-    setConfigsForTesting(configs_for_testing);
-    CreateDir(res_folder);
+    Mat right = imread(right_im,IMREAD_COLOR);
+    if ( right.empty() )
+    {
+        cout<<"Cannot read image file: "<<right_im;
+        return -1;
+    }

-    for (map<string,config>::iterator cfg = configs_for_testing.begin(); cfg != configs_for_testing.end(); cfg++)
+    bool noGT;
+    Mat GT_disp;
+    if (GT_path=="../data/aloeGT.png" && left_im!="../data/aloeL.jpg")
+        noGT=true;
+    else
    {
-        string vis_folder = res_folder+"/vis_"+cfg->first;
-        CreateDir(vis_folder);
-
-        string cfg_file_name = res_folder+"/"+cfg->first+"_res.csv";
-        FILE* cur_cfg_res_file = fopen(cfg_file_name.c_str(),"w");
-        fprintf(cur_cfg_res_file,"Name,MSE,MSE after postfiltering,Percent bad,Percent bad after postfiltering,Matcher Execution Time(s),Filter Execution Time(s)\n");
-
-        printf("Processing configuration: %s\n",cfg->first.c_str());
-
-        FileStorage fs(dataset_folder + "/_dataset.xml", FileStorage::READ);
-        FileNode n = fs["data_set"];
-        double MSE_pre,percent_pre,MSE_post,percent_post,matching_time,filtering_time;
-        double average_MSE_pre=0,average_percent_pre=0,average_MSE_post=0,
-            average_percent_post=0,average_matching_time=0,average_filtering_time=0;
-        int cnt = 0;
-        for (FileNodeIterator it = n.begin(); it != n.end(); it++)
+        noGT=false;
+        if(readGT(GT_path,GT_disp)!=0)
        {
-            dataset_entry entry(dataset_folder);
-            (*it)>>entry;
-            printf("%s ",entry.name.c_str());
-            Mat left,right,GT;
-            entry.readEntry(left,right,GT);
-            Mat raw_disp;
-            Mat left_gray; cvtColor(left, left_gray, COLOR_BGR2GRAY );
-            Mat right_gray; cvtColor(right, right_gray, COLOR_BGR2GRAY );
-            matching_time = (double)getTickCount();
-            cfg->second.matcher_instance->compute(left_gray,right_gray,raw_disp);
-            matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
-
-            Rect ROI = computeROI(left.size(),cfg->second.matcher_instance);
-            Mat filtered_disp;
-            filtering_time = (double)getTickCount();
-            cfg->second.filter_instance->filter(raw_disp,left,filtered_disp,ROI);
-            filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
+            cout<<"Cannot read ground truth image file: "<<GT_path<<endl;
+            return -1;
+        }
+    }

+    Mat left_for_matcher, right_for_matcher;
+    Mat left_disp,right_disp;
+    Mat filtered_disp;
+    Mat conf_map = Mat(left.rows,left.cols,CV_8U);
+    conf_map = Scalar(255);
+    Rect ROI;
+    double matching_time, filtering_time;
+    if(max_disp<=0 || max_disp%16!=0)
+    {
+        cout<<"Incorrect max_disparity value: it should be positive and divisible by 16";
+        return -1;
+    }
+    if(wsize<=0 || wsize%2!=1)
+    {
+        cout<<"Incorrect window_size value: it should be positive and odd";
+        return -1;
+    }
+    if(filter=="wls_conf")
+    {
+        if(!no_downscale)
+        {
+            wsize = wsize/2;
+            if(wsize%2==0) wsize++;
+            max_disp/=2;
+            if(max_disp%16!=0)
+                max_disp += 16-(max_disp%16);
+            resize(left ,left_for_matcher ,Size(),0.5,0.5);
+            resize(right,right_for_matcher,Size(),0.5,0.5);
+        }
+        else
+        {
+            left_for_matcher  = left.clone();
+            right_for_matcher = right.clone();
+        }

-            MSE_pre = computeMSE(GT,raw_disp,ROI);
-            percent_pre = computeBadPixelPercent(GT,raw_disp,ROI);
-            MSE_post = computeMSE(GT,filtered_disp,ROI);
-            percent_post = computeBadPixelPercent(GT,filtered_disp,ROI);

-            fprintf(cur_cfg_res_file,"%s,%.1f,%.1f,%.1f,%.1f,%.3f,%.3f\n",entry.name.c_str(),MSE_pre,MSE_post,
-                percent_pre,percent_post,matching_time,filtering_time);
+        if(algo=="bm")
+        {
+            Ptr<StereoBM> left_matcher  = StereoBM::create(max_disp,wsize);
+            left_matcher->setMinDisparity(0);
+            Ptr<StereoBM> right_matcher = StereoBM::create(max_disp,wsize);
+            right_matcher->setMinDisparity(-max_disp+1);
+            left_matcher->setTextureThreshold(0);
+            left_matcher->setUniquenessRatio(0);
+            right_matcher->setTextureThreshold(0);
+            right_matcher->setUniquenessRatio(0);
+            cvtColor(left_for_matcher,  left_for_matcher, COLOR_BGR2GRAY);
+            cvtColor(right_for_matcher, right_for_matcher, COLOR_BGR2GRAY);
+            ROI = computeROI(left_for_matcher.size(),left_matcher);

-            average_matching_time+=matching_time; average_filtering_time+=filtering_time;
-            average_MSE_pre+=MSE_pre; average_percent_pre+=percent_pre;
-            average_MSE_post+=MSE_post; average_percent_post+=percent_post;
-            cnt++;
+            matching_time = (double)getTickCount();
+            left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
+            right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
+            matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
+        }
+        else if(algo=="sgbm")
+        {
+            Ptr<StereoSGBM> left_matcher  = StereoSGBM::create(0,max_disp,wsize);
+            left_matcher->setMinDisparity(0);
+            Ptr<StereoSGBM> right_matcher = StereoSGBM::create(-max_disp+1,max_disp,wsize);
+            left_matcher->setUniquenessRatio(0);
+            left_matcher->setDisp12MaxDiff(1000000);
+            left_matcher->setSpeckleWindowSize(0);
+            right_matcher->setUniquenessRatio(0);
+            right_matcher->setDisp12MaxDiff(1000000);
+            right_matcher->setSpeckleWindowSize(0);
+            ROI = computeROI(left_for_matcher.size(),left_matcher);

-            // dump visualizations:
-            imwrite(vis_folder + "/" + entry.name + "_left.png",left);
-            Mat GT_vis,raw_disp_vis,filtered_disp_vis;
-            getDisparityVis(GT,GT_vis);
-            getDisparityVis(raw_disp,raw_disp_vis);
-            getDisparityVis(filtered_disp,filtered_disp_vis);
-            imwrite(vis_folder + "/" + entry.name + "_disparity_GT.png",GT_vis);
-            imwrite(vis_folder + "/" + entry.name + "_disparity_raw.png",raw_disp_vis);
-            imwrite(vis_folder + "/" + entry.name + "_disparity_filtered.png",filtered_disp_vis);
+            matching_time = (double)getTickCount();
+            left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
+            right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
+            matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
+        }
+        else
+        {
+            cout<<"Unsupported algorithm";
+            return -1;
+        }

-            printf("- Done\n");
+        Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
+        wls_filter->setLambda(lambda);
+        wls_filter->setSigmaColor(sigma);
+        filtering_time = (double)getTickCount();
+        wls_filter->filter(left_disp,left,filtered_disp,ROI,right_disp);
+        filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
+        conf_map = wls_filter->getConfidenceMap();

+        if(!no_downscale)
+        {
+            // upscale raw disparity and ROI back for a proper comparison:
+            resize(left_disp,left_disp,Size(),2.0,2.0);
+            left_disp = left_disp*2.0;
+            ROI = Rect(ROI.x*2,ROI.y*2,ROI.width*2,ROI.height*2);
        }
-        fprintf(cur_cfg_res_file,"%s,%.1f,%.1f,%.1f,%.1f,%.3f,%.3f\n","average",average_MSE_pre/cnt,
-            average_MSE_post/cnt,average_percent_pre/cnt,average_percent_post/cnt,
-            average_matching_time/cnt,average_filtering_time/cnt);
-        fclose(cur_cfg_res_file);
    }
-    return 0;
-}
-
-void operator>>(const FileNode& node,dataset_entry& entry) 
-{
-    node["name"] >> entry.name;
-    node["left_file"] >> entry.left_file;
-    node["right_file"] >> entry.right_file;
-    node["GT_file"] >> entry.GT_file;
-}
+    else if(filter=="wls_no_conf")
+    {
+        left_for_matcher  = left.clone();
+        right_for_matcher = right.clone();

-double computeMSE(Mat& GT, Mat& src, Rect ROI)
-{
-    double res = 0;
-    Mat GT_ROI(GT,ROI);
-    Mat src_ROI(src,ROI);
-    int cnt=0;
-    for(int i=0;i<src_ROI.rows;i++)
-        for(int j=0;j<src_ROI.cols;j++)
+        if(algo=="bm")
        {
-            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
-            {
-                res += (GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))*(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j));
-                cnt++;
-            }
+            Ptr<StereoBM> matcher  = StereoBM::create(max_disp,wsize);
+            matcher->setTextureThreshold(0);
+            matcher->setUniquenessRatio(0);
+            cvtColor(left_for_matcher,  left_for_matcher, COLOR_BGR2GRAY);
+            cvtColor(right_for_matcher, right_for_matcher, COLOR_BGR2GRAY);
+            ROI = computeROI(left_for_matcher.size(),matcher);
+
+            matching_time = (double)getTickCount();
+            matcher->compute(left_for_matcher,right_for_matcher,left_disp);
+            matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
        }
-    res /= cnt*256;
-    return res;
-}
+        else if(algo=="sgbm")
+        {
+            Ptr<StereoSGBM> matcher  = StereoSGBM::create(0,max_disp,wsize);
+            matcher->setUniquenessRatio(0);
+            matcher->setDisp12MaxDiff(1000000);
+            matcher->setSpeckleWindowSize(0);
+            ROI = computeROI(left_for_matcher.size(),matcher);

-double computeBadPixelPercent(Mat& GT, Mat& src, Rect ROI, int thresh)
-{
-    int bad_pixel_num = 0;
-    Mat GT_ROI(GT,ROI);
-    Mat src_ROI(src,ROI);
-    int cnt=0;
-    for(int i=0;i<src_ROI.rows;i++)
-        for(int j=0;j<src_ROI.cols;j++)
+            matching_time = (double)getTickCount();
+            matcher->compute(left_for_matcher,right_for_matcher,left_disp);
+            matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
+        }
+        else
        {
-            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
-            {
-                if( abs(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))>=thresh )
-                    bad_pixel_num++;
-                cnt++;
-            }
+            cout<<"Unsupported algorithm";
+            return -1;
        }
-    return (100.0*bad_pixel_num)/cnt;
-}

-void getDisparityVis(Mat& disparity_map,Mat& dst)
-{
-    dst = Mat(disparity_map.rows,disparity_map.cols,CV_8UC3);
-    for(int i=0;i<dst.rows;i++)
-        for(int j=0;j<dst.cols;j++)
+        Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(false);
+        wls_filter->setLambda(lambda);
+        wls_filter->setSigmaColor(sigma);
+        filtering_time = (double)getTickCount();
+        wls_filter->filter(left_disp,left,filtered_disp,ROI);
+        filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
+    }
+    else
+    {
+        cout<<"Unsupported filter";
+        return -1;
+    }
+
+    //collect and print all the stats:
+    cout.precision(2);
+    cout<<"Matching time:  "<<matching_time<<"s"<<endl;
+    cout<<"Filtering time: "<<filtering_time<<"s"<<endl;
+    cout<<endl;
+
+    double MSE_before,percent_bad_before,MSE_after,percent_bad_after;
+    if(!noGT)
+    {
+        MSE_before = computeMSE(GT_disp,left_disp,ROI);
+        percent_bad_before = computeBadPixelPercent(GT_disp,left_disp,ROI);
+        MSE_after = computeMSE(GT_disp,filtered_disp,ROI);
+        percent_bad_after = computeBadPixelPercent(GT_disp,filtered_disp,ROI);
+
+        cout.precision(5);
+        cout<<"MSE before filtering: "<<MSE_before<<endl;
+        cout<<"MSE after filtering:  "<<MSE_after<<endl;
+        cout<<endl;
+        cout.precision(3);
+        cout<<"Percent of bad pixels before filtering: "<<percent_bad_before<<endl;
+        cout<<"Percent of bad pixels after filtering:  "<<percent_bad_after<<endl;
+    }
+
+    if(dst_path!="None")
+    {
+        Mat filtered_disp_vis;
+        getDisparityVis(filtered_disp,filtered_disp_vis,vis_mult);
+        imwrite(dst_path,filtered_disp_vis);
+    }
+    if(dst_raw_path!="None")
+    {
+        Mat raw_disp_vis;
+        getDisparityVis(left_disp,raw_disp_vis,vis_mult);
+        imwrite(dst_raw_path,raw_disp_vis);
+    }
+    if(dst_conf_path!="None")
+    {
+        imwrite(dst_conf_path,conf_map);
+    }
+
+    if(!no_display)
+    {
+        namedWindow("left", WINDOW_AUTOSIZE);
+        imshow("left", left);
+        namedWindow("right", WINDOW_AUTOSIZE);
+        imshow("right", right);
+
+        if(!noGT)
        {
-            if(disparity_map.at<short>(i,j)==UNKNOWN_DISPARITY)
-                dst.at<Vec3b>(i,j) = Vec3b(0,0,0);
-            else
-                dst.at<Vec3b>(i,j) = Vec3b(saturate_cast<unsigned char>(disparity_map.at<short>(i,j)/8),
-                                           saturate_cast<unsigned char>(disparity_map.at<short>(i,j)/8),
-                                           saturate_cast<unsigned char>(disparity_map.at<short>(i,j)/8));
+            Mat GT_disp_vis;
+            getDisparityVis(GT_disp,GT_disp_vis,vis_mult);
+            namedWindow("ground-truth disparity", WINDOW_AUTOSIZE);
+            imshow("ground-truth disparity", GT_disp_vis);
        }
+
+        Mat raw_disp_vis;
+        getDisparityVis(left_disp,raw_disp_vis,vis_mult);
+        namedWindow("raw disparity", WINDOW_AUTOSIZE);
+        imshow("raw disparity", raw_disp_vis);
+        Mat filtered_disp_vis;
+        getDisparityVis(filtered_disp,filtered_disp_vis,vis_mult);
+        namedWindow("filtered disparity", WINDOW_AUTOSIZE);
+        imshow("filtered disparity", filtered_disp_vis);
+        waitKey();
+    }
+
+    return 0;
 }

 Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance)
@ -225,34 +316,10 @@ Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance)
    int minD = min_disparity, maxD = min_disparity + num_disparities - 1;

    int xmin = maxD + bs2;
-    int xmax = src_sz.width - minD - bs2;
+    int xmax = src_sz.width + minD - bs2;
    int ymin = bs2;
    int ymax = src_sz.height - bs2;

    Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
    return r;
 }
-
-void setConfigsForTesting(map<string,config>& cfgs)
-{
-    Ptr<StereoBM> stereobm_matcher = StereoBM::create(128,21); 
-    stereobm_matcher->setTextureThreshold(0); 
-    stereobm_matcher->setUniquenessRatio(0);
-
-    Ptr<DisparityFilter> wls_filter = createDisparityWLSFilter();
-    Ptr<DisparityFilter> dt_filter = createDisparityDTFilter();
-    Ptr<DisparityFilter> guided_filter = createDisparityGuidedFilter();
-
-    cfgs["stereobm_wls"] = config(stereobm_matcher,wls_filter);
-    cfgs["stereobm_dtf"] = config(stereobm_matcher,dt_filter);
-    cfgs["stereobm_gf"]  = config(stereobm_matcher,guided_filter);
-}
-
-void CreateDir(string path)
-{
-#if defined(_WIN32)
-    _mkdir(path.c_str());
-#else 
-    mkdir(path.c_str(), 0777);
-#endif
-}
--- a/modules/ximgproc/src/disparity_filters.cpp
+++ b/modules/ximgproc/src/disparity_filters.cpp
@ -36,149 +36,429 @@

 #include "precomp.hpp"
 #include "opencv2/ximgproc/disparity_filter.hpp"
+#include "opencv2/highgui.hpp"
 #include <math.h>
+#include <vector>

 namespace cv {
 namespace ximgproc {

-void setROI(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,
-            Mat& disp,Mat& src,Mat& dst,Rect ROI);
+using std::vector;
+#define EPS 1e-43f

-class DisparityDTFilterImpl : public DisparityDTFilter
+class DisparityWLSFilterImpl : public DisparityWLSFilter
 {
 protected:
-    double sigmaSpatial,sigmaColor;
-    void init(double _sigmaSpatial, double _sigmaColor)
+    double lambda,sigma_color;
+    bool use_confidence;
+    Mat confidence_map;
+    int LRC_thresh,depth_discontinuity_radius;
+    float depth_discontinuity_roll_off_factor;
+    float resize_factor;
+    int num_stripes;
+
+    void init(double _lambda, double _sigma_color, bool _use_confidence);
+    void computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst, Rect ROI);
+    void computeConfidenceMap(InputArray left_disp, InputArray right_disp, Rect ROI);
+
+protected:
+    struct ComputeDiscontinuityAwareLRC_ParBody : public ParallelLoopBody
    {
-        sigmaColor = _sigmaColor;
-        sigmaSpatial = _sigmaSpatial;
-    }
-public:
-    double getSigmaSpatial() {return sigmaSpatial;}
-    void setSigmaSpatial(double _sigmaSpatial) {sigmaSpatial = _sigmaSpatial;}
-    double getSigmaColor() {return sigmaColor;}
-    void setSigmaColor(double _sigmaColor) {sigmaColor = _sigmaColor;}
+        DisparityWLSFilterImpl* wls;
+        Mat *left_disp, *right_disp;
+        Mat *left_disc, *right_disc, *dst;
+        Rect left_ROI, right_ROI;
+        int nstripes, stripe_sz;
+
+        ComputeDiscontinuityAwareLRC_ParBody(DisparityWLSFilterImpl& _wls, Mat& _left_disp, Mat& _right_disp, Mat& _left_disc, Mat& _right_disc, Mat& _dst, Rect _left_ROI, Rect _right_ROI, int _nstripes);
+        void operator () (const Range& range) const;
+    };

-    static Ptr<DisparityDTFilterImpl> create()
+    struct ComputeDepthDisc_ParBody : public ParallelLoopBody
    {
-        DisparityDTFilterImpl *dtf = new DisparityDTFilterImpl();
-        dtf->init(25.0,60.0); 
-        return Ptr<DisparityDTFilterImpl>(dtf);
+        DisparityWLSFilterImpl* wls;
+        Mat *disp,*disp_squares,*dst;
+        int nstripes, stripe_sz;
+
+        ComputeDepthDisc_ParBody(DisparityWLSFilterImpl& _wls, Mat& _disp, Mat& _disp_squares, Mat& _dst, int _nstripes);
+        void operator () (const Range& range) const;
+    };
+
+    typedef void (DisparityWLSFilterImpl::*MatOp)(Mat& src, Mat& dst);
+
+    struct ParallelMatOp_ParBody : public ParallelLoopBody
+    {
+        DisparityWLSFilterImpl* wls;
+        vector<MatOp> ops;
+        vector<Mat*> src;
+        vector<Mat*> dst;
+
+        ParallelMatOp_ParBody(DisparityWLSFilterImpl& _wls, vector<MatOp> _ops, vector<Mat*>& _src, vector<Mat*>& _dst);
+        void operator () (const Range& range) const;
+    };
+
+    void boxFilterOp(Mat& src,Mat& dst)
+    {
+        int rad = (int)ceil(resize_factor*depth_discontinuity_radius);
+        boxFilter(src,dst,CV_32F,Size(2*rad+1,2*rad+1),Point(-1,-1));
    }

-    void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI)
+    void sqrBoxFilterOp(Mat& src,Mat& dst)
    {
-        Mat disp,src,dst;
-        setROI(disparity_map,left_view,filtered_disparity_map,disp,src,dst,ROI);
+        int rad = (int)ceil(resize_factor*depth_discontinuity_radius);
+        sqrBoxFilter(src,dst,CV_32F,Size(2*rad+1,2*rad+1),Point(-1,-1));
+    }

-        Mat disp_32F,dst_32F;
-        disp.convertTo(disp_32F,CV_32F);
-        dtFilter(src,disp_32F,dst_32F,sigmaSpatial,sigmaColor);
-        dst_32F.convertTo(dst,CV_16S);
+    void copyToOp(Mat& src,Mat& dst)
+    {
+        src.copyTo(dst);
    }
+
+public:
+    static Ptr<DisparityWLSFilterImpl> create(bool _use_confidence);
+    void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right, InputArray);
+
+    double getLambda() {return lambda;}
+    void setLambda(double _lambda) {lambda = _lambda;}
+    double getSigmaColor() {return sigma_color;}
+    void setSigmaColor(double _sigma_color) {sigma_color = _sigma_color;}
+    Mat getConfidenceMap() {return confidence_map;}
+    int getLRCthresh() {return LRC_thresh;}
+    void setLRCthresh(int _LRC_thresh) {LRC_thresh = _LRC_thresh;}
+    int getDepthDiscontinuityRadius() {return depth_discontinuity_radius;}
+    void setDepthDiscontinuityRadius(int _disc_radius) {depth_discontinuity_radius = _disc_radius;}
 };

-CV_EXPORTS_W
-Ptr<DisparityDTFilter> createDisparityDTFilter()
+void DisparityWLSFilterImpl::init(double _lambda, double _sigma_color, bool _use_confidence)
 {
-    return Ptr<DisparityDTFilter>(DisparityDTFilterImpl::create());
+    lambda = _lambda;
+    sigma_color = _sigma_color;
+    use_confidence = _use_confidence;
+    confidence_map = Mat();
+    LRC_thresh = 24;
+    depth_discontinuity_radius = 5;
+    depth_discontinuity_roll_off_factor = 0.001f;
+    resize_factor = 1.0;
+    num_stripes = getNumThreads();
 }

-//////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-class DisparityGuidedFilterImpl : public DisparityGuidedFilter
+void DisparityWLSFilterImpl::computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst, Rect ROI)
 {
-protected:
-    int radius;
-    double eps;
-    void init(int _radius, double _eps)
+    Rect right_ROI(left_disp.cols-(ROI.x+ROI.width),ROI.y,ROI.width,ROI.height);
+    Mat left_disp_ROI (left_disp, ROI);
+    Mat right_disp_ROI(right_disp,right_ROI);
+    Mat ldisp,rdisp,ldisp_squared,rdisp_squared;
+
    {
-        radius = _radius;
-        eps = _eps;
+        vector<Mat*> _src; _src.push_back(&left_disp_ROI);_src.push_back(&right_disp_ROI);
+                           _src.push_back(&left_disp_ROI);_src.push_back(&right_disp_ROI);
+        vector<Mat*> _dst; _dst.push_back(&ldisp);        _dst.push_back(&rdisp);
+                           _dst.push_back(&ldisp_squared);_dst.push_back(&rdisp_squared);
+        vector<MatOp> _ops; _ops.push_back(&DisparityWLSFilterImpl::copyToOp); _ops.push_back(&DisparityWLSFilterImpl::copyToOp);
+                            _ops.push_back(&DisparityWLSFilterImpl::copyToOp); _ops.push_back(&DisparityWLSFilterImpl::copyToOp);
+        parallel_for_(Range(0,4),ParallelMatOp_ParBody(*this,_ops,_src,_dst));
    }
-public:
-    double getEps() {return eps;}
-    void setEps(double _eps) {eps = _eps;}
-    int getRadius() {return radius;}
-    void setRadius(int _radius) {radius = _radius;}

-    static Ptr<DisparityGuidedFilterImpl> create()
    {
-        DisparityGuidedFilterImpl *gf = new DisparityGuidedFilterImpl();
-        gf->init(20,100.0);
-        return Ptr<DisparityGuidedFilterImpl>(gf);
+        vector<Mat*> _src; _src.push_back(&ldisp);        _src.push_back(&rdisp);
+                           _src.push_back(&ldisp_squared);_src.push_back(&rdisp_squared);
+        vector<Mat*> _dst; _dst.push_back(&ldisp);        _dst.push_back(&rdisp);
+                           _dst.push_back(&ldisp_squared);_dst.push_back(&rdisp_squared);
+        vector<MatOp> _ops; _ops.push_back(&DisparityWLSFilterImpl::boxFilterOp);   _ops.push_back(&DisparityWLSFilterImpl::boxFilterOp);
+                            _ops.push_back(&DisparityWLSFilterImpl::sqrBoxFilterOp);_ops.push_back(&DisparityWLSFilterImpl::sqrBoxFilterOp);
+        parallel_for_(Range(0,4),ParallelMatOp_ParBody(*this,_ops,_src,_dst));
    }

-    void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI)
+    left_dst  = Mat::zeros(left_disp.rows,left_disp.cols,CV_32F);
+    right_dst = Mat::zeros(right_disp.rows,right_disp.cols,CV_32F);
+    Mat left_dst_ROI (left_dst,ROI);
+    Mat right_dst_ROI(right_dst,right_ROI);
+
+    parallel_for_(Range(0,num_stripes),ComputeDepthDisc_ParBody(*this,ldisp,ldisp_squared,left_dst_ROI ,num_stripes));
+    parallel_for_(Range(0,num_stripes),ComputeDepthDisc_ParBody(*this,rdisp,rdisp_squared,right_dst_ROI,num_stripes));
+}
+
+
+void DisparityWLSFilterImpl::computeConfidenceMap(InputArray left_disp, InputArray right_disp, Rect ROI)
+{
+    Mat ldisp = left_disp.getMat();
+    Mat rdisp = right_disp.getMat();
+    Mat depth_discontinuity_map_left,depth_discontinuity_map_right;
+    computeDepthDiscontinuityMaps(ldisp,rdisp,depth_discontinuity_map_left,depth_discontinuity_map_right,ROI);
+
+    Rect right_ROI(ldisp.cols-(ROI.x+ROI.width),ROI.y,ROI.width,ROI.height);
+    confidence_map = depth_discontinuity_map_left;
+
+    parallel_for_(Range(0,num_stripes),ComputeDiscontinuityAwareLRC_ParBody(*this,ldisp,rdisp, depth_discontinuity_map_left,depth_discontinuity_map_right,confidence_map,ROI,right_ROI,num_stripes));
+    confidence_map = 255.0f*confidence_map;
+}
+
+Ptr<DisparityWLSFilterImpl> DisparityWLSFilterImpl::create(bool _use_confidence)
+{
+    DisparityWLSFilterImpl *wls = new DisparityWLSFilterImpl();
+    wls->init(8000.0,1.0,_use_confidence);
+    return Ptr<DisparityWLSFilterImpl>(wls);
+}
+
+void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right, InputArray)
+{
+    CV_Assert( !disparity_map_left.empty() && (disparity_map_left.depth() == CV_16S) && (disparity_map_left.channels() == 1) );
+    CV_Assert( !left_view.empty() && (left_view.depth() == CV_8U) && (left_view.channels() == 3 || left_view.channels() == 1) );
+    Mat disp,src,dst;
+    if(disparity_map_left.size()!=left_view.size())
+        resize_factor = disparity_map_left.cols()/(float)left_view.cols();
+    else
+        resize_factor = 1.0;
+
+    if(!use_confidence)
    {
-        Mat disp,src,dst;
-        setROI(disparity_map,left_view,filtered_disparity_map,disp,src,dst,ROI);
+        Mat disp_full_size = disparity_map_left.getMat();
+        Mat src_full_size = left_view.getMat();
+        if(disp_full_size.size!=src_full_size.size)
+        {
+            float x_ratio = src_full_size.cols/(float)disp_full_size.cols;
+            float y_ratio = src_full_size.rows/(float)disp_full_size.rows;
+            resize(disp_full_size,disp_full_size,src_full_size.size());
+            disp_full_size = disp_full_size*x_ratio;
+            ROI = Rect((int)(ROI.x*x_ratio),(int)(ROI.y*y_ratio),(int)(ROI.width*x_ratio),(int)(ROI.height*y_ratio));
+        }
+        disp = Mat(disp_full_size,ROI);
+        src  = Mat(src_full_size ,ROI);
+        filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
+        Mat& dst_full_size = filtered_disparity_map.getMatRef();
+        dst_full_size = Scalar(-16);
+        dst = Mat(dst_full_size,ROI);
+        Mat filtered_disp;
+        fastGlobalSmootherFilter(src,disp,filtered_disp,lambda,sigma_color);
+        filtered_disp.copyTo(dst);
+    }
+    else
+    {
+        CV_Assert( !disparity_map_right.empty() && (disparity_map_right.depth() == CV_16S) && (disparity_map_right.channels() == 1) );
+        CV_Assert( (disparity_map_left.cols() == disparity_map_right.cols()) );
+        CV_Assert( (disparity_map_left.rows() == disparity_map_right.rows()) );
+        computeConfidenceMap(disparity_map_left,disparity_map_right,ROI);
+        Mat disp_full_size = disparity_map_left.getMat();
+        Mat src_full_size = left_view.getMat();
+        if(disp_full_size.size!=src_full_size.size)
+        {
+            float x_ratio = src_full_size.cols/(float)disp_full_size.cols;
+            float y_ratio = src_full_size.rows/(float)disp_full_size.rows;
+            resize(disp_full_size,disp_full_size,src_full_size.size());
+            disp_full_size = disp_full_size*x_ratio;
+            resize(confidence_map,confidence_map,src_full_size.size());
+            ROI = Rect((int)(ROI.x*x_ratio),(int)(ROI.y*y_ratio),(int)(ROI.width*x_ratio),(int)(ROI.height*y_ratio));
+        }
+        disp = Mat(disp_full_size,ROI);
+        src  = Mat(src_full_size ,ROI);
+        filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
+        Mat& dst_full_size = filtered_disparity_map.getMatRef();
+        dst_full_size = Scalar(-16);
+        dst = Mat(dst_full_size,ROI);
+        Mat conf(confidence_map,ROI);

-        Mat disp_32F,dst_32F;
-        disp.convertTo(disp_32F,CV_32F);
-        guidedFilter(src,disp_32F,dst_32F,radius,eps);
-        dst_32F.convertTo(dst,CV_16S);
+        Mat disp_mul_conf;
+        disp.convertTo(disp_mul_conf,CV_32F);
+        disp_mul_conf = conf.mul(disp_mul_conf);
+        Mat conf_filtered;
+        Ptr<FastGlobalSmootherFilter> wls = createFastGlobalSmootherFilter(src,lambda,sigma_color);
+        wls->filter(disp_mul_conf,disp_mul_conf);
+        wls->filter(conf,conf_filtered);
+        disp_mul_conf = disp_mul_conf.mul(1/(conf_filtered+EPS));
+        disp_mul_conf.convertTo(dst,CV_16S);
    }
-};
+}

-CV_EXPORTS_W
-Ptr<DisparityGuidedFilter> createDisparityGuidedFilter()
+DisparityWLSFilterImpl::ComputeDiscontinuityAwareLRC_ParBody::ComputeDiscontinuityAwareLRC_ParBody(DisparityWLSFilterImpl& _wls, Mat& _left_disp, Mat& _right_disp, Mat& _left_disc, Mat& _right_disc, Mat& _dst, Rect _left_ROI, Rect _right_ROI, int _nstripes):
+wls(&_wls),left_disp(&_left_disp),right_disp(&_right_disp),left_disc(&_left_disc),right_disc(&_right_disc),dst(&_dst),left_ROI(_left_ROI),right_ROI(_right_ROI),nstripes(_nstripes)
 {
-    return Ptr<DisparityGuidedFilter>(DisparityGuidedFilterImpl::create());
+    stripe_sz = (int)ceil(left_disp->rows/(double)nstripes);
 }
-    
-//////////////////////////////////////////////////////////////////////////////////////////////////////

-class DisparityWLSFilterImpl : public DisparityWLSFilter
+void DisparityWLSFilterImpl::ComputeDiscontinuityAwareLRC_ParBody::operator() (const Range& range) const
 {
-protected:
-    double lambda,sigma_color;
-    void init(double _lambda, double _sigma_color)
+    short* row_left;
+    float* row_left_conf;
+    short* row_right;
+    float* row_right_conf;
+    float* row_dst;
+    int right_idx;
+    int h = left_disp->rows;
+
+    int start = std::min(range.start * stripe_sz, h);
+    int end   = std::min(range.end   * stripe_sz, h);
+    int thresh = (int)(wls->resize_factor*wls->LRC_thresh);
+    for(int i=start;i<end;i++)
    {
-        lambda = _lambda;
-        sigma_color = _sigma_color;
+        row_left  = (short*)left_disp->ptr(i);
+        row_left_conf  = (float*)left_disc->ptr(i);
+        row_right = (short*)right_disp->ptr(i);
+        row_right_conf  = (float*)right_disc->ptr(i);
+        row_dst   = (float*)dst->ptr(i);
+        int j_start = left_ROI.x;
+        int j_end = left_ROI.x+left_ROI.width;
+        int right_end = right_ROI.x+right_ROI.width;
+        for(int j=j_start;j<j_end;j++)
+        {
+            right_idx = j-(row_left[j]>>4);
+            if( right_idx>=right_ROI.x && right_idx<right_end)
+            {
+                if(abs(row_left[j] + row_right[right_idx])< thresh)
+                    row_dst[j] = min(row_left_conf[j],row_right_conf[right_idx]);
+                else
+                    row_dst[j] = 0.0f;
+            }
+        }
    }
-public:
-    double getLambda() {return lambda;}
-    void setLambda(double _lambda) {lambda = _lambda;}
-    double getSigmaColor() {return sigma_color;}
-    void setSigmaColor(double _sigma_color) {sigma_color = _sigma_color;}
+}
+
+DisparityWLSFilterImpl::ComputeDepthDisc_ParBody::ComputeDepthDisc_ParBody(DisparityWLSFilterImpl& _wls, Mat& _disp, Mat& _disp_squares, Mat& _dst, int _nstripes):
+wls(&_wls),disp(&_disp),disp_squares(&_disp_squares),dst(&_dst),nstripes(_nstripes)
+{
+    stripe_sz = (int)ceil(disp->rows/(double)nstripes);
+}

-    static Ptr<DisparityWLSFilterImpl> create()
+void DisparityWLSFilterImpl::ComputeDepthDisc_ParBody::operator() (const Range& range) const
+{
+    float* row_disp;
+    float* row_disp_squares;
+    float* row_dst;
+    float variance;
+    int h = disp->rows;
+    int w = disp->cols;
+    int start = std::min(range.start * stripe_sz, h);
+    int end   = std::min(range.end   * stripe_sz, h);
+    float roll_off = wls->depth_discontinuity_roll_off_factor/(wls->resize_factor*wls->resize_factor);
+
+    for(int i=start;i<end;i++)
    {
-        DisparityWLSFilterImpl *wls = new DisparityWLSFilterImpl();
-        wls->init(500.0,2.0);
-        return Ptr<DisparityWLSFilterImpl>(wls);
+        row_disp = (float*)disp->ptr(i);
+        row_disp_squares = (float*)disp_squares->ptr(i);
+        row_dst = (float*)dst->ptr(i);
+
+        for(int j=0;j<w;j++)
+        {
+            variance = row_disp_squares[j] - (row_disp[j])*(row_disp[j]);
+            row_dst[j] = max(1.0f - roll_off*variance,0.0f);
+        }
    }
+}
+
+DisparityWLSFilterImpl::ParallelMatOp_ParBody::ParallelMatOp_ParBody(DisparityWLSFilterImpl& _wls, vector<MatOp> _ops, vector<Mat*>& _src, vector<Mat*>& _dst):
+wls(&_wls),ops(_ops),src(_src),dst(_dst)
+{}
+
+void DisparityWLSFilterImpl::ParallelMatOp_ParBody::operator() (const Range& range) const
+{
+    for(int i=range.start;i<range.end;i++)
+        (wls->*ops[i])(*src[i],*dst[i]);
+}
+
+CV_EXPORTS_W
+Ptr<DisparityWLSFilter> createDisparityWLSFilter(bool use_confidence)
+{
+    return Ptr<DisparityWLSFilter>(DisparityWLSFilterImpl::create(use_confidence));
+}
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+
+#define UNKNOWN_DISPARITY 16320

-    void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI)
+int readGT(String src_path,OutputArray dst)
+{
+    Mat src = imread(src_path,IMREAD_UNCHANGED);
+    dst.create(src.rows,src.cols,CV_16S);
+    Mat& dstMat = dst.getMatRef();
+
+    if(!src.empty() && src.channels()==3 && src.depth()==CV_8U)
    {
-        Mat disp,src,dst;
-        setROI(disparity_map,left_view,filtered_disparity_map,disp,src,dst,ROI);
-        weightedLeastSquaresFilter(src,disp,dst,lambda,sigma_color);
+        //MPI-Sintel format:
+        for(int i=0;i<src.rows;i++)
+            for(int j=0;j<src.cols;j++)
+            {
+                Vec3b bgrPixel = src.at<Vec3b>(i, j);
+                dstMat.at<short>(i,j) = 64*bgrPixel.val[2]+bgrPixel.val[1]/4; //16-multiplied disparity
+            }
+        return 0;
    }
-};
+    else if(!src.empty() && src.channels()==1 && src.depth()==CV_8U)
+    {
+        //Middlebury format:
+        for(int i=0;i<src.rows;i++)
+            for(int j=0;j<src.cols;j++)
+            {
+                short src_val = src.at<unsigned char>(i, j);
+                if(src_val==0)
+                    dstMat.at<short>(i,j) = UNKNOWN_DISPARITY;
+                else
+                    dstMat.at<short>(i,j) = 16*src_val; //16-multiplied disparity
+            }
+         return 0;
+    }
+    else
+        return 1;
+}

-CV_EXPORTS_W
-Ptr<DisparityWLSFilter> createDisparityWLSFilter()
+double computeMSE(InputArray GT, InputArray src, Rect ROI)
 {
-    return Ptr<DisparityWLSFilter>(DisparityWLSFilterImpl::create());
+    CV_Assert( !GT.empty()  && (GT.depth()  == CV_16S) && (GT.channels()  == 1) );
+    CV_Assert( !src.empty() && (src.depth() == CV_16S) && (src.channels() == 1) );
+    CV_Assert( src.rows() == GT.rows() && src.cols() == GT.cols() );
+    double res = 0;
+    Mat GT_ROI (GT.getMat(), ROI);
+    Mat src_ROI(src.getMat(),ROI);
+    int cnt=0;
+    for(int i=0;i<src_ROI.rows;i++)
+        for(int j=0;j<src_ROI.cols;j++)
+        {
+            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
+            {
+                res += (GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))*(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j));
+                cnt++;
+            }
+        }
+    res /= cnt*256;
+    return res;
 }

-//////////////////////////////////////////////////////////////////////////////////////////////////////
+double computeBadPixelPercent(InputArray GT, InputArray src, Rect ROI, int thresh)
+{
+    CV_Assert( !GT.empty()  && (GT.depth()  == CV_16S) && (GT.channels()  == 1) );
+    CV_Assert( !src.empty() && (src.depth() == CV_16S) && (src.channels() == 1) );
+    CV_Assert( src.rows() == GT.rows() && src.cols() == GT.cols() );
+    int bad_pixel_num = 0;
+    Mat GT_ROI (GT.getMat(), ROI);
+    Mat src_ROI(src.getMat(),ROI);
+    int cnt=0;
+    for(int i=0;i<src_ROI.rows;i++)
+        for(int j=0;j<src_ROI.cols;j++)
+        {
+            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
+            {
+                if( abs(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))>=thresh )
+                    bad_pixel_num++;
+                cnt++;
+            }
+        }
+    return (100.0*bad_pixel_num)/cnt;
+}

-void setROI(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,
-    Mat& disp,Mat& src,Mat& dst,Rect ROI)
+void getDisparityVis(InputArray src,OutputArray dst,double scale)
 {
-    Mat disp_full_size = disparity_map.getMat();
-    Mat src_full_size = left_view.getMat();
-    CV_Assert( (disp_full_size.depth() == CV_16S) && (disp_full_size.channels() == 1) );
-    CV_Assert( (src_full_size.depth()  == CV_8U)  && (src_full_size.channels()  <= 4) );
-    disp = Mat(disp_full_size,ROI);
-    src  = Mat(src_full_size ,ROI);
-    filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
-    Mat& dst_full_size = filtered_disparity_map.getMatRef();
-    dst = Mat(dst_full_size,ROI);
+    CV_Assert( !src.empty() && (src.depth() == CV_16S) && (src.channels() == 1) );
+    Mat srcMat = src.getMat();
+    dst.create(srcMat.rows,srcMat.cols,CV_8UC1);
+    Mat& dstMat = dst.getMatRef();
+
+    for(int i=0;i<dstMat.rows;i++)
+        for(int j=0;j<dstMat.cols;j++)
+        {
+            if(srcMat.at<short>(i,j)==UNKNOWN_DISPARITY)
+                dstMat.at<unsigned char>(i,j) = 0;
+            else
+                dstMat.at<unsigned char>(i,j) = saturate_cast<unsigned char>(scale*srcMat.at<short>(i,j)/16.0);
+        }
 }

 }
--- a/modules/ximgproc/src/fgs_filter.cpp
+++ b/modules/ximgproc/src/fgs_filter.cpp
@ -0,0 +1,694 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  *Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#include "precomp.hpp"
+#include "opencv2/hal/intrin.hpp"
+#include <vector>
+
+namespace cv {
+namespace ximgproc {
+
+using std::vector;
+
+typedef float WorkType;
+typedef Vec<WorkType, 1> WorkVec;
+typedef WorkType (*get_weight_op)(WorkType*, unsigned char*,unsigned char*);
+
+inline WorkType get_weight_1channel(WorkType* LUT, unsigned char* p1,unsigned char* p2)
+{
+    return LUT[ (p1[0]-p2[0])*(p1[0]-p2[0]) ];
+}
+inline WorkType get_weight_3channel(WorkType* LUT, unsigned char* p1,unsigned char* p2)
+{
+    return LUT[ (p1[0]-p2[0])*(p1[0]-p2[0])+
+                (p1[1]-p2[1])*(p1[1]-p2[1])+
+                (p1[2]-p2[2])*(p1[2]-p2[2]) ];
+}
+
+class FastGlobalSmootherFilterImpl : public FastGlobalSmootherFilter
+{
+public:
+    static Ptr<FastGlobalSmootherFilterImpl> create(InputArray guide, double lambda, double sigma_color, int num_iter,double lambda_attenuation);
+    void filter(InputArray src, OutputArray dst);
+
+protected:
+    int w,h;
+    int num_stripes;
+    float sigmaColor,lambda;
+    float lambda_attenuation;
+    int num_iter;
+    Mat weights_LUT;
+    Mat Chor, Cvert;
+    Mat interD;
+    void init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter,double _lambda_attenuation);
+    void horizontalPass(Mat& cur);
+    void verticalPass(Mat& cur);
+protected:
+    struct HorizontalPass_ParBody : public ParallelLoopBody
+    {
+        FastGlobalSmootherFilterImpl* fgs;
+        Mat* cur;
+        int nstripes, stripe_sz;
+        int h;
+
+        HorizontalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _h);
+        void operator () (const Range& range) const;
+    };
+    inline void process_4row_block(Mat* cur,int i);
+    inline void process_row(Mat* cur,int i);
+
+    struct VerticalPass_ParBody : public ParallelLoopBody
+    {
+        FastGlobalSmootherFilterImpl* fgs;
+        Mat* cur;
+        int nstripes, stripe_sz;
+        int w;
+
+        VerticalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _w);
+        void operator () (const Range& range) const;
+    };
+
+    template<get_weight_op get_weight, const int num_ch>
+    struct ComputeHorizontalWeights_ParBody : public ParallelLoopBody
+    {
+        FastGlobalSmootherFilterImpl* fgs;
+        Mat* guide;
+        int nstripes, stripe_sz;
+        int h;
+
+        ComputeHorizontalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _h);
+        void operator () (const Range& range) const;
+    };
+
+    template<get_weight_op get_weight, const int num_ch>
+    struct ComputeVerticalWeights_ParBody : public ParallelLoopBody
+    {
+        FastGlobalSmootherFilterImpl* fgs;
+        Mat* guide;
+        int nstripes, stripe_sz;
+        int w;
+
+        ComputeVerticalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _w);
+        void operator () (const Range& range) const;
+    };
+
+    struct ComputeLUT_ParBody : public ParallelLoopBody
+    {
+        FastGlobalSmootherFilterImpl* fgs;
+        WorkType* LUT;
+        int nstripes, stripe_sz;
+        int sz;
+
+        ComputeLUT_ParBody(FastGlobalSmootherFilterImpl &_fgs, WorkType* _LUT, int _nstripes, int _sz);
+        void operator () (const Range& range) const;
+    };
+};
+
+
+void FastGlobalSmootherFilterImpl::init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter,double _lambda_attenuation)
+{
+    CV_Assert( !guide.empty() && _lambda >= 0 && _sigmaColor >= 0 && _num_iter >=1 );
+    CV_Assert( guide.depth() == CV_8U && (guide.channels() == 1 || guide.channels() == 3) );
+    sigmaColor = (float)_sigmaColor;
+    lambda = (float)_lambda;
+    lambda_attenuation = (float)_lambda_attenuation;
+    num_iter = _num_iter;
+    num_stripes = getNumThreads();
+    int num_levels = 3*256*256;
+    weights_LUT.create(1,num_levels,WorkVec::type);
+
+    WorkType* LUT = (WorkType*)weights_LUT.ptr(0);
+    parallel_for_(Range(0,num_stripes),ComputeLUT_ParBody(*this,LUT,num_stripes,num_levels));
+
+    w = guide.cols();
+    h = guide.rows();
+    Chor.  create(h,w,WorkVec::type);
+    Cvert. create(h,w,WorkVec::type);
+    interD.create(h,w,WorkVec::type);
+    Mat guideMat = guide.getMat();
+
+    if(guide.channels() == 1)
+    {
+        parallel_for_(Range(0,num_stripes),ComputeHorizontalWeights_ParBody<get_weight_1channel,1>(*this,guideMat,num_stripes,h));
+        parallel_for_(Range(0,num_stripes),ComputeVerticalWeights_ParBody  <get_weight_1channel,1>(*this,guideMat,num_stripes,w));
+    }
+    if(guide.channels() == 3)
+    {
+        parallel_for_(Range(0,num_stripes),ComputeHorizontalWeights_ParBody<get_weight_3channel,3>(*this,guideMat,num_stripes,h));
+        parallel_for_(Range(0,num_stripes),ComputeVerticalWeights_ParBody  <get_weight_3channel,3>(*this,guideMat,num_stripes,w));
+    }
+}
+
+Ptr<FastGlobalSmootherFilterImpl> FastGlobalSmootherFilterImpl::create(InputArray guide, double lambda, double sigma_color, int num_iter, double lambda_attenuation)
+{
+    FastGlobalSmootherFilterImpl *fgs = new FastGlobalSmootherFilterImpl();
+    fgs->init(guide,lambda,sigma_color,num_iter,lambda_attenuation);
+    return Ptr<FastGlobalSmootherFilterImpl>(fgs);
+}
+
+void FastGlobalSmootherFilterImpl::filter(InputArray src, OutputArray dst)
+{
+    CV_Assert(!src.empty() && (src.depth() == CV_8U || src.depth() == CV_16S || src.depth() == CV_32F) && src.channels()<=4);
+    if (src.rows() != h || src.cols() != w)
+    {
+        CV_Error(Error::StsBadSize, "Size of the filtered image must be equal to the size of the guide image");
+        return;
+    }
+
+    vector<Mat> src_channels;
+    vector<Mat> dst_channels;
+    if(src.channels()==1)
+        src_channels.push_back(src.getMat());
+    else
+        split(src,src_channels);
+
+    float lambda_ref = lambda;
+
+    for(int i=0;i<src.channels();i++)
+    {
+        lambda = lambda_ref;
+        Mat cur_res = src_channels[i].clone();
+        if(src.depth()!=WorkVec::type)
+            cur_res.convertTo(cur_res,WorkVec::type);
+
+        for(int n=0;n<num_iter;n++)
+        {
+            horizontalPass(cur_res);
+            verticalPass(cur_res);
+            lambda*=lambda_attenuation;
+        }
+
+        Mat dstMat;
+        if(src.depth()!=WorkVec::type)
+            cur_res.convertTo(dstMat,src.depth());
+        else
+            dstMat = cur_res;
+
+        dst_channels.push_back(dstMat);
+    }
+
+    lambda = lambda_ref;
+
+    dst.create(src.size(),src.type());
+    if(src.channels()==1)
+    {
+        Mat& dstMat = dst.getMatRef();
+        dstMat = dst_channels[0];
+    }
+    else
+        merge(dst_channels,dst);
+}
+
+void FastGlobalSmootherFilterImpl::horizontalPass(Mat& cur)
+{
+    parallel_for_(Range(0,num_stripes),HorizontalPass_ParBody(*this,cur,num_stripes,h));
+}
+
+void FastGlobalSmootherFilterImpl::verticalPass(Mat& cur)
+{
+    parallel_for_(Range(0,num_stripes),VerticalPass_ParBody(*this,cur,num_stripes,w));
+}
+
+FastGlobalSmootherFilterImpl::HorizontalPass_ParBody::HorizontalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _h):
+fgs(&_fgs),cur(&_cur), nstripes(_nstripes), h(_h)
+{
+    stripe_sz = (int)ceil(h/(double)nstripes);
+}
+
+void FastGlobalSmootherFilterImpl::process_4row_block(Mat* cur,int i)
+{
+    WorkType denom,denom_next,denom_next2,denom_next3;
+
+    WorkType *Chor_row   = (WorkType*)Chor.ptr  (i);
+    WorkType *interD_row = (WorkType*)interD.ptr(i);
+    WorkType *cur_row    = (WorkType*)cur->ptr  (i);
+
+    WorkType *Chor_row_next   = (WorkType*)Chor.ptr  (i+1);
+    WorkType *interD_row_next = (WorkType*)interD.ptr(i+1);
+    WorkType *cur_row_next    = (WorkType*)cur->ptr  (i+1);
+
+    WorkType *Chor_row_next2   = (WorkType*)Chor.ptr  (i+2);
+    WorkType *interD_row_next2 = (WorkType*)interD.ptr(i+2);
+    WorkType *cur_row_next2    = (WorkType*)cur->ptr  (i+2);
+
+    WorkType *Chor_row_next3   = (WorkType*)Chor.ptr  (i+3);
+    WorkType *interD_row_next3 = (WorkType*)interD.ptr(i+3);
+    WorkType *cur_row_next3    = (WorkType*)cur->ptr  (i+3);
+
+    float coef_cur,          coef_prev;
+    float coef_cur_row_next, coef_prev_row_next;
+    float coef_cur_row_next2,coef_prev_row_next2;
+    float coef_cur_row_next3,coef_prev_row_next3;
+
+    //forward pass:
+    coef_prev           = lambda*Chor_row[0];
+    coef_prev_row_next  = lambda*Chor_row_next[0];
+    coef_prev_row_next2 = lambda*Chor_row_next2[0];
+    coef_prev_row_next3 = lambda*Chor_row_next3[0];
+
+    interD_row[0]       = coef_prev          /(1-coef_prev);
+    interD_row_next[0]  = coef_prev_row_next /(1-coef_prev_row_next);
+    interD_row_next2[0] = coef_prev_row_next2/(1-coef_prev_row_next2);
+    interD_row_next3[0] = coef_prev_row_next3/(1-coef_prev_row_next3);
+
+    cur_row[0]       = cur_row[0]      /(1-coef_prev);
+    cur_row_next[0]  = cur_row_next[0] /(1-coef_prev_row_next);
+    cur_row_next2[0] = cur_row_next2[0]/(1-coef_prev_row_next2);
+    cur_row_next3[0] = cur_row_next3[0]/(1-coef_prev_row_next3);
+    int j=1;
+
+#if CV_SIMD128
+    {
+        v_float32x4 coef_prev_reg(coef_prev,coef_prev_row_next,coef_prev_row_next2,coef_prev_row_next3);
+        v_float32x4 interD_prev_reg(interD_row[0],interD_row_next[0],interD_row_next2[0],interD_row_next3[0]);
+        v_float32x4 cur_prev_reg(cur_row[0],cur_row_next[0],cur_row_next2[0],cur_row_next3[0]);
+        v_float32x4 lambda_reg(lambda,lambda,lambda,lambda);
+        v_float32x4 one_reg(1.0f,1.0f,1.0f,1.0f);
+
+        v_float32x4 a0,a1,a2,a3;
+        v_float32x4 b0,b1,b2,b3;
+        v_float32x4 aux0,aux1,aux2,aux3;
+
+#define PROC4(Chor_in,cur_in,coef_prev_in,interD_prev_in,cur_prev_in,interD_out,cur_out,coef_cur_out)\
+        coef_cur_out = lambda_reg*Chor_in;\
+        aux0 = interD_prev_in*coef_prev_in;\
+        aux1 = coef_cur_out+coef_prev_in;\
+        aux1 = one_reg-aux1;\
+        aux0 = aux1-aux0;\
+        interD_out = coef_cur_out/aux0;\
+        aux1 = cur_prev_in*coef_prev_in;\
+        aux1 = cur_in - aux1;\
+        cur_out = aux1/aux0;
+
+        for(;j<w-3;j+=4)
+        {
+            // processing a 4x4 block:
+
+            aux0 = v_load(Chor_row+j);
+            aux1 = v_load(Chor_row_next+j);
+            aux2 = v_load(Chor_row_next2+j);
+            aux3 = v_load(Chor_row_next3+j);
+            v_transpose4x4(aux0,aux1,aux2,aux3,a0,a1,a2,a3);
+
+            aux0 = v_load(cur_row+j);
+            aux1 = v_load(cur_row_next+j);
+            aux2 = v_load(cur_row_next2+j);
+            aux3 = v_load(cur_row_next3+j);
+            v_transpose4x4(aux0,aux1,aux2,aux3,b0,b1,b2,b3);
+
+            PROC4(a0,b0,coef_prev_reg,interD_prev_reg,cur_prev_reg,a0,b0,aux2);
+            PROC4(a1,b1,aux2,a0,b0,a1,b1,aux3);
+            PROC4(a2,b2,aux3,a1,b1,a2,b2,aux2);
+            PROC4(a3,b3,aux2,a2,b2,a3,b3,aux3);
+
+            interD_prev_reg = a3;
+            cur_prev_reg = b3;
+            coef_prev_reg = aux3;
+
+            v_transpose4x4(a0,a1,a2,a3,aux0,aux1,aux2,aux3);
+            v_store(interD_row+j,aux0);
+            v_store(interD_row_next+j,aux1);
+            v_store(interD_row_next2+j,aux2);
+            v_store(interD_row_next3+j,aux3);
+
+            v_transpose4x4(b0,b1,b2,b3,aux0,aux1,aux2,aux3);
+            v_store(cur_row+j,aux0);
+            v_store(cur_row_next+j,aux1);
+            v_store(cur_row_next2+j,aux2);
+            v_store(cur_row_next3+j,aux3);
+        }
+#undef PROC4
+    }
+#endif
+
+    for(;j<w;j++)
+    {
+        coef_prev           = lambda*Chor_row[j-1];
+        coef_prev_row_next  = lambda*Chor_row_next[j-1];
+        coef_prev_row_next2 = lambda*Chor_row_next2[j-1];
+        coef_prev_row_next3 = lambda*Chor_row_next3[j-1];
+
+        coef_cur           = lambda*Chor_row[j];
+        coef_cur_row_next  = lambda*Chor_row_next[j];
+        coef_cur_row_next2 = lambda*Chor_row_next2[j];
+        coef_cur_row_next3 = lambda*Chor_row_next3[j];
+
+        denom       = (1-coef_prev          -coef_cur)          -interD_row[j-1]      *coef_prev;
+        denom_next  = (1-coef_prev_row_next -coef_cur_row_next) -interD_row_next[j-1] *coef_prev_row_next;
+        denom_next2 = (1-coef_prev_row_next2-coef_cur_row_next2)-interD_row_next2[j-1]*coef_prev_row_next2;
+        denom_next3 = (1-coef_prev_row_next3-coef_cur_row_next3)-interD_row_next3[j-1]*coef_prev_row_next3;
+
+        interD_row[j]       = coef_cur          /denom;
+        interD_row_next[j]  = coef_cur_row_next /denom_next;
+        interD_row_next2[j] = coef_cur_row_next2/denom_next2;
+        interD_row_next3[j] = coef_cur_row_next3/denom_next3;
+
+        cur_row[j]       = (cur_row[j]      -cur_row[j-1]      *coef_prev)          /denom;
+        cur_row_next[j]  = (cur_row_next[j] -cur_row_next[j-1] *coef_prev_row_next) /denom_next;
+        cur_row_next2[j] = (cur_row_next2[j]-cur_row_next2[j-1]*coef_prev_row_next2)/denom_next2;
+        cur_row_next3[j] = (cur_row_next3[j]-cur_row_next3[j-1]*coef_prev_row_next3)/denom_next3;
+    }
+    //backward pass:
+    j = w-2;
+
+#if CV_SIMD128
+    {
+        v_float32x4 cur_next_reg(cur_row[w-1],cur_row_next[w-1],cur_row_next2[w-1],cur_row_next3[w-1]);
+        v_float32x4 a0,a1,a2,a3;
+        v_float32x4 b0,b1,b2,b3;
+        v_float32x4 aux0,aux1,aux2,aux3;
+        for(j-=3;j>=0;j-=4)
+        {
+            //process 4x4 block:
+
+            aux0 = v_load(interD_row+j);
+            aux1 = v_load(interD_row_next+j);
+            aux2 = v_load(interD_row_next2+j);
+            aux3 = v_load(interD_row_next3+j);
+            v_transpose4x4(aux0,aux1,aux2,aux3,a0,a1,a2,a3);
+
+            aux0 = v_load(cur_row+j);
+            aux1 = v_load(cur_row_next+j);
+            aux2 = v_load(cur_row_next2+j);
+            aux3 = v_load(cur_row_next3+j);
+            v_transpose4x4(aux0,aux1,aux2,aux3,b0,b1,b2,b3);
+
+            aux0 = a3*cur_next_reg;
+            b3 = b3-aux0;
+            aux0 = a2*b3;
+            b2 = b2-aux0;
+            aux0 = a1*b2;
+            b1 = b1-aux0;
+            aux0 = a0*b1;
+            b0 = b0-aux0;
+
+            cur_next_reg = b0;
+
+            v_transpose4x4(b0,b1,b2,b3,aux0,aux1,aux2,aux3);
+            v_store(cur_row+j,aux0);
+            v_store(cur_row_next+j,aux1);
+            v_store(cur_row_next2+j,aux2);
+            v_store(cur_row_next3+j,aux3);
+        }
+        j+=3;
+    }
+#endif
+
+    for(;j>=0;j--)
+    {
+        cur_row[j]       = cur_row[j]      -interD_row[j]      *cur_row[j+1];
+        cur_row_next[j]  = cur_row_next[j] -interD_row_next[j] *cur_row_next[j+1];
+        cur_row_next2[j] = cur_row_next2[j]-interD_row_next2[j]*cur_row_next2[j+1];
+        cur_row_next3[j] = cur_row_next3[j]-interD_row_next3[j]*cur_row_next3[j+1];
+    }
+}
+
+void FastGlobalSmootherFilterImpl::process_row(Mat* cur,int i)
+{
+    WorkType denom;
+    WorkType *Chor_row = (WorkType*)Chor.ptr(i);
+    WorkType *interD_row = (WorkType*)interD.ptr(i);
+    WorkType *cur_row = (WorkType*)cur->ptr(i);
+
+    float coef_cur,coef_prev;
+
+    //forward pass:
+    coef_prev = lambda*Chor_row[0];
+    interD_row[0] = coef_prev/(1-coef_prev);
+    cur_row[0] = cur_row[0]/(1-coef_prev);
+    for(int j=1;j<w;j++)
+    {
+        coef_cur = lambda*Chor_row[j];
+        denom = (1-coef_prev-coef_cur)-interD_row[j-1]*coef_prev;
+        interD_row[j] = coef_cur/denom;
+        cur_row[j] = (cur_row[j]-cur_row[j-1]*coef_prev)/denom;
+        coef_prev = coef_cur;
+    }
+
+    //backward pass:
+    for(int j=w-2;j>=0;j--)
+        cur_row[j] = cur_row[j]-interD_row[j]*cur_row[j+1];
+}
+
+void FastGlobalSmootherFilterImpl::HorizontalPass_ParBody::operator()(const Range& range) const
+{
+    int start = std::min(range.start * stripe_sz, h);
+    int end   = std::min(range.end   * stripe_sz, h);
+
+    int i=start;
+    for(;i<end-3;i+=4)
+        fgs->process_4row_block(cur,i);
+    for(;i<end;i++)
+        fgs->process_row(cur,i);
+}
+
+FastGlobalSmootherFilterImpl::VerticalPass_ParBody::VerticalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _w):
+fgs(&_fgs),cur(&_cur), nstripes(_nstripes), w(_w)
+{
+    stripe_sz = (int)ceil(w/(double)nstripes);
+}
+
+void FastGlobalSmootherFilterImpl::VerticalPass_ParBody::operator()(const Range& range) const
+{
+    int start = std::min(range.start * stripe_sz, w);
+    int end   = std::min(range.end   * stripe_sz, w);
+
+    //float lambda = fgs->lambda;
+    WorkType denom;
+    WorkType *Cvert_row, *Cvert_row_prev;
+    WorkType *interD_row, *interD_row_prev, *cur_row, *cur_row_prev, *cur_row_next;
+
+    float coef_cur,coef_prev;
+
+    Cvert_row = (WorkType*)fgs->Cvert.ptr(0);
+    interD_row = (WorkType*)fgs->interD.ptr(0);
+    cur_row = (WorkType*)cur->ptr(0);
+    //forward pass:
+    for(int j=start;j<end;j++)
+    {
+        coef_cur = fgs->lambda*Cvert_row[j];
+        interD_row[j] = coef_cur/(1-coef_cur);
+        cur_row[j] = cur_row[j]/(1-coef_cur);
+    }
+    for(int i=1;i<fgs->h;i++)
+    {
+        Cvert_row = (WorkType*)fgs->Cvert.ptr(i);
+        Cvert_row_prev = (WorkType*)fgs->Cvert.ptr(i-1);
+        interD_row = (WorkType*)fgs->interD.ptr(i);
+        interD_row_prev = (WorkType*)fgs->interD.ptr(i-1);
+        cur_row = (WorkType*)cur->ptr(i);
+        cur_row_prev = (WorkType*)cur->ptr(i-1);
+        int j = start;
+
+#if CV_SIMD128
+        v_float32x4 a,b,c,d,coef_cur_reg,coef_prev_reg;
+        v_float32x4 one_reg(1.0f,1.0f,1.0f,1.0f);
+        v_float32x4 lambda_reg(fgs->lambda,fgs->lambda,fgs->lambda,fgs->lambda);
+        int sz4 = 4*((end-start)/4);
+        int end4 = start+sz4;
+        for(;j<end4;j+=4)
+        {
+            a = v_load(Cvert_row_prev+j);
+            b = v_load(Cvert_row+j);
+            coef_prev_reg = lambda_reg*a;
+            coef_cur_reg =  lambda_reg*b;
+
+            a = v_load(interD_row_prev+j);
+            a = a*coef_prev_reg;
+
+            b = coef_prev_reg+coef_cur_reg;
+            b = b+a;
+            a = one_reg-b; //computed denom
+
+            b =  coef_cur_reg/a; //computed interD_row
+
+            c = v_load(cur_row_prev+j);
+            c = c*coef_prev_reg;
+
+            d = v_load(cur_row+j);
+            d = d-c;
+            d = d/a; //computed cur_row
+
+            v_store(interD_row+j,b);
+            v_store(cur_row+j,d);
+        }
+#endif
+        for(;j<end;j++)
+        {
+            coef_prev = fgs->lambda*Cvert_row_prev[j];
+            coef_cur  = fgs->lambda*Cvert_row[j];
+            denom = (1-coef_prev-coef_cur)-interD_row_prev[j]*coef_prev;
+            interD_row[j] = coef_cur/denom;
+            cur_row[j] = (cur_row[j]-cur_row_prev[j]*coef_prev)/denom;
+        }
+    }
+
+    //backward pass:
+    for(int i=fgs->h-2;i>=0;i--)
+    {
+        interD_row = (WorkType*)fgs->interD.ptr(i);
+        cur_row = (WorkType*)cur->ptr(i);
+        cur_row_next = (WorkType*)cur->ptr(i+1);
+        int j = start;
+#if CV_SIMD128
+        v_float32x4 a,b;
+        int sz4 = 4*((end-start)/4);
+        int end4 = start+sz4;
+        for(;j<end4;j+=4)
+        {
+            a = v_load(interD_row+j);
+            b = v_load(cur_row_next+j);
+            b = a*b;
+
+            a = v_load(cur_row+j);
+            b = a-b;
+            v_store(cur_row+j,b);
+        }
+#endif
+        for(;j<end;j++)
+            cur_row[j] = cur_row[j]-interD_row[j]*cur_row_next[j];
+    }
+}
+
+template<get_weight_op get_weight, const int num_ch>
+FastGlobalSmootherFilterImpl::ComputeHorizontalWeights_ParBody<get_weight,num_ch>::ComputeHorizontalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _h):
+fgs(&_fgs),guide(&_guide), nstripes(_nstripes), h(_h)
+{
+    stripe_sz = (int)ceil(h/(double)nstripes);
+}
+
+template<get_weight_op get_weight, const int num_ch>
+void FastGlobalSmootherFilterImpl::ComputeHorizontalWeights_ParBody<get_weight,num_ch>::operator()(const Range& range) const
+{
+    int start = std::min(range.start * stripe_sz, h);
+    int end   = std::min(range.end   * stripe_sz, h);
+
+    WorkType* LUT = (WorkType*)fgs->weights_LUT.ptr(0);
+    unsigned char *row;
+    WorkType *Chor_row;
+
+    for(int i=start;i<end;i++)
+    {
+        row = guide->ptr(i);
+        Chor_row = (WorkType*)fgs->Chor.ptr(i);
+        Chor_row[0] = get_weight(LUT,row,row+num_ch);
+        row+=num_ch;
+        for(int j=1;j<fgs->w-1;j++)
+        {
+            Chor_row[j] = get_weight(LUT,row,row+num_ch);
+            row+=num_ch;
+        }
+        Chor_row[fgs->w-1]=0;
+    }
+}
+
+template<get_weight_op get_weight, const int num_ch>
+FastGlobalSmootherFilterImpl::ComputeVerticalWeights_ParBody<get_weight,num_ch>::ComputeVerticalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _w):
+fgs(&_fgs),guide(&_guide), nstripes(_nstripes), w(_w)
+{
+    stripe_sz = (int)ceil(w/(double)nstripes);
+}
+
+template<get_weight_op get_weight, const int num_ch>
+void FastGlobalSmootherFilterImpl::ComputeVerticalWeights_ParBody<get_weight,num_ch>::operator()(const Range& range) const
+{
+    int start = std::min(range.start * stripe_sz, w);
+    int end   = std::min(range.end   * stripe_sz, w);
+
+    WorkType* LUT = (WorkType*)fgs->weights_LUT.ptr(0);
+    unsigned char *row,*row_next;
+    WorkType *Cvert_row;
+
+    Cvert_row = (WorkType*)fgs->Cvert.ptr(0);
+    row = guide->ptr(0)+start*num_ch;
+    row_next = guide->ptr(1)+start*num_ch;
+    for(int j=start;j<end;j++)
+    {
+        Cvert_row[j] = get_weight(LUT,row,row_next);
+        row+=num_ch;
+        row_next+=num_ch;
+    }
+
+    for(int i=1;i<fgs->h-1;i++)
+    {
+        row = guide->ptr(i)+start*num_ch;
+        row_next = guide->ptr(i+1)+start*num_ch;
+        Cvert_row = (WorkType*)fgs->Cvert.ptr(i);
+        for(int j=start;j<end;j++)
+        {
+            Cvert_row[j] = get_weight(LUT,row,row_next);
+            row+=num_ch;
+            row_next+=num_ch;
+        }
+    }
+
+    Cvert_row = (WorkType*)fgs->Cvert.ptr(fgs->h-1);
+    for(int j=start;j<end;j++)
+        Cvert_row[j] = 0;
+}
+
+FastGlobalSmootherFilterImpl::ComputeLUT_ParBody::ComputeLUT_ParBody(FastGlobalSmootherFilterImpl &_fgs, WorkType *_LUT, int _nstripes, int _sz):
+fgs(&_fgs), LUT(_LUT), nstripes(_nstripes), sz(_sz)
+{
+    stripe_sz = (int)ceil(sz/(double)nstripes);
+}
+
+void FastGlobalSmootherFilterImpl::ComputeLUT_ParBody::operator()(const Range& range) const
+{
+    int start = std::min(range.start * stripe_sz, sz);
+    int end   = std::min(range.end   * stripe_sz, sz);
+    for(int i=start;i<end;i++)
+        LUT[i] = (WorkType)(-cv::exp(-sqrt((float)i)/fgs->sigmaColor));
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+CV_EXPORTS_W
+Ptr<FastGlobalSmootherFilter> createFastGlobalSmootherFilter(InputArray guide, double lambda, double sigma_color, double lambda_attenuation, int num_iter)
+{
+    return Ptr<FastGlobalSmootherFilter>(FastGlobalSmootherFilterImpl::create(guide, lambda, sigma_color, num_iter, lambda_attenuation));
+}
+
+CV_EXPORTS_W
+void fastGlobalSmootherFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, double lambda_attenuation, int num_iter)
+{
+    Ptr<FastGlobalSmootherFilter> fgs = createFastGlobalSmootherFilter(guide, lambda, sigma_color, lambda_attenuation, num_iter);
+    fgs->filter(src, dst);
+}
+
+}
+}
--- a/modules/ximgproc/src/wls_filter.cpp
+++ b/modules/ximgproc/src/wls_filter.cpp
@ -1,284 +0,0 @@
-/*
- *  By downloading, copying, installing or using the software you agree to this license.
- *  If you do not agree to this license, do not download, install,
- *  copy or use the software.
- *  
- *  
- *  License Agreement
- *  For Open Source Computer Vision Library
- *  (3 - clause BSD License)
- *  
- *  Redistribution and use in source and binary forms, with or without modification,
- *  are permitted provided that the following conditions are met :
- *  
- *  *Redistributions of source code must retain the above copyright notice,
- *  this list of conditions and the following disclaimer.
- *  
- *  * Redistributions in binary form must reproduce the above copyright notice,
- *  this list of conditions and the following disclaimer in the documentation
- *  and / or other materials provided with the distribution.
- *  
- *  * Neither the names of the copyright holders nor the names of the contributors
- *  may be used to endorse or promote products derived from this software
- *  without specific prior written permission.
- *  
- *  This software is provided by the copyright holders and contributors "as is" and
- *  any express or implied warranties, including, but not limited to, the implied
- *  warranties of merchantability and fitness for a particular purpose are disclaimed.
- *  In no event shall copyright holders or contributors be liable for any direct,
- *  indirect, incidental, special, exemplary, or consequential damages
- *  (including, but not limited to, procurement of substitute goods or services;
- *  loss of use, data, or profits; or business interruption) however caused
- *  and on any theory of liability, whether in contract, strict liability,
- *  or tort(including negligence or otherwise) arising in any way out of
- *  the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "precomp.hpp"
-
-namespace cv {
-namespace ximgproc {
-
-class WeightedLeastSquaresFilterImpl : public WeightedLeastSquaresFilter
-{
-public:
-    static Ptr<WeightedLeastSquaresFilterImpl> create(InputArray guide, double lambda, double sigma_color, int num_iter);
-    void filter(InputArray src, OutputArray dst);
-    ~WeightedLeastSquaresFilterImpl();
-protected:
-    int w,h;
-    double sigmaColor,lambda;
-    int num_iter;
-    double *weights_LUT;
-    double *Ahor,  *Bhor,  *Chor, 
-            *Avert, *Bvert, *Cvert;
-    double *interD,*interE,*cur_res;
-    void init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter);
-    void buildCoefMatrices(Mat& guide);
-    void horizontalPass(double* cur);
-    void verticalPass(double* cur);
-};
-
-void WeightedLeastSquaresFilterImpl::init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter)
-{
-    //currently support only 3 channel 8bit images as guides
-    CV_Assert( !guide.empty() && _lambda >= 0 && _sigmaColor >= 0 && _num_iter >=1 );
-    CV_Assert( guide.depth() == CV_8U && guide.channels() == 3 );
-    sigmaColor = _sigmaColor;
-    lambda = _lambda;
-    num_iter = _num_iter;
-    int num_levels = 3*256*256;
-    weights_LUT = new double[num_levels];
-    for(int i=0;i<num_levels;i++)
-        weights_LUT[i] = exp(-sqrt((double)i)/sigmaColor);
-    w = guide.cols();
-    h = guide.rows();
-    int sz = w*h;
-    Ahor  = new double[sz];Bhor  = new double[sz];Chor  = new double[sz];
-    Avert = new double[sz];Bvert = new double[sz];Cvert = new double[sz];
-    interD = new double[sz];interE = new double[sz];cur_res = new double[sz];
-    Mat guideMat = guide.getMat();
-    buildCoefMatrices(guideMat);
-}
-
-Ptr<WeightedLeastSquaresFilterImpl> WeightedLeastSquaresFilterImpl::create(InputArray guide, double lambda, double sigma_color, int num_iter)
-{
-    WeightedLeastSquaresFilterImpl *wls = new WeightedLeastSquaresFilterImpl();
-    wls->init(guide,lambda,sigma_color,num_iter);
-    return Ptr<WeightedLeastSquaresFilterImpl>(wls);
-}
-
-WeightedLeastSquaresFilter::~WeightedLeastSquaresFilter(){}
-WeightedLeastSquaresFilterImpl::~WeightedLeastSquaresFilterImpl()
-{
-    delete[] weights_LUT;
-    delete[] Ahor;  delete[] Bhor;  delete[] Chor;
-    delete[] Avert; delete[] Bvert; delete[] Cvert;
-    delete[] interD;delete[] interE;delete[] cur_res;
-}
-
-void WeightedLeastSquaresFilterImpl::buildCoefMatrices(Mat& guide)
-{
-    double hor_weight;
-    const unsigned char *row,*row_prev,*row_next;
-    for(int i=0;i<h;i++)
-    {
-        //compute horizontal coefs:
-        row = guide.ptr(i);
-        Ahor[i*w] = 0;
-        hor_weight = weights_LUT[ (row[0]-row[3])*(row[0]-row[3])+
-                                  (row[1]-row[4])*(row[1]-row[4])+
-                                  (row[2]-row[5])*(row[2]-row[5]) ];
-        Chor[i*w] = -lambda*hor_weight;
-        Bhor[i*w] = 1 - Ahor[i*w] - Chor[i*w];
-        row+=3;
-        for(int j=1;j<w-1;j++)
-        {
-            Ahor[i*w+j] = -lambda*hor_weight;
-            hor_weight = weights_LUT[ (row[0]-row[3])*(row[0]-row[3])+
-                                      (row[1]-row[4])*(row[1]-row[4])+
-                                      (row[2]-row[5])*(row[2]-row[5]) ];
-            Chor[i*w+j] = -lambda*hor_weight;
-            Bhor[i*w+j] = 1 - Ahor[i*w+j] - Chor[i*w+j];
-            row+=3;
-        }
-        Ahor[i*w+w-1] = -lambda*hor_weight;
-        Chor[i*w+w-1] = 0;
-        Bhor[i*w+w-1] = 1 - Ahor[i*w+w-1] - Chor[i*w+w-1];
-
-        //compute vertical coefs:
-        row = guide.ptr(i);
-        if(i==0)
-        {
-            row_next = guide.ptr(i+1);
-            for(int j=0;j<w;j++)
-            {
-                Avert[i*w+j] = 0;
-                Cvert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_next[0])*(row[0]-row_next[0])+
-                                                    (row[1]-row_next[1])*(row[1]-row_next[1])+
-                                                    (row[2]-row_next[2])*(row[2]-row_next[2]) ];
-                Bvert[i*w+j] = 1 - Avert[i*w+j] - Cvert[i*w+j];
-                row+=3;
-                row_next+=3;
-            }
-        }
-        else if(i==h-1)
-        {
-            row_prev = guide.ptr(i-1);
-            for(int j=0;j<w;j++)
-            {
-                Avert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_prev[0])*(row[0]-row_prev[0])+
-                                                    (row[1]-row_prev[1])*(row[1]-row_prev[1])+
-                                                    (row[2]-row_prev[2])*(row[2]-row_prev[2]) ];
-                Cvert[i*w+j] = 0;
-                Bvert[i*w+j] = 1 - Avert[i*w+j] - Cvert[i*w+j];
-                row+=3;
-                row_prev+=3;
-            }
-        }
-        else
-        {
-            row_prev = guide.ptr(i-1);
-            row_next = guide.ptr(i+1);
-            for(int j=0;j<w;j++)
-            {
-                Avert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_prev[0])*(row[0]-row_prev[0])+
-                                                    (row[1]-row_prev[1])*(row[1]-row_prev[1])+
-                                                    (row[2]-row_prev[2])*(row[2]-row_prev[2]) ];
-                Cvert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_next[0])*(row[0]-row_next[0])+
-                    (row[1]-row_next[1])*(row[1]-row_next[1])+
-                    (row[2]-row_next[2])*(row[2]-row_next[2]) ];
-                Bvert[i*w+j] = 1 - Avert[i*w+j] - Cvert[i*w+j];
-                row+=3;
-                row_prev+=3;
-                row_next+=3;
-            }
-        }
-    }
-}
-
-void WeightedLeastSquaresFilterImpl::filter(InputArray src, OutputArray dst)
-{
-    //temporarily support only one-channel CV_16S src type (for disparity map filtering)
-    CV_Assert(!src.empty() && (src.depth() == CV_16S) && src.channels()==1);
-    if (src.rows() != h || src.cols() != w)
-    {
-        CV_Error(Error::StsBadSize, "Size of filtering image must be equal to size of guide image");
-        return;
-    }
-
-    Mat srcMat = src.getMat(); 
-    Mat& dstMat = dst.getMatRef();
-    short* row;
-    for(int i=0;i<h;i++)
-    {
-        row = (short*)srcMat.ptr(i);
-        for(int j=0;j<w;j++)
-        {
-            cur_res[i*w+j] = (double)(*row);
-            row++;
-        }
-    }
-
-    for(int n=0;n<num_iter;n++)
-    {
-        horizontalPass(cur_res);
-        verticalPass(cur_res);
-    }
-
-    for(int i=0;i<h;i++)
-    {
-        row = (short*)dstMat.ptr(i);
-        for(int j=0;j<w;j++)
-        {
-            *row = saturate_cast<short>(cur_res[i*w+j]);
-            row++;
-        }
-    }
-}
-
-void WeightedLeastSquaresFilterImpl::horizontalPass(double* cur)
-{
-    double denom;
-    for(int i=0;i<h;i++)
-    {
-        //forward pass:
-        interD[i*w] = Chor[i*w]/Bhor[i*w];
-        interE[i*w] = cur[i*w] /Bhor[i*w];
-        for(int j=1;j<w;j++)
-        {
-            denom = Bhor[i*w+j]-interD[i*w+j-1]*Ahor[i*w+j];
-            interD[i*w+j] = Chor[i*w+j]/denom;
-            interE[i*w+j] = (cur[i*w+j]-interE[i*w+j-1]*Ahor[i*w+j])/denom;
-        }
-
-        //backward pass:
-        cur[i*w+w-1] = interE[i*w+w-1];
-        for(int j=w-2;j>=0;j--)
-            cur[i*w+j] = interE[i*w+j]-interD[i*w+j]*cur[i*w+j+1];
-    }
-}
-
-void WeightedLeastSquaresFilterImpl::verticalPass(double* cur)
-{
-    double denom;
-    //forward pass:
-    for(int j=0;j<w;j++)
-    {
-        interD[j] = Cvert[j]/Bvert[j];
-        interE[j] = cur[j]/Bvert[j];
-    }
-    for(int i=1;i<h;i++)
-    {
-        for(int j=0;j<w;j++)
-        {
-            denom = Bvert[i*w+j]-interD[(i-1)*w+j]*Avert[i*w+j];
-            interD[i*w+j] = Cvert[i*w+j]/denom;
-            interE[i*w+j] = (cur[i*w+j]-interE[(i-1)*w+j]*Avert[i*w+j])/denom;
-        }
-    }
-    //backward pass:
-    for(int j=0;j<w;j++)
-        cur[(h-1)*w+j] = interE[(h-1)*w+j];
-    for(int i=h-2;i>=0;i--)
-        for(int j=w-1;j>=0;j--)
-            cur[i*w+j] = interE[i*w+j]-interD[i*w+j]*cur[(i+1)*w+j];
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-CV_EXPORTS_W
-Ptr<WeightedLeastSquaresFilter> createWeightedLeastSquaresFilter(InputArray guide, double lambda, double sigma_color, int num_iter)
-{
-    return Ptr<WeightedLeastSquaresFilter>(WeightedLeastSquaresFilterImpl::create(guide, lambda, sigma_color, num_iter));
-}
-
-CV_EXPORTS_W
-void weightedLeastSquaresFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, int num_iter)
-{
-    Ptr<WeightedLeastSquaresFilter> wls = createWeightedLeastSquaresFilter(guide, lambda, sigma_color, num_iter);
-    wls->filter(src, dst);
-}
-
-}
-}
--- a/modules/ximgproc/test/test_disparity_wls_filter.cpp
+++ b/modules/ximgproc/test/test_disparity_wls_filter.cpp
@ -0,0 +1,154 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  *Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#include "test_precomp.hpp"
+#include "opencv2/ximgproc/disparity_filter.hpp"
+
+namespace cvtest
+{
+
+using namespace std;
+using namespace std::tr1;
+using namespace testing;
+using namespace perf;
+using namespace cv;
+using namespace cv::ximgproc;
+
+static string getDataDir()
+{
+    return cvtest::TS::ptr()->get_data_path();
+}
+
+CV_ENUM(SrcTypes, CV_16S);
+CV_ENUM(GuideTypes, CV_8UC1, CV_8UC3)
+typedef tuple<Size, SrcTypes, GuideTypes, bool, bool> DisparityWLSParams;
+typedef TestWithParam<DisparityWLSParams> DisparityWLSFilterTest;
+
+TEST(DisparityWLSFilterTest, ReferenceAccuracy)
+{
+    string dir = getDataDir() + "cv/disparityfilter";
+
+    Mat left = imread(dir + "/left_view.png",IMREAD_COLOR);
+    ASSERT_FALSE(left.empty());
+    Mat left_disp  = imread(dir + "/disparity_left_raw.png",IMREAD_GRAYSCALE);
+    ASSERT_FALSE(left_disp.empty());
+    left_disp.convertTo(left_disp,CV_16S,16);
+    Mat right_disp = imread(dir + "/disparity_right_raw.png",IMREAD_GRAYSCALE);
+    ASSERT_FALSE(right_disp.empty());
+    right_disp.convertTo(right_disp,CV_16S,-16);
+
+    Mat GT;
+    ASSERT_FALSE(readGT(dir + "/GT.png",GT));
+
+    FileStorage ROI_storage( dir + "/ROI.xml", FileStorage::READ );
+    Rect ROI((int)ROI_storage["x"],(int)ROI_storage["y"],(int)ROI_storage["width"],(int)ROI_storage["height"]);
+
+    FileStorage reference_res( dir + "/reference_accuracy.xml", FileStorage::READ );
+    double ref_MSE = (double)reference_res["MSE_after"];
+    double ref_BadPercent = (double)reference_res["BadPercent_after"];
+
+    cv::setNumThreads(cv::getNumberOfCPUs());
+    Mat res;
+
+    Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
+    wls_filter->setLambda(8000.0);
+    wls_filter->setSigmaColor(0.5);
+    wls_filter->filter(left_disp,left,res,ROI,right_disp);
+
+    double MSE = computeMSE(GT,res,ROI);
+    double BadPercent = computeBadPixelPercent(GT,res,ROI);
+    double eps = 0.01;
+
+    EXPECT_LE(MSE,ref_MSE+eps*ref_MSE);
+    EXPECT_LE(BadPercent,ref_BadPercent+eps*ref_BadPercent);
+}
+
+TEST_P(DisparityWLSFilterTest, MultiThreadReproducibility)
+{
+    if (cv::getNumberOfCPUs() == 1)
+        return;
+
+    double MAX_DIF = 1.0;
+    double MAX_MEAN_DIF = 1.0 / 256.0;
+    int loopsCount = 2;
+    RNG rng(0);
+
+    DisparityWLSParams params = GetParam();
+    Size size          = get<0>(params);
+    int srcType        = get<1>(params);
+    int guideType      = get<2>(params);
+    bool use_conf      = get<3>(params);
+    bool use_downscale = get<4>(params);
+
+    Mat left(size, guideType);
+    randu(left, 0, 255);
+    Mat left_disp(size,srcType);
+    int max_disp = (int)(size.width*0.1);
+    randu(left_disp, 0, max_disp-1);
+    Mat right_disp(size,srcType);
+    randu(left_disp, -max_disp+1, 0);
+    Rect ROI(max_disp,0,size.width-max_disp,size.height);
+
+    if(use_downscale)
+    {
+        resize(left_disp,left_disp,Size(),0.5,0.5);
+        resize(right_disp,right_disp,Size(),0.5,0.5);
+        ROI = Rect(ROI.x/2,ROI.y/2,ROI.width/2,ROI.height/2);
+    }
+
+    for (int iter = 0; iter <= loopsCount; iter++)
+    {
+        double lambda = rng.uniform(100.0, 10000.0);
+        double sigma  = rng.uniform(1.0, 100.0);
+
+        Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(use_conf);
+        wls_filter->setLambda(lambda);
+        wls_filter->setSigmaColor(sigma);
+
+        cv::setNumThreads(cv::getNumberOfCPUs());
+        Mat resMultiThread;
+        wls_filter->filter(left_disp,left,resMultiThread,ROI,right_disp);
+
+        cv::setNumThreads(1);
+        Mat resSingleThread;
+        wls_filter->filter(left_disp,left,resSingleThread,ROI,right_disp);
+
+        EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
+        EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF*left.total());
+    }
+}
+INSTANTIATE_TEST_CASE_P(FullSet,DisparityWLSFilterTest,Combine(Values(szODD, szQVGA), SrcTypes::all(), GuideTypes::all(),Values(true,false),Values(true,false)));
+}
--- a/modules/ximgproc/test/test_fgs_filter.cpp
+++ b/modules/ximgproc/test/test_fgs_filter.cpp
@ -0,0 +1,153 @@
+/*
+ *  By downloading, copying, installing or using the software you agree to this license.
+ *  If you do not agree to this license, do not download, install,
+ *  copy or use the software.
+ *
+ *
+ *  License Agreement
+ *  For Open Source Computer Vision Library
+ *  (3 - clause BSD License)
+ *
+ *  Redistribution and use in source and binary forms, with or without modification,
+ *  are permitted provided that the following conditions are met :
+ *
+ *  *Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and / or other materials provided with the distribution.
+ *
+ *  * Neither the names of the copyright holders nor the names of the contributors
+ *  may be used to endorse or promote products derived from this software
+ *  without specific prior written permission.
+ *
+ *  This software is provided by the copyright holders and contributors "as is" and
+ *  any express or implied warranties, including, but not limited to, the implied
+ *  warranties of merchantability and fitness for a particular purpose are disclaimed.
+ *  In no event shall copyright holders or contributors be liable for any direct,
+ *  indirect, incidental, special, exemplary, or consequential damages
+ *  (including, but not limited to, procurement of substitute goods or services;
+ *  loss of use, data, or profits; or business interruption) however caused
+ *  and on any theory of liability, whether in contract, strict liability,
+ *  or tort(including negligence or otherwise) arising in any way out of
+ *  the use of this software, even if advised of the possibility of such damage.
+ */
+
+#include "test_precomp.hpp"
+
+namespace cvtest
+{
+
+using namespace std;
+using namespace std::tr1;
+using namespace testing;
+using namespace perf;
+using namespace cv;
+using namespace cv::ximgproc;
+
+static string getDataDir()
+{
+    return cvtest::TS::ptr()->get_data_path();
+}
+
+CV_ENUM(SrcTypes, CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4);
+CV_ENUM(GuideTypes, CV_8UC1, CV_8UC3)
+typedef tuple<Size, SrcTypes, GuideTypes> FGSParams;
+typedef TestWithParam<FGSParams> FastGlobalSmootherTest;
+
+TEST(FastGlobalSmootherTest, SplatSurfaceAccuracy)
+{
+    RNG rnd(0);
+
+    for (int i = 0; i < 10; i++)
+    {
+        Size sz(rnd.uniform(512, 1024), rnd.uniform(512, 1024));
+
+        int guideCn = rnd.uniform(1, 2);
+        if(guideCn==2) guideCn++; //1 or 3 channels
+        Mat guide(sz, CV_MAKE_TYPE(CV_8U, guideCn));
+        randu(guide, 0, 255);
+
+        Scalar surfaceValue;
+        int srcCn = rnd.uniform(1, 4);
+        rnd.fill(surfaceValue, RNG::UNIFORM, 0, 255);
+        Mat src(sz, CV_MAKE_TYPE(CV_16S, srcCn), surfaceValue);
+
+        double lambda = rnd.uniform(100, 10000);
+        double sigma  = rnd.uniform(1.0, 100.0);
+
+        Mat res;
+        fastGlobalSmootherFilter(guide, src, res, lambda, sigma);
+
+        // When filtering a constant image we should get the same image:
+        double normL1 = cvtest::norm(src, res, NORM_L1)/src.total()/src.channels();
+        EXPECT_LE(normL1, 1.0/64);
+    }
+}
+
+TEST(FastGlobalSmootherTest, ReferenceAccuracy)
+{
+    string dir = getDataDir() + "cv/edgefilter";
+
+    Mat src = imread(dir + "/kodim23.png");
+    Mat ref = imread(dir + "/fgs/kodim23_lambda=1000_sigma=10.png");
+
+    ASSERT_FALSE(src.empty());
+    ASSERT_FALSE(ref.empty());
+
+    cv::setNumThreads(cv::getNumberOfCPUs());
+    Mat res;
+    fastGlobalSmootherFilter(src,src,res,1000.0,10.0);
+
+    double totalMaxError = 1.0/64.0*src.total()*src.channels();
+
+    EXPECT_LE(cvtest::norm(res, ref, NORM_L2), totalMaxError);
+    EXPECT_LE(cvtest::norm(res, ref, NORM_INF), 1);
+}
+
+TEST_P(FastGlobalSmootherTest, MultiThreadReproducibility)
+{
+    if (cv::getNumberOfCPUs() == 1)
+        return;
+
+    double MAX_DIF = 1.0;
+    double MAX_MEAN_DIF = 1.0 / 64.0;
+    int loopsCount = 2;
+    RNG rng(0);
+
+    FGSParams params = GetParam();
+    Size size     = get<0>(params);
+    int srcType   = get<1>(params);
+    int guideType = get<2>(params);
+
+    Mat guide(size, guideType);
+    randu(guide, 0, 255);
+    Mat src(size,srcType);
+    if(src.depth()==CV_8U)
+        randu(src, 0, 255);
+    else if(src.depth()==CV_16S)
+        randu(src, -32767, 32767);
+    else
+        randu(src, -100000.0f, 100000.0f);
+
+    for (int iter = 0; iter <= loopsCount; iter++)
+    {
+        double lambda = rng.uniform(100.0, 10000.0);
+        double sigma  = rng.uniform(1.0, 100.0);
+
+        cv::setNumThreads(cv::getNumberOfCPUs());
+        Mat resMultiThread;
+        fastGlobalSmootherFilter(guide, src, resMultiThread, lambda, sigma);
+
+        cv::setNumThreads(1);
+        Mat resSingleThread;
+        fastGlobalSmootherFilter(guide, src, resSingleThread, lambda, sigma);
+
+        EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
+        EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF*src.total()*src.channels());
+    }
+}
+INSTANTIATE_TEST_CASE_P(FullSet, FastGlobalSmootherTest,Combine(Values(szODD, szQVGA), SrcTypes::all(), GuideTypes::all()));
+
+}
--- a/modules/ximgproc/testdata/disparityfilter/GT.png
+++ b/modules/ximgproc/testdata/disparityfilter/GT.png
--- a/modules/ximgproc/testdata/disparityfilter/ROI.xml
+++ b/modules/ximgproc/testdata/disparityfilter/ROI.xml
@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<opencv_storage>
+<x>166</x>
+<y>7</y>
+<width>851</width>
+<height>422</height>
+</opencv_storage>
--- a/modules/ximgproc/testdata/disparityfilter/disparity_left_raw.png
+++ b/modules/ximgproc/testdata/disparityfilter/disparity_left_raw.png
--- a/modules/ximgproc/testdata/disparityfilter/disparity_right_raw.png
+++ b/modules/ximgproc/testdata/disparityfilter/disparity_right_raw.png
--- a/modules/ximgproc/testdata/disparityfilter/left_view.png
+++ b/modules/ximgproc/testdata/disparityfilter/left_view.png
--- a/modules/ximgproc/testdata/disparityfilter/reference_accuracy.xml
+++ b/modules/ximgproc/testdata/disparityfilter/reference_accuracy.xml
@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<opencv_storage>
+<MSE_before>5043.6495</MSE_before>
+<MSE_after>128.3773</MSE_after>
+<BadPercent_before>48.5417</BadPercent_before>
+<BadPercent_after>45.8749</BadPercent_after>
+</opencv_storage>
--- a/modules/ximgproc/testdata/edgefilter/fgs/kodim23_lambda=1000_sigma=10.png
+++ b/modules/ximgproc/testdata/edgefilter/fgs/kodim23_lambda=1000_sigma=10.png