pull/265/head
Vitaliy Lyudvichenko 10 years ago
commit b2f4ba30d1
  1. 81   modules/text/include/opencv2/text/ocr.hpp
  2. BIN  modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz
  3. 2    modules/text/samples/cropped_word_recognition.cpp
  4. BIN  modules/text/samples/scenetext_segmented_word01.jpg
  5. BIN  modules/text/samples/scenetext_segmented_word01_mask.png
  6. BIN  modules/text/samples/scenetext_segmented_word02.jpg
  7. BIN  modules/text/samples/scenetext_segmented_word02_mask.png
  8. BIN  modules/text/samples/scenetext_segmented_word03.jpg
  9. BIN  modules/text/samples/scenetext_segmented_word03_mask.png
  10. BIN modules/text/samples/scenetext_segmented_word04.jpg
  11. BIN modules/text/samples/scenetext_segmented_word04_mask.png
  12. BIN modules/text/samples/scenetext_segmented_word05.jpg
  13. BIN modules/text/samples/scenetext_segmented_word05_mask.png
  14. 116 modules/text/samples/segmented_word_recognition.cpp
  15. 60  modules/text/src/ocr_beamsearch_decoder.cpp
  16. 549 modules/text/src/ocr_hmm_decoder.cpp
  17. 26  modules/text/src/ocr_tesseract.cpp
  18. 36  modules/ximgproc/include/opencv2/ximgproc/disparity_filter.hpp
  19. 4   modules/ximgproc/perf/perf_disparity_wls_filter.cpp
  20. 82  modules/ximgproc/samples/disparity_filtering.cpp
  21. 152 modules/ximgproc/src/disparity_filters.cpp
  22. 10  modules/ximgproc/test/test_disparity_wls_filter.cpp
  23. 76  modules/ximgproc/tutorials/disparity_filtering.markdown
  24. BIN modules/ximgproc/tutorials/images/ambush_5_bm.png
  25. BIN modules/ximgproc/tutorials/images/ambush_5_bm_with_filter.png
  26. BIN modules/ximgproc/tutorials/images/ambush_5_left.jpg
  27. BIN modules/ximgproc/tutorials/images/ambush_5_right.jpg

@@ -69,6 +69,9 @@ public:
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
};
/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.
@@ -106,6 +109,10 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@@ -170,11 +177,11 @@ public:
public:
/** @brief Recognize text using HMM.
Takes image on input and returns recognized text in the output_text parameter. Optionally
Takes binary image on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
@param image Input image CV_8UC1 with a single text line (or word).
@param image Input binary image CV_8UC1 with a single text line (or word).
@param output_text Output text. Most likely character sequence found by the HMM decoder.
@@ -193,6 +200,33 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
/** @brief Recognize text using HMM.
Takes an image and a mask (where each connected component corresponds to a segmented character)
on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
@param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word).
@param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image.
@param output_text Output text. Most likely character sequence found by the HMM decoder.
@param component_rects If provided the method will output a list of Rects for the individual
text elements found (e.g. words).
@param component_texts If provided the method will output a list of text strings for the
recognition of individual text elements found (e.g. words).
@param component_confidences If provided the method will output a list of confidence values
for the recognition of individual text elements found (e.g. words).
@param component_level Only OCR_LEVEL_WORD is supported.
*/
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
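A minimal usage sketch of this new mask overload (not part of the patch; it mirrors the segmented_word_recognition.cpp sample added below, and assumes the classifier and transition-table data files from the text module samples folder are available):

    Mat image = imread("scenetext_segmented_word01.jpg");
    Mat mask  = imread("scenetext_segmented_word01_mask.png", IMREAD_GRAYSCALE);
    string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    Mat emission = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
    Mat transitions;
    FileStorage fs("OCRHMM_transitions_table.xml", FileStorage::READ);
    fs["transition_probabilities"] >> transitions;
    fs.release();
    Ptr<OCRHMMDecoder> ocr = OCRHMMDecoder::create(
            loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
            voc, transitions, emission);
    string text;
    ocr->run(image, mask, text); // each connected component in mask is one character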
/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor.
@@ -231,7 +265,7 @@ protected:
@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml)
The default classifier is based in the scene text recognition method proposed by Lukás Neumann &
The KNN default classifier is based on the scene text recognition method proposed by Lukás Neumann &
Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a
fixed size, while retaining the centroid and aspect ratio, in order to extract a feature vector
based on gradient orientations along the chain-code of its perimeter. Then, the region is classified
@@ -240,19 +274,32 @@ types.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@param vocabulary The language vocabulary (chars when ascii english text).
@param lexicon The list of words that are expected to be found in a particular image.
@param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
@note
- (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
*/
/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
The CNN default classifier is based on the scene text recognition method proposed by Adam Coates &
Andrew Ng in [Coates11a]. The character classifier consists of a Single Layer Convolutional Neural Network and
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename);
//! @}
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
*
* @param vocabulary The language vocabulary (chars when ascii english text).
*
* @param lexicon The list of words that are expected to be found in a particular image.
*
* @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
*
* The function calculates frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
* @note
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
* */
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
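A short usage sketch (condensed from the cropped_word_recognition.cpp sample below); the resulting table is CV_64FC1 with cols == rows == vocabulary.size():

    string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    vector<string> lexicon; // words expected to appear in the image
    lexicon.push_back("abb");
    lexicon.push_back("patata");
    Mat transitions;
    createOCRHMMTransitionsTable(voc, lexicon, transitions);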
@@ -319,6 +366,10 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor.
@@ -359,10 +410,10 @@ protected:
/** @brief Allow to implicitly load the default character classifier when creating an OCRBeamSearchDecoder object.
@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml)
@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
The default classifier is based in the scene text recognition method proposed by Adam Coates &
Andrew NG in [Coates11a]. The character classifier sonsists in a Single Layer Convolutional Neural Network and
The CNN default classifier is based on the scene text recognition method proposed by Adam Coates &
Andrew Ng in [Coates11a]. The character classifier consists of a Single Layer Convolutional Neural Network and
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/

@@ -36,7 +36,7 @@ int main(int argc, char* argv[])
return(0);
}
string vocabulary = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyx0123456789"; // must have the same order as the clasifier output classes
string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // must have the same order as the clasifier output classes
vector<string> lexicon; // a list of words expected to be found on the input image
lexicon.push_back(string("abb"));
lexicon.push_back(string("patata"));

@@ -0,0 +1,116 @@
/*
* segmented_word_recognition.cpp
*
* A demo program on segmented word recognition.
* Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
*
* Created on: Jul 31, 2015
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
using namespace std;
using namespace cv;
using namespace text;
int main(int argc, char* argv[]) {
const String keys =
"{help h usage ? | | print this message.}"
"{@image | | source image for recognition.}"
"{@mask | | binary segmentation mask where each contour is a character.}"
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}"
;
CommandLineParser parser(argc, argv, keys);
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n");
String filename1 = parser.get<String>(0);
String filename2 = parser.get<String>(1);
parser.printMessage();
cout << endl << endl;
if ((parser.has("help")) || (filename1.size()==0))
{
return 0;
}
if (!parser.check())
{
parser.printErrors();
return 0;
}
Mat image = imread(filename1);
Mat mask;
if (filename2.size() > 0)
mask = imread(filename2);
else
image.copyTo(mask);
// make sure the mask is a binary image
cvtColor(mask, mask, COLOR_BGR2GRAY);
threshold(mask, mask, 128., 255, THRESH_BINARY);
// character recognition vocabulary
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// Emission probabilities for the HMM language model (identity matrix by default)
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
// Bigram transition probabilities for the HMM language model
Mat transitionProbabilities;
string lex = parser.get<string>("lex");
if (lex.size()>0)
{
// Build tailored language model for the provided lexicon
vector<string> lexicon;
size_t pos = 0;
string delimiter = ",";
std::string token;
while ((pos = lex.find(delimiter)) != std::string::npos) {
token = lex.substr(0, pos);
lexicon.push_back(token);
lex.erase(0, pos + delimiter.length());
}
lexicon.push_back(lex);
createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities);
} else {
// Or load the generic language model (from Aspell English dictionary)
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ);
fs["transition_probabilities"] >> transitionProbabilities;
fs.release();
}
Ptr<OCRTesseract> ocrTes = OCRTesseract::create();
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create(
loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"),
voc, transitionProbabilities, emissionProbabilities);
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create(
loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
voc, transitionProbabilities, emissionProbabilities);
std::string output;
double t_r = (double)getTickCount();
ocrTes->run(mask, output);
output.erase(remove(output.begin(), output.end(), '\n'), output.end());
cout << " OCR_Tesseract output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
t_r = (double)getTickCount();
ocrNM->run(mask, output);
cout << " OCR_NM output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
t_r = (double)getTickCount();
ocrCNN->run(image, mask, output);
cout << " OCR_CNN output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
}

@@ -73,6 +73,22 @@ void OCRBeamSearchDecoder::run(Mat& image, string& output_text, vector<Rect>* co
component_confidences->clear();
}
void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>* component_rects,
vector<string>* component_texts, vector<float>* component_confidences,
int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
CV_Assert( mask.type() == CV_8UC1 );
CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
output_text.clear();
if (component_rects != NULL)
component_rects->clear();
if (component_texts != NULL)
component_texts->clear();
if (component_confidences != NULL)
component_confidences->clear();
}
void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{
@@ -136,7 +152,7 @@ public:
if (component_confidences != NULL)
component_confidences->clear();
// TODO split a line into words
// TODO We must split a line into words or specify we only work with words
if(src.type() == CV_8UC3)
{
@@ -174,14 +190,7 @@ public:
}
//TODO it would be interesting to have a hash table with a vector of booleans
// but this is not possible when we have a large number of possible segmentations.
//vector<bool> visited_nodes(pow(2,oversegmentation.size()),false); // hash table for visited nodes
// options are using std::set<unsigned long long int> to store only the keys of visited nodes
// but will deteriorate the time performance.
set<unsigned long long int> visited_nodes; //TODO make it member of class
// it is also possible to reduce the number of seg. points in some way (e.g. use only seg.points
// for which there is a change on the class prediction)
vector<int> start_segmentation;
start_segmentation.push_back(oversegmentation[0]);
@@ -221,6 +230,21 @@ public:
return;
}
void run( Mat& src,
Mat& mask,
string& out_sequence,
vector<Rect>* component_rects,
vector<string>* component_texts,
vector<float>* component_confidences,
int component_level)
{
CV_Assert( mask.type() == CV_8UC1 );
// Nothing to do with a mask here. We do sliding window anyway.
run( src, out_sequence, component_rects, component_texts, component_confidences, component_level );
}
private:
////////////////////////////////////////////////////////////
@@ -421,22 +445,29 @@ OCRBeamSearchClassifierCNN::OCRBeamSearchClassifierCNN (const string& filename)
fs["feature_min"] >> feature_min;
fs["feature_max"] >> feature_max;
fs.release();
// TODO check all matrix dimensions match correctly and no one is empty
}
else
CV_Error(Error::StsBadArg, "Default classifier data file not found!");
// check that all matrix dimensions match and none is empty
CV_Assert( (M.cols > 0) && (M.rows > 0) );
CV_Assert( (P.cols > 0) && (P.rows > 0) );
CV_Assert( (kernels.cols > 0) && (kernels.rows > 0) );
CV_Assert( (weights.cols > 0) && (weights.rows > 0) );
CV_Assert( (feature_min.cols > 0) && (feature_min.rows > 0) );
CV_Assert( (feature_max.cols > 0) && (feature_max.rows > 0) );
nr_feature = weights.rows;
nr_class = weights.cols;
// TODO some of this can be inferred from the input file (e.g. patch size must be sqrt(filters.cols))
step_size = 4;
patch_size = (int)sqrt(kernels.cols);
// algorithm internal parameters
window_size = 32;
quad_size = 12;
patch_size = 8;
num_quads = 25;
num_tiles = 25;
alpha = 0.5;
step_size = 4; // TODO should this be a parameter for the user?
}
@@ -459,7 +490,6 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >
cvtColor(src,src,COLOR_RGB2GRAY);
}
// TODO shall we resize the input image or make a copy ?
resize(src,src,Size(window_size*src.cols/src.rows,window_size));
int seg_points = 0;
@@ -555,8 +585,8 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >
double *p = new double[nr_class];
double predict_label = eval_feature(feature,p);
//cout << " Prediction: " << vocabulary[predict_label] << " with probability " << p[0] << endl;
if (predict_label < 0) // TODO use cvError
cout << "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()" << endl;
if (predict_label < 0)
CV_Error(Error::StsInternal, "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()");
seg_points++;

@@ -74,6 +74,22 @@ void OCRHMMDecoder::run(Mat& image, string& output_text, vector<Rect>* component
component_confidences->clear();
}
void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>* component_rects,
vector<string>* component_texts, vector<float>* component_confidences,
int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
CV_Assert( mask.type() == CV_8UC1 );
CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
output_text.clear();
if (component_rects != NULL)
component_rects->clear();
if (component_texts != NULL)
component_texts->clear();
if (component_confidences != NULL)
component_confidences->clear();
}
void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
{
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@@ -263,6 +279,264 @@ public:
obs.push_back(out_class[0]);
observations.push_back(out_class);
confidences.push_back(out_conf);
//cout << " out class = " << vocabulary[out_class[0]] << endl;
}
//This must be extracted from dictionary, or just assumed to be equal for all characters
vector<double> start_p(vocabulary.size());
for (int i=0; i<(int)vocabulary.size(); i++)
start_p[i] = 1.0/vocabulary.size();
Mat V = Mat::zeros((int)observations.size(),(int)vocabulary.size(),CV_64FC1);
vector<string> path(vocabulary.size());
// Initialize base cases (t == 0)
for (int i=0; i<(int)vocabulary.size(); i++)
{
for (int j=0; j<(int)observations[0].size(); j++)
{
emission_p.at<double>(observations[0][j],obs[0]) = confidences[0][j];
}
V.at<double>(0,i) = start_p[i] * emission_p.at<double>(i,obs[0]);
path[i] = vocabulary.at(i);
}
// Run Viterbi for t > 0
for (int t=1; t<(int)obs.size(); t++)
{
// The emission matrix must be rebuilt at every time step
emission_p = Mat::eye(62,62,CV_64FC1);
for (int e=0; e<(int)observations[t].size(); e++)
{
emission_p.at<double>(observations[t][e],obs[t]) = confidences[t][e];
}
vector<string> newpath(vocabulary.size());
for (int i=0; i<(int)vocabulary.size(); i++)
{
double max_prob = 0;
int best_idx = 0;
for (int j=0; j<(int)vocabulary.size(); j++)
{
double prob = V.at<double>(t-1,j) * transition_p.at<double>(j,i) * emission_p.at<double>(i,obs[t]);
if ( prob > max_prob)
{
max_prob = prob;
best_idx = j;
}
}
V.at<double>(t,i) = max_prob;
newpath[i] = path[best_idx] + vocabulary.at(i);
}
// Don't need to remember the old paths
path.swap(newpath);
}
double max_prob = 0;
int best_idx = 0;
for (int i=0; i<(int)vocabulary.size(); i++)
{
double prob = V.at<double>((int)obs.size()-1,i);
if ( prob > max_prob)
{
max_prob = prob;
best_idx = i;
}
}
//cout << path[best_idx] << endl;
if (out_sequence.size()>0) out_sequence = out_sequence+" "+path[best_idx];
else out_sequence = path[best_idx];
if (component_rects != NULL)
component_rects->push_back(words_rect[w]);
if (component_texts != NULL)
component_texts->push_back(path[best_idx]);
if (component_confidences != NULL)
component_confidences->push_back((float)max_prob);
}
return;
}
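For reference, the decoding loop above is the standard Viterbi recurrence over the character lattice, with transition_p as $a_{ji}$, the per-step emission matrix as $b_i(o_t)$, and a uniform start distribution $\pi_i = 1/N$:

$$V_0(i) = \pi_i \, b_i(o_0), \qquad V_t(i) = \max_j \; V_{t-1}(j) \, a_{ji} \, b_i(o_t)$$

The recognized word is the path remembered for $\arg\max_i V_{T-1}(i)$ at the last observation.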
void run( Mat& image,
Mat& mask,
string& out_sequence,
vector<Rect>* component_rects,
vector<string>* component_texts,
vector<float>* component_confidences,
int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
CV_Assert( mask.type() == CV_8UC1 );
CV_Assert( (image.cols > 0) && (image.rows > 0) );
CV_Assert( (image.cols == mask.cols) && (image.rows == mask.rows) );
CV_Assert( component_level == OCR_LEVEL_WORD );
out_sequence.clear();
if (component_rects != NULL)
component_rects->clear();
if (component_texts != NULL)
component_texts->clear();
if (component_confidences != NULL)
component_confidences->clear();
// First we split a line into words
vector<Mat> words_mask;
vector<Rect> words_rect;
/// Find contours
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
Mat tmp;
mask.copyTo(tmp);
findContours( tmp, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point(0, 0) );
if (contours.size() < 6)
{
//do not split lines with less than 6 characters
words_mask.push_back(mask);
words_rect.push_back(Rect(0,0,mask.cols,mask.rows));
}
else
{
Mat_<float> vector_w((int)mask.cols,1);
reduce(mask, vector_w, 0, REDUCE_SUM, -1);
vector<int> spaces;
vector<int> spaces_start;
vector<int> spaces_end;
int space_count=0;
int last_one_idx;
int s_init = 0, s_end=vector_w.cols;
for (int s=0; s<vector_w.cols; s++)
{
if (vector_w.at<float>(0,s) == 0)
s_init = s+1;
else
break;
}
for (int s=vector_w.cols-1; s>=0; s--)
{
if (vector_w.at<float>(0,s) == 0)
s_end = s;
else
break;
}
for (int s=s_init; s<s_end; s++)
{
if (vector_w.at<float>(0,s) == 0)
{
space_count++;
} else {
if (space_count!=0)
{
spaces.push_back(space_count);
spaces_start.push_back(last_one_idx);
spaces_end.push_back(s-1);
}
space_count = 0;
last_one_idx = s;
}
}
Scalar mean_space,std_space;
meanStdDev(Mat(spaces),mean_space,std_space);
int num_word_spaces = 0;
int last_word_space_end = 0;
for (int s=0; s<(int)spaces.size(); s++)
{
if (spaces_end.at(s)-spaces_start.at(s) > mean_space[0]+(mean_space[0]*1.1)) //this 1.1 is a param?
{
if (num_word_spaces == 0)
{
//cout << " we have a word from 0 to " << spaces_start.at(s) << endl;
Mat word_mask;
Rect word_rect = Rect(0,0,spaces_start.at(s),mask.rows);
mask(word_rect).copyTo(word_mask);
words_mask.push_back(word_mask);
words_rect.push_back(word_rect);
}
else
{
//cout << " we have a word from " << last_word_space_end << " to " << spaces_start.at(s) << endl;
Mat word_mask;
Rect word_rect = Rect(last_word_space_end,0,spaces_start.at(s)-last_word_space_end,mask.rows);
mask(word_rect).copyTo(word_mask);
words_mask.push_back(word_mask);
words_rect.push_back(word_rect);
}
num_word_spaces++;
last_word_space_end = spaces_end.at(s);
}
}
//cout << " we have a word from " << last_word_space_end << " to " << vector_w.cols << endl << endl << endl;
Mat word_mask;
Rect word_rect = Rect(last_word_space_end,0,vector_w.cols-last_word_space_end,mask.rows);
mask(word_rect).copyTo(word_mask);
words_mask.push_back(word_mask);
words_rect.push_back(word_rect);
}
for (int w=0; w<(int)words_mask.size(); w++)
{
vector< vector<int> > observations;
vector< vector<double> > confidences;
vector<int> obs;
// First find contours and sort by x coordinate of bbox
words_mask[w].copyTo(tmp);
if (tmp.empty())
continue;
contours.clear();
hierarchy.clear();
/// Find contours
findContours( tmp, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point(0, 0) );
vector<Rect> contours_rect;
for (int i=0; i<(int)contours.size(); i++)
{
contours_rect.push_back(boundingRect(contours[i]));
}
sort(contours_rect.begin(), contours_rect.end(), sort_rect_horiz);
// Do character recognition for each contour
for (int i=0; i<(int)contours.size(); i++)
{
vector<int> out_class;
vector<double> out_conf;
//take the center of the char rect and translate it to the real origin
Point char_center = Point(contours_rect.at(i).x+contours_rect.at(i).width/2,
contours_rect.at(i).y+contours_rect.at(i).height/2);
char_center.x += words_rect[w].x;
char_center.y += words_rect[w].y;
int win_size = max(contours_rect.at(i).width,contours_rect.at(i).height);
win_size += (int)(win_size*0.6); // add some pixels in the border TODO: is this a parameter for the user space?
Rect char_rect = Rect(char_center.x-win_size/2,char_center.y-win_size/2,win_size,win_size);
char_rect &= Rect(0,0,image.cols,image.rows);
Mat tmp_image;
image(char_rect).copyTo(tmp_image);
classifier->eval(tmp_image,out_class,out_conf);
if (!out_class.empty())
obs.push_back(out_class[0]);
//cout << " out class = " << vocabulary[out_class[0]] << "(" << out_conf[0] << ")" << endl;
observations.push_back(out_class);
confidences.push_back(out_conf);
}
@@ -335,7 +609,8 @@ public:
}
//cout << path[best_idx] << endl;
out_sequence = out_sequence+" "+path[best_idx];
if (out_sequence.size()>0) out_sequence = out_sequence+" "+path[best_idx];
else out_sequence = path[best_idx];
if (component_rects != NULL)
component_rects->push_back(words_rect[w]);
@@ -598,6 +873,278 @@ Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string&
return makePtr<OCRHMMClassifierKNN>(filename);
}
class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
{
public:
//constructor
OCRHMMClassifierCNN(const std::string& filename);
// Destructor
~OCRHMMClassifierCNN() {}
void eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence );
protected:
void normalizeAndZCA(Mat& patches);
double eval_feature(Mat& feature, double* prob_estimates);
private:
int nr_class; // number of classes
int nr_feature; // number of features
Mat feature_min; // scale range
Mat feature_max;
Mat weights; // Logistic Regression weights
Mat kernels; // CNN kernels
Mat M, P; // ZCA Whitening parameters
int window_size; // window size
int quad_size;
int patch_size;
int num_quads; // extract 25 quads (12x12) from each image
int num_tiles; // extract 25 patches (8x8) from each quad
double alpha; // used in non-linear activation function z = max(0, |D*a| - alpha)
};
OCRHMMClassifierCNN::OCRHMMClassifierCNN (const string& filename)
{
if (ifstream(filename.c_str()))
{
FileStorage fs(filename, FileStorage::READ);
// Load kernel bank and whitening params
fs["kernels"] >> kernels;
fs["M"] >> M;
fs["P"] >> P;
// Load Logistic Regression weights
fs["weights"] >> weights;
// Load feature scaling ranges
fs["feature_min"] >> feature_min;
fs["feature_max"] >> feature_max;
fs.release();
}
else
CV_Error(Error::StsBadArg, "Default classifier data file not found!");
// check that all matrix dimensions match and none is empty
CV_Assert( (M.cols > 0) && (M.rows > 0) );
CV_Assert( (P.cols > 0) && (P.rows > 0) );
CV_Assert( (kernels.cols > 0) && (kernels.rows > 0) );
CV_Assert( (weights.cols > 0) && (weights.rows > 0) );
CV_Assert( (feature_min.cols > 0) && (feature_min.rows > 0) );
CV_Assert( (feature_max.cols > 0) && (feature_max.rows > 0) );
nr_feature = weights.rows;
nr_class = weights.cols;
patch_size = (int)sqrt(kernels.cols);
// algorithm internal parameters
window_size = 32;
num_quads = 25;
num_tiles = 25;
quad_size = 12;
alpha = 0.5;
}
void OCRHMMClassifierCNN::eval( InputArray _src, vector<int>& out_class, vector<double>& out_confidence )
{
CV_Assert(( _src.getMat().type() == CV_8UC3 ) || ( _src.getMat().type() == CV_8UC1 ));
out_class.clear();
out_confidence.clear();
Mat img = _src.getMat();
if(img.type() == CV_8UC3)
{
cvtColor(img,img,COLOR_RGB2GRAY);
}
// shall we resize the input image or make a copy ?
resize(img,img,Size(window_size,window_size));
Mat quad;
Mat tmp;
int patch_count = 0;
vector< vector<double> > data_pool(9);
int quad_id = 1;
for (int q_x=0; q_x<=window_size-quad_size; q_x=q_x+(int)(quad_size/2-1))
{
for (int q_y=0; q_y<=window_size-quad_size; q_y=q_y+(int)(quad_size/2-1))
{
Rect quad_rect = Rect(q_x,q_y,quad_size,quad_size);
quad = img(quad_rect);
//start sliding window (8x8) in each tile and store the patch as row in data_pool
for (int w_x=0; w_x<=quad_size-patch_size; w_x++)
{
for (int w_y=0; w_y<=quad_size-patch_size; w_y++)
{
quad(Rect(w_x,w_y,patch_size,patch_size)).copyTo(tmp);
tmp = tmp.reshape(0,1);
tmp.convertTo(tmp, CV_64F);
normalizeAndZCA(tmp);
vector<double> patch;
tmp.copyTo(patch);
if ((quad_id == 1)||(quad_id == 2)||(quad_id == 6)||(quad_id == 7))
data_pool[0].insert(data_pool[0].end(),patch.begin(),patch.end());
if ((quad_id == 2)||(quad_id == 7)||(quad_id == 3)||(quad_id == 8)||(quad_id == 4)||(quad_id == 9))
data_pool[1].insert(data_pool[1].end(),patch.begin(),patch.end());
if ((quad_id == 4)||(quad_id == 9)||(quad_id == 5)||(quad_id == 10))
data_pool[2].insert(data_pool[2].end(),patch.begin(),patch.end());
if ((quad_id == 6)||(quad_id == 11)||(quad_id == 16)||(quad_id == 7)||(quad_id == 12)||(quad_id == 17))
data_pool[3].insert(data_pool[3].end(),patch.begin(),patch.end());
if ((quad_id == 7)||(quad_id == 12)||(quad_id == 17)||(quad_id == 8)||(quad_id == 13)||(quad_id == 18)||(quad_id == 9)||(quad_id == 14)||(quad_id == 19))
data_pool[4].insert(data_pool[4].end(),patch.begin(),patch.end());
if ((quad_id == 9)||(quad_id == 14)||(quad_id == 19)||(quad_id == 10)||(quad_id == 15)||(quad_id == 20))
data_pool[5].insert(data_pool[5].end(),patch.begin(),patch.end());
if ((quad_id == 16)||(quad_id == 21)||(quad_id == 17)||(quad_id == 22))
data_pool[6].insert(data_pool[6].end(),patch.begin(),patch.end());
if ((quad_id == 17)||(quad_id == 22)||(quad_id == 18)||(quad_id == 23)||(quad_id == 19)||(quad_id == 24))
data_pool[7].insert(data_pool[7].end(),patch.begin(),patch.end());
if ((quad_id == 19)||(quad_id == 24)||(quad_id == 20)||(quad_id == 25))
data_pool[8].insert(data_pool[8].end(),patch.begin(),patch.end());
patch_count++;
}
}
quad_id++;
}
}
//do dot product of each normalized and whitened patch
//each pool is averaged and this yields a representation of 9xD
Mat feature = Mat::zeros(9,kernels.rows,CV_64FC1);
for (int i=0; i<9; i++)
{
Mat pool = Mat(data_pool[i]);
pool = pool.reshape(0,(int)data_pool[i].size()/kernels.cols);
for (int p=0; p<pool.rows; p++)
{
for (int f=0; f<kernels.rows; f++)
{
feature.row(i).at<double>(0,f) = feature.row(i).at<double>(0,f) + max(0.0,std::abs(pool.row(p).dot(kernels.row(f)))-alpha);
}
}
}
feature = feature.reshape(0,1);
// data must be normalized within the range obtained during training
double lower = -1.0;
double upper = 1.0;
for (int k=0; k<feature.cols; k++)
{
feature.at<double>(0,k) = lower + (upper-lower) *
(feature.at<double>(0,k)-feature_min.at<double>(0,k))/
(feature_max.at<double>(0,k)-feature_min.at<double>(0,k));
}
double *p = new double[nr_class];
double predict_label = eval_feature(feature,p);
//cout << " Prediction: " << vocabulary[predict_label] << " with probability " << p[0] << endl;
if (predict_label < 0)
CV_Error(Error::StsInternal, "OCRHMMClassifierCNN::eval Error: unexpected prediction in eval_feature()");
out_class.push_back((int)predict_label);
out_confidence.push_back(p[(int)predict_label]);
for (int i = 0; i<nr_class; i++)
{
if ( (i != (int)predict_label) && (p[i] != 0.) )
{
out_class.push_back(i);
out_confidence.push_back(p[i]);
}
}
}
// normalize for contrast and apply ZCA whitening to a set of image patches
void OCRHMMClassifierCNN::normalizeAndZCA(Mat& patches)
{
//Normalize for contrast
for (int i=0; i<patches.rows; i++)
{
Scalar row_mean, row_std;
meanStdDev(patches.row(i),row_mean,row_std);
row_std[0] = sqrt(pow(row_std[0],2)*patches.cols/(patches.cols-1)+10);
patches.row(i) = (patches.row(i) - row_mean[0]) / row_std[0];
}
//ZCA whitening
if ((M.dims == 0) || (P.dims == 0))
{
Mat CC;
calcCovarMatrix(patches,CC,M,COVAR_NORMAL|COVAR_ROWS|COVAR_SCALE);
CC = CC * patches.rows / (patches.rows-1);
Mat e_val,e_vec;
eigen(CC.t(),e_val,e_vec);
e_vec = e_vec.t();
sqrt(1./(e_val + 0.1), e_val);
Mat V = Mat::zeros(e_vec.rows, e_vec.cols, CV_64FC1);
Mat D = Mat::eye(e_vec.rows, e_vec.cols, CV_64FC1);
for (int i=0; i<e_vec.cols; i++)
{
e_vec.col(e_vec.cols-i-1).copyTo(V.col(i));
D.col(i) = D.col(i) * e_val.at<double>(0,e_val.rows-i-1);
}
P = V * D * V.t();
}
for (int i=0; i<patches.rows; i++)
patches.row(i) = patches.row(i) - M;
patches = patches * P;
}
double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
{
for(int i=0;i<nr_class;i++)
prob_estimates[i] = 0;
for(int idx=0; idx<nr_feature; idx++)
for(int i=0;i<nr_class;i++)
prob_estimates[i] += weights.at<float>(idx,i)*feature.at<double>(0,idx); //TODO use vectorized dot product
int dec_max_idx = 0;
for(int i=1;i<nr_class;i++)
{
if(prob_estimates[i] > prob_estimates[dec_max_idx])
dec_max_idx = i;
}
for(int i=0;i<nr_class;i++)
prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
double sum=0;
for(int i=0; i<nr_class; i++)
sum+=prob_estimates[i];
for(int i=0; i<nr_class; i++)
prob_estimates[i]=prob_estimates[i]/sum;
return dec_max_idx;
}
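For reference, eval_feature above scores each class with a linear (one-vs-all logistic regression) model and then normalizes the sigmoid outputs:

$$s_i = \sum_k w_{ki} \, x_k, \qquad p_i = \frac{\sigma(s_i)}{\sum_j \sigma(s_j)}, \qquad \sigma(s) = \frac{1}{1 + e^{-s}}$$

The returned label is the class with the highest raw score $s_i$.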
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename)
{
return makePtr<OCRHMMClassifierCNN>(filename);
}
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@param vocabulary The language vocabulary (chars when ascii english text).

@@ -70,6 +70,22 @@ void OCRTesseract::run(Mat& image, string& output_text, vector<Rect>* component_
component_confidences->clear();
}
void OCRTesseract::run(Mat& image, Mat& mask, string& output_text, vector<Rect>* component_rects,
vector<string>* component_texts, vector<float>* component_confidences,
int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
CV_Assert( mask.type() == CV_8UC1 );
CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
output_text.clear();
if (component_rects != NULL)
component_rects->clear();
if (component_texts != NULL)
component_texts->clear();
if (component_confidences != NULL)
component_confidences->clear();
}
class OCRTesseractImpl : public OCRTesseract
{
private:
@@ -189,6 +205,16 @@ public:
#endif
}
void run(Mat& image, Mat& mask, string& output, vector<Rect>* component_rects=NULL,
vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert( mask.type() == CV_8UC1 );
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
run( mask, output, component_rects, component_texts, component_confidences, component_level);
}
};

@@ -39,6 +39,7 @@
#ifdef __cplusplus
#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
namespace cv {
namespace ximgproc {
@@ -63,15 +64,15 @@ public:
@param filtered_disparity_map output disparity map.
@param ROI region of the disparity map to filter.
@param disparity_map_right optional argument, some implementations might also use the disparity map
of the right view to compute confidence maps, for instance.
@param ROI region of the disparity map to filter. Optional, usually it should be set automatically.
@param right_view optional argument, some implementations might also use the right view of the original
stereo-pair.
*/
CV_WRAP virtual void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right = Mat(), InputArray right_view = Mat()) = 0;
CV_WRAP virtual void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, InputArray disparity_map_right = Mat(), Rect ROI = Rect(), InputArray right_view = Mat()) = 0;
};
/** @brief Disparity map filter based on Weighted Least Squares filter (in form of Fast Global Smoother that
@@ -106,8 +107,7 @@ public:
/** @see getLRCthresh */
CV_WRAP virtual void setLRCthresh(int _LRC_thresh) = 0;
/** @brief DepthDiscontinuityRadius is a parameter used in confidence computation. It defines the size of
low-confidence regions around depth discontinuities. For typical window sizes used in stereo matching the
optimal value is around 5.
low-confidence regions around depth discontinuities.
*/
CV_WRAP virtual int getDepthDiscontinuityRadius() = 0;
/** @see getDepthDiscontinuityRadius */
@@ -117,16 +117,36 @@ public:
correct disparity values with a high degree of confidence).
*/
CV_WRAP virtual Mat getConfidenceMap() = 0;
/** @brief Get the ROI used in the last filter call
*/
CV_WRAP virtual Rect getROI() = 0;
};
/** @brief Factory method, create instance of DisparityWLSFilter and execute the initialization routines.
/** @brief Convenience factory method that creates an instance of DisparityWLSFilter and sets up all the relevant
filter parameters automatically based on the matcher instance. Currently supports only StereoBM and StereoSGBM.
@param matcher_left stereo matcher instance that will be used with the filter
*/
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilter(Ptr<StereoMatcher> matcher_left);
/** @brief Convenience method to set up the matcher for computing the right-view disparity map
that is required in case of filtering with confidence.
@param matcher_left main stereo matcher instance that will be used with the filter
*/
CV_EXPORTS_W
Ptr<StereoMatcher> createRightMatcher(Ptr<StereoMatcher> matcher_left);
/** @brief More generic factory method, create instance of DisparityWLSFilter and execute basic
initialization routines. When using this method you will need to set up the ROI, matchers and
other parameters by yourself.
@param use_confidence filtering with confidence requires two disparity maps (for the left and right views) and is
approximately two times slower. However, quality is typically significantly better.
*/
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilter(bool use_confidence);
Ptr<DisparityWLSFilter> createDisparityWLSFilterGeneric(bool use_confidence);
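A condensed usage sketch of the new convenience path (mirroring the updated disparity_filtering.cpp sample below; image file names are placeholders):

    Mat left  = imread("left.png"),  right = imread("right.png");
    Ptr<StereoBM> left_matcher = StereoBM::create(160, 15); // max disparity, window size
    Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(left_matcher);
    Ptr<StereoMatcher> right_matcher = createRightMatcher(left_matcher);
    Mat left_gray, right_gray, left_disp, right_disp, filtered_disp;
    cvtColor(left,  left_gray,  COLOR_BGR2GRAY);
    cvtColor(right, right_gray, COLOR_BGR2GRAY);
    left_matcher ->compute(left_gray,  right_gray, left_disp);
    right_matcher->compute(right_gray, left_gray,  right_disp);
    wls_filter->setLambda(8000.0);
    wls_filter->setSigmaColor(1.5);
    wls_filter->filter(left_disp, left, filtered_disp, right_disp); // ROI is set automatically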
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////

@@ -85,8 +85,8 @@ PERF_TEST_P( DisparityWLSFilterPerfTest, perf, Combine(GuideTypes::all(), SrcTyp
cv::setNumThreads(cv::getNumberOfCPUs());
TEST_CYCLE_N(10)
{
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(use_conf);
wls_filter->filter(disp_left,guide,dst,ROI,disp_right);
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilterGeneric(use_conf);
wls_filter->filter(disp_left,guide,dst,disp_right,ROI);
}
SANITY_CHECK(dst);

@@ -23,13 +23,13 @@ const String keys =
"{algorithm |bm | stereo matching method (bm or sgbm) }"
"{filter |wls_conf | used post-filtering (wls_conf or wls_no_conf) }"
"{no-display | | don't display results }"
"{no-downscale | | prevent stereo matching on downscaled views }"
"{no-downscale | | force stereo matching on full-sized views to improve quality }"
"{dst_conf_path |None | optional path to save the confidence map used in filtering }"
"{vis_mult |1.0 | coefficient used to scale disparity map visualizations }"
"{max_disparity |160 | parameter of stereo matching }"
"{window_size |19 | parameter of stereo matching }"
"{window_size |-1 | parameter of stereo matching }"
"{wls_lambda |8000.0 | parameter of post-filtering }"
"{wls_sigma |1.0 | parameter of post-filtering }"
"{wls_sigma |1.5 | parameter of post-filtering }"
;
int main(int argc, char** argv)
@@ -54,17 +54,30 @@ int main(int argc, char** argv)
bool no_display = parser.has("no-display");
bool no_downscale = parser.has("no-downscale");
int max_disp = parser.get<int>("max_disparity");
int wsize = parser.get<int>("window_size");
double lambda = parser.get<double>("wls_lambda");
double sigma = parser.get<double>("wls_sigma");
double vis_mult = parser.get<double>("vis_mult");
int wsize;
if(parser.get<int>("window_size")>=0) //user provided window_size value
wsize = parser.get<int>("window_size");
else
{
if(algo=="sgbm")
wsize = 3; //default window size for SGBM
else if(!no_downscale && algo=="bm" && filter=="wls_conf")
wsize = 7; //default window size for BM on downscaled views (downscaling is performed only for wls_conf)
else
wsize = 15; //default window size for BM on full-sized views
}
if (!parser.check())
{
parser.printErrors();
return -1;
}
//! [load_views]
Mat left = imread(left_im ,IMREAD_COLOR);
if ( left.empty() )
{
@@ -78,6 +91,7 @@ int main(int argc, char** argv)
cout<<"Cannot read image file: "<<right_im;
return -1;
}
//! [load_views]
bool noGT;
Mat GT_disp;
@@ -99,6 +113,7 @@ int main(int argc, char** argv)
Mat conf_map = Mat(left.rows,left.cols,CV_8U);
conf_map = Scalar(255);
Rect ROI;
Ptr<DisparityWLSFilter> wls_filter;
double matching_time, filtering_time;
if(max_disp<=0 || max_disp%16!=0)
{
@@ -110,17 +125,19 @@ int main(int argc, char** argv)
cout<<"Incorrect window_size value: it should be positive and odd";
return -1;
}
if(filter=="wls_conf")
if(filter=="wls_conf") // filtering with confidence (significantly better quality than wls_no_conf)
{
if(!no_downscale)
{
wsize = wsize/2;
if(wsize%2==0) wsize++;
// downscale the views to speed up the matching stage, as we will need to compute both left
// and right disparity maps for confidence map computation
//! [downscale]
max_disp/=2;
if(max_disp%16!=0)
max_disp += 16-(max_disp%16);
resize(left ,left_for_matcher ,Size(),0.5,0.5);
resize(right,right_for_matcher,Size(),0.5,0.5);
//! [downscale]
}
else
{
@@ -128,38 +145,31 @@ int main(int argc, char** argv)
right_for_matcher = right.clone();
}
if(algo=="bm")
{
//! [matching]
Ptr<StereoBM> left_matcher = StereoBM::create(max_disp,wsize);
left_matcher->setMinDisparity(0);
Ptr<StereoBM> right_matcher = StereoBM::create(max_disp,wsize);
right_matcher->setMinDisparity(-max_disp+1);
left_matcher->setTextureThreshold(0);
left_matcher->setUniquenessRatio(0);
right_matcher->setTextureThreshold(0);
right_matcher->setUniquenessRatio(0);
wls_filter = createDisparityWLSFilter(left_matcher);
Ptr<StereoMatcher> right_matcher = createRightMatcher(left_matcher);
cvtColor(left_for_matcher, left_for_matcher, COLOR_BGR2GRAY);
cvtColor(right_for_matcher, right_for_matcher, COLOR_BGR2GRAY);
ROI = computeROI(left_for_matcher.size(),left_matcher);
matching_time = (double)getTickCount();
left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
//! [matching]
}
else if(algo=="sgbm")
{
Ptr<StereoSGBM> left_matcher = StereoSGBM::create(0,max_disp,wsize);
left_matcher->setMinDisparity(0);
Ptr<StereoSGBM> right_matcher = StereoSGBM::create(-max_disp+1,max_disp,wsize);
left_matcher->setUniquenessRatio(0);
left_matcher->setDisp12MaxDiff(1000000);
left_matcher->setSpeckleWindowSize(0);
right_matcher->setUniquenessRatio(0);
right_matcher->setDisp12MaxDiff(1000000);
right_matcher->setSpeckleWindowSize(0);
ROI = computeROI(left_for_matcher.size(),left_matcher);
left_matcher->setP1(24*wsize*wsize);
left_matcher->setP2(96*wsize*wsize);
left_matcher->setPreFilterCap(63);
left_matcher->setMode(StereoSGBM::MODE_SGBM_3WAY);
wls_filter = createDisparityWLSFilter(left_matcher);
Ptr<StereoMatcher> right_matcher = createRightMatcher(left_matcher);
matching_time = (double)getTickCount();
left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
@@ -172,14 +182,17 @@ int main(int argc, char** argv)
return -1;
}
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
//! [filtering]
wls_filter->setLambda(lambda);
wls_filter->setSigmaColor(sigma);
filtering_time = (double)getTickCount();
wls_filter->filter(left_disp,left,filtered_disp,ROI,right_disp);
wls_filter->filter(left_disp,left,filtered_disp,right_disp);
filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
//! [filtering]
conf_map = wls_filter->getConfidenceMap();
// Get the ROI that was used in the last filter call:
ROI = wls_filter->getROI();
if(!no_downscale)
{
// upscale raw disparity and ROI back for a proper comparison:
@@ -190,6 +203,9 @@ int main(int argc, char** argv)
}
else if(filter=="wls_no_conf")
{
/* There is no convenience function for the case of filtering with no confidence, so we
will need to set the ROI and matcher parameters manually */
left_for_matcher = left.clone();
right_for_matcher = right.clone();
@@ -201,6 +217,8 @@ int main(int argc, char** argv)
cvtColor(left_for_matcher, left_for_matcher, COLOR_BGR2GRAY);
cvtColor(right_for_matcher, right_for_matcher, COLOR_BGR2GRAY);
ROI = computeROI(left_for_matcher.size(),matcher);
wls_filter = createDisparityWLSFilterGeneric(false);
wls_filter->setDepthDiscontinuityRadius((int)ceil(0.33*wsize));
matching_time = (double)getTickCount();
matcher->compute(left_for_matcher,right_for_matcher,left_disp);
@@ -212,7 +230,12 @@ int main(int argc, char** argv)
matcher->setUniquenessRatio(0);
matcher->setDisp12MaxDiff(1000000);
matcher->setSpeckleWindowSize(0);
matcher->setP1(24*wsize*wsize);
matcher->setP2(96*wsize*wsize);
matcher->setMode(StereoSGBM::MODE_SGBM_3WAY);
ROI = computeROI(left_for_matcher.size(),matcher);
wls_filter = createDisparityWLSFilterGeneric(false);
wls_filter->setDepthDiscontinuityRadius((int)ceil(0.5*wsize));
matching_time = (double)getTickCount();
matcher->compute(left_for_matcher,right_for_matcher,left_disp);
@@ -224,11 +247,10 @@ int main(int argc, char** argv)
return -1;
}
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(false);
wls_filter->setLambda(lambda);
wls_filter->setSigmaColor(sigma);
filtering_time = (double)getTickCount();
wls_filter->filter(left_disp,left,filtered_disp,ROI);
wls_filter->filter(left_disp,left,filtered_disp,Mat(),ROI);
filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
}
else
@@ -292,6 +314,7 @@ int main(int argc, char** argv)
imshow("ground-truth disparity", GT_disp_vis);
}
//! [visualization]
Mat raw_disp_vis;
getDisparityVis(left_disp,raw_disp_vis,vis_mult);
namedWindow("raw disparity", WINDOW_AUTOSIZE);
@@ -301,6 +324,7 @@ int main(int argc, char** argv)
namedWindow("filtered disparity", WINDOW_AUTOSIZE);
imshow("filtered disparity", filtered_disp_vis);
waitKey();
//! [visualization]
}
return 0;

@@ -49,17 +49,22 @@ using std::vector;
class DisparityWLSFilterImpl : public DisparityWLSFilter
{
protected:
double lambda,sigma_color;
int left_offset, right_offset, top_offset, bottom_offset;
Rect valid_disp_ROI;
Rect right_view_valid_disp_ROI;
int min_disp;
bool use_confidence;
Mat confidence_map;
double lambda,sigma_color;
int LRC_thresh,depth_discontinuity_radius;
float depth_discontinuity_roll_off_factor;
float resize_factor;
int num_stripes;
void init(double _lambda, double _sigma_color, bool _use_confidence);
void computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst, Rect ROI);
void computeConfidenceMap(InputArray left_disp, InputArray right_disp, Rect ROI);
void init(double _lambda, double _sigma_color, bool _use_confidence, int l_offs, int r_offs, int t_offs, int b_offs, int _min_disp);
void computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst);
void computeConfidenceMap(InputArray left_disp, InputArray right_disp);
protected:
struct ComputeDiscontinuityAwareLRC_ParBody : public ParallelLoopBody
@@ -99,13 +104,13 @@ protected:
void boxFilterOp(Mat& src,Mat& dst)
{
int rad = (int)ceil(resize_factor*depth_discontinuity_radius);
int rad = depth_discontinuity_radius;
boxFilter(src,dst,CV_32F,Size(2*rad+1,2*rad+1),Point(-1,-1));
}
void sqrBoxFilterOp(Mat& src,Mat& dst)
{
int rad = (int)ceil(resize_factor*depth_discontinuity_radius);
int rad = depth_discontinuity_radius;
sqrBoxFilter(src,dst,CV_32F,Size(2*rad+1,2*rad+1),Point(-1,-1));
}
@@ -115,22 +120,33 @@ protected:
}
public:
static Ptr<DisparityWLSFilterImpl> create(bool _use_confidence);
void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right, InputArray);
static Ptr<DisparityWLSFilterImpl> create(bool _use_confidence, int l_offs, int r_offs, int t_offs, int b_offs, int min_disp);
void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, InputArray disparity_map_right, Rect ROI, InputArray);
double getLambda() {return lambda;}
void setLambda(double _lambda) {lambda = _lambda;}
double getSigmaColor() {return sigma_color;}
void setSigmaColor(double _sigma_color) {sigma_color = _sigma_color;}
Mat getConfidenceMap() {return confidence_map;}
int getLRCthresh() {return LRC_thresh;}
void setLRCthresh(int _LRC_thresh) {LRC_thresh = _LRC_thresh;}
int getDepthDiscontinuityRadius() {return depth_discontinuity_radius;}
void setDepthDiscontinuityRadius(int _disc_radius) {depth_discontinuity_radius = _disc_radius;}
Mat getConfidenceMap() {return confidence_map;}
Rect getROI() {return valid_disp_ROI;}
};
void DisparityWLSFilterImpl::init(double _lambda, double _sigma_color, bool _use_confidence)
void DisparityWLSFilterImpl::init(double _lambda, double _sigma_color, bool _use_confidence, int l_offs, int r_offs, int t_offs, int b_offs, int _min_disp)
{
left_offset = l_offs; right_offset = r_offs;
top_offset = t_offs; bottom_offset = b_offs;
min_disp = _min_disp;
valid_disp_ROI = Rect();
right_view_valid_disp_ROI = Rect();
min_disp=0;
lambda = _lambda;
sigma_color = _sigma_color;
use_confidence = _use_confidence;
@@ -142,11 +158,10 @@ void DisparityWLSFilterImpl::init(double _lambda, double _sigma_color, bool _use
num_stripes = getNumThreads();
}
void DisparityWLSFilterImpl::computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst, Rect ROI)
void DisparityWLSFilterImpl::computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst)
{
Rect right_ROI(left_disp.cols-(ROI.x+ROI.width),ROI.y,ROI.width,ROI.height);
Mat left_disp_ROI (left_disp, ROI);
Mat right_disp_ROI(right_disp,right_ROI);
Mat left_disp_ROI (left_disp, valid_disp_ROI);
Mat right_disp_ROI(right_disp,right_view_valid_disp_ROI);
Mat ldisp,rdisp,ldisp_squared,rdisp_squared;
{
@@ -171,36 +186,37 @@ void DisparityWLSFilterImpl::computeDepthDiscontinuityMaps(Mat& left_disp, Mat&
left_dst = Mat::zeros(left_disp.rows,left_disp.cols,CV_32F);
right_dst = Mat::zeros(right_disp.rows,right_disp.cols,CV_32F);
Mat left_dst_ROI (left_dst,ROI);
Mat right_dst_ROI(right_dst,right_ROI);
Mat left_dst_ROI (left_dst,valid_disp_ROI);
Mat right_dst_ROI(right_dst,right_view_valid_disp_ROI);
parallel_for_(Range(0,num_stripes),ComputeDepthDisc_ParBody(*this,ldisp,ldisp_squared,left_dst_ROI ,num_stripes));
parallel_for_(Range(0,num_stripes),ComputeDepthDisc_ParBody(*this,rdisp,rdisp_squared,right_dst_ROI,num_stripes));
}
void DisparityWLSFilterImpl::computeConfidenceMap(InputArray left_disp, InputArray right_disp, Rect ROI)
void DisparityWLSFilterImpl::computeConfidenceMap(InputArray left_disp, InputArray right_disp)
{
Mat ldisp = left_disp.getMat();
Mat rdisp = right_disp.getMat();
Mat depth_discontinuity_map_left,depth_discontinuity_map_right;
computeDepthDiscontinuityMaps(ldisp,rdisp,depth_discontinuity_map_left,depth_discontinuity_map_right,ROI);
right_view_valid_disp_ROI = Rect(ldisp.cols-(valid_disp_ROI.x+valid_disp_ROI.width),valid_disp_ROI.y,
valid_disp_ROI.width,valid_disp_ROI.height);
computeDepthDiscontinuityMaps(ldisp,rdisp,depth_discontinuity_map_left,depth_discontinuity_map_right);
Rect right_ROI(ldisp.cols-(ROI.x+ROI.width),ROI.y,ROI.width,ROI.height);
confidence_map = depth_discontinuity_map_left;
parallel_for_(Range(0,num_stripes),ComputeDiscontinuityAwareLRC_ParBody(*this,ldisp,rdisp, depth_discontinuity_map_left,depth_discontinuity_map_right,confidence_map,ROI,right_ROI,num_stripes));
parallel_for_(Range(0,num_stripes),ComputeDiscontinuityAwareLRC_ParBody(*this,ldisp,rdisp, depth_discontinuity_map_left,depth_discontinuity_map_right,confidence_map,valid_disp_ROI,right_view_valid_disp_ROI,num_stripes));
confidence_map = 255.0f*confidence_map;
}
Ptr<DisparityWLSFilterImpl> DisparityWLSFilterImpl::create(bool _use_confidence)
Ptr<DisparityWLSFilterImpl> DisparityWLSFilterImpl::create(bool _use_confidence, int l_offs=0, int r_offs=0, int t_offs=0, int b_offs=0, int min_disp=0)
{
DisparityWLSFilterImpl *wls = new DisparityWLSFilterImpl();
wls->init(8000.0,1.0,_use_confidence);
wls->init(8000.0,1.0,_use_confidence,l_offs, r_offs, t_offs, b_offs, min_disp);
return Ptr<DisparityWLSFilterImpl>(wls);
}
void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right, InputArray)
void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, InputArray disparity_map_right, Rect ROI, InputArray)
{
CV_Assert( !disparity_map_left.empty() && (disparity_map_left.depth() == CV_16S) && (disparity_map_left.channels() == 1) );
CV_Assert( !left_view.empty() && (left_view.depth() == CV_8U) && (left_view.channels() == 3 || left_view.channels() == 1) );
@@ -209,6 +225,12 @@ void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray le
resize_factor = disparity_map_left.cols()/(float)left_view.cols();
else
resize_factor = 1.0;
if(ROI.area()!=0) /* user provided a ROI */
valid_disp_ROI = ROI;
else
valid_disp_ROI = Rect(left_offset,top_offset,
disparity_map_left.cols()-left_offset-right_offset,
disparity_map_left.rows()-top_offset-bottom_offset);
if(!use_confidence)
{
@@ -220,13 +242,16 @@ void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray le
float y_ratio = src_full_size.rows/(float)disp_full_size.rows;
resize(disp_full_size,disp_full_size,src_full_size.size());
disp_full_size = disp_full_size*x_ratio;
ROI = Rect((int)(ROI.x*x_ratio),(int)(ROI.y*y_ratio),(int)(ROI.width*x_ratio),(int)(ROI.height*y_ratio));
ROI = Rect((int)(valid_disp_ROI.x*x_ratio), (int)(valid_disp_ROI.y*y_ratio),
(int)(valid_disp_ROI.width*x_ratio),(int)(valid_disp_ROI.height*y_ratio));
}
else
ROI = valid_disp_ROI;
disp = Mat(disp_full_size,ROI);
src = Mat(src_full_size ,ROI);
filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
Mat& dst_full_size = filtered_disparity_map.getMatRef();
dst_full_size = Scalar(-16);
dst_full_size = Scalar(16*(min_disp-1));
dst = Mat(dst_full_size,ROI);
Mat filtered_disp;
fastGlobalSmootherFilter(src,disp,filtered_disp,lambda,sigma_color);
@@ -237,7 +262,7 @@ void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray le
CV_Assert( !disparity_map_right.empty() && (disparity_map_right.depth() == CV_16S) && (disparity_map_right.channels() == 1) );
CV_Assert( (disparity_map_left.cols() == disparity_map_right.cols()) );
CV_Assert( (disparity_map_left.rows() == disparity_map_right.rows()) );
computeConfidenceMap(disparity_map_left,disparity_map_right,ROI);
computeConfidenceMap(disparity_map_left,disparity_map_right);
Mat disp_full_size = disparity_map_left.getMat();
Mat src_full_size = left_view.getMat();
if(disp_full_size.size!=src_full_size.size)
@ -247,13 +272,16 @@ void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray le
resize(disp_full_size,disp_full_size,src_full_size.size());
disp_full_size = disp_full_size*x_ratio;
resize(confidence_map,confidence_map,src_full_size.size());
ROI = Rect((int)(ROI.x*x_ratio),(int)(ROI.y*y_ratio),(int)(ROI.width*x_ratio),(int)(ROI.height*y_ratio));
ROI = Rect((int)(valid_disp_ROI.x*x_ratio), (int)(valid_disp_ROI.y*y_ratio),
(int)(valid_disp_ROI.width*x_ratio),(int)(valid_disp_ROI.height*y_ratio));
}
else
ROI = valid_disp_ROI;
disp = Mat(disp_full_size,ROI);
src = Mat(src_full_size ,ROI);
filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
Mat& dst_full_size = filtered_disparity_map.getMatRef();
dst_full_size = Scalar(-16);
dst_full_size = Scalar(16*(min_disp-1));
dst = Mat(dst_full_size,ROI);
Mat conf(confidence_map,ROI);
@ -355,7 +383,73 @@ void DisparityWLSFilterImpl::ParallelMatOp_ParBody::operator() (const Range& ran
}
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilter(bool use_confidence)
Ptr<DisparityWLSFilter> createDisparityWLSFilter(Ptr<StereoMatcher> matcher_left)
{
Ptr<DisparityWLSFilter> wls;
matcher_left->setDisp12MaxDiff(1000000);
matcher_left->setSpeckleWindowSize(0);
int min_disp = matcher_left->getMinDisparity();
int num_disp = matcher_left->getNumDisparities();
int wsize = matcher_left->getBlockSize();
int wsize2 = wsize/2;
if(Ptr<StereoBM> bm = matcher_left.dynamicCast<StereoBM>())
{
bm->setTextureThreshold(0);
bm->setUniquenessRatio(0);
wls = DisparityWLSFilterImpl::create(true,max(0,min_disp+num_disp)+wsize2,max(0,-min_disp)+wsize2,wsize2,wsize2,min_disp);
wls->setDepthDiscontinuityRadius((int)ceil(0.33*wsize));
}
else if(Ptr<StereoSGBM> sgbm = matcher_left.dynamicCast<StereoSGBM>())
{
sgbm->setUniquenessRatio(0);
wls = DisparityWLSFilterImpl::create(true,max(0,min_disp+num_disp),max(0,-min_disp),0,0,min_disp);
wls->setDepthDiscontinuityRadius((int)ceil(0.5*wsize));
}
else
CV_Error(Error::StsBadArg, "DisparityWLSFilter natively supports only StereoBM and StereoSGBM");
return wls;
}
CV_EXPORTS_W
Ptr<StereoMatcher> createRightMatcher(Ptr<StereoMatcher> matcher_left)
{
int min_disp = matcher_left->getMinDisparity();
int num_disp = matcher_left->getNumDisparities();
int wsize = matcher_left->getBlockSize();
if(Ptr<StereoBM> bm = matcher_left.dynamicCast<StereoBM>())
{
Ptr<StereoBM> right_bm = StereoBM::create(num_disp,wsize);
right_bm->setMinDisparity(-(min_disp+num_disp)+1);
right_bm->setTextureThreshold(0);
right_bm->setUniquenessRatio(0);
right_bm->setDisp12MaxDiff(1000000);
right_bm->setSpeckleWindowSize(0);
return right_bm;
}
else if(Ptr<StereoSGBM> sgbm = matcher_left.dynamicCast<StereoSGBM>())
{
Ptr<StereoSGBM> right_sgbm = StereoSGBM::create(-(min_disp+num_disp)+1,num_disp,wsize);
right_sgbm->setUniquenessRatio(0);
right_sgbm->setP1(sgbm->getP1());
right_sgbm->setP2(sgbm->getP2());
right_sgbm->setMode(sgbm->getMode());
right_sgbm->setPreFilterCap(sgbm->getPreFilterCap());
right_sgbm->setDisp12MaxDiff(1000000);
right_sgbm->setSpeckleWindowSize(0);
return right_sgbm;
}
else
{
CV_Error(Error::StsBadArg, "createRightMatcher supports only StereoBM and StereoSGBM");
return Ptr<StereoMatcher>();
}
}
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilterGeneric(bool use_confidence)
{
return Ptr<DisparityWLSFilter>(DisparityWLSFilterImpl::create(use_confidence));
}
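
Taken together, these hunks rework the public factory API: createDisparityWLSFilter now takes the
left-view StereoMatcher and derives the filter's ROI offsets and minimum disparity from it,
createRightMatcher builds a mirrored matcher for the right view, and the old boolean-flag factory
lives on as createDisparityWLSFilterGeneric. A minimal sketch of the new calling convention follows
(the matcher parameter values are illustrative assumptions, not part of the change):

```cpp
#include <opencv2/calib3d.hpp>
#include <opencv2/ximgproc/disparity_filter.hpp>
using namespace cv;
using namespace cv::ximgproc;

void example_new_factory_api()
{
    // Illustrative parameters: 128 disparities, 15x15 blocks.
    Ptr<StereoBM> left_matcher = StereoBM::create(128, 15);
    // The filter derives its ROI offsets and minimum disparity
    // from the matcher it is given:
    Ptr<DisparityWLSFilter> wls = createDisparityWLSFilter(left_matcher);
    // A right-view matcher with mirrored settings:
    Ptr<StereoMatcher> right_matcher = createRightMatcher(left_matcher);
    (void)wls; (void)right_matcher;
    // After computing left_disp and right_disp with the two matchers,
    // note the new argument order (right disparity map before the ROI):
    // wls->filter(left_disp, left_view, filtered_disp, right_disp, ROI);
}
```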

@ -83,10 +83,10 @@ TEST(DisparityWLSFilterTest, ReferenceAccuracy)
cv::setNumThreads(cv::getNumberOfCPUs());
Mat res;
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilterGeneric(true);
wls_filter->setLambda(8000.0);
wls_filter->setSigmaColor(0.5);
wls_filter->filter(left_disp,left,res,ROI,right_disp);
wls_filter->filter(left_disp,left,res,right_disp,ROI);
double MSE = computeMSE(GT,res,ROI);
double BadPercent = computeBadPixelPercent(GT,res,ROI);
@ -134,17 +134,17 @@ TEST_P(DisparityWLSFilterTest, MultiThreadReproducibility)
double lambda = rng.uniform(100.0, 10000.0);
double sigma = rng.uniform(1.0, 100.0);
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(use_conf);
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilterGeneric(use_conf);
wls_filter->setLambda(lambda);
wls_filter->setSigmaColor(sigma);
cv::setNumThreads(cv::getNumberOfCPUs());
Mat resMultiThread;
wls_filter->filter(left_disp,left,resMultiThread,ROI,right_disp);
wls_filter->filter(left_disp,left,resMultiThread,right_disp,ROI);
cv::setNumThreads(1);
Mat resSingleThread;
wls_filter->filter(left_disp,left,resSingleThread,ROI,right_disp);
wls_filter->filter(left_disp,left,resSingleThread,right_disp,ROI);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF*left.total());

@ -0,0 +1,76 @@
Disparity map post-filtering {#tutorial_ximgproc_disparity_filtering}
============================
Introduction
------------
Stereo matching algorithms, especially highly-optimized ones intended for real-time processing
on CPU, tend to make quite a few errors on challenging sequences. These errors are usually concentrated
in uniform texture-less areas, half-occlusions and regions near depth discontinuities. One way of dealing
with stereo-matching errors is to detect potentially inaccurate disparity values and invalidate them,
making the disparity map semi-sparse. Several such techniques are already implemented in the StereoBM
and StereoSGBM algorithms. Another way is to use a filtering procedure that aligns the disparity map
edges with those of the source image and propagates the disparity values from high- to low-confidence
regions such as half-occlusions. Recent advances in edge-aware filtering have made such post-filtering
feasible under the constraints of real-time processing on CPU.
In this tutorial you will learn how to use disparity map post-filtering to improve the results
of the StereoBM and StereoSGBM algorithms.
Source Stereoscopic Image
-------------------------
![Left view](images/ambush_5_left.jpg)
![Right view](images/ambush_5_right.jpg)
Source Code
-----------
We will be using snippets from the example application, which can be downloaded [here](https://github.com/Itseez/opencv_contrib/blob/master/modules/ximgproc/samples/disparity_filtering.cpp).
Explanation
-----------
The provided example has several options that yield different trade-offs between the speed and
the quality of the resulting disparity map. Both speed and quality are measured when the user
provides a ground-truth disparity map. In this tutorial we will take a detailed look at the
default pipeline, which was designed to provide the best possible quality under the constraints of
real-time processing on CPU. A consolidated code sketch of the whole pipeline is given after the
step-by-step walkthrough below.
-# **Load left and right views**
@snippet ximgproc/samples/disparity_filtering.cpp load_views
We start by loading the source stereo pair. For this tutorial we take a somewhat challenging
example from the MPI-Sintel dataset with a lot of texture-less regions.
-# **Prepare the views for matching**
@snippet ximgproc/samples/disparity_filtering.cpp downscale
We downscale the views to speed up the matching stage, at the cost of minor quality
degradation. For the best possible quality, downscaling should be avoided.
-# **Perform matching and create the filter instance**
@snippet ximgproc/samples/disparity_filtering.cpp matching
We use StereoBM for faster processing. If speed is not critical, though,
StereoSGBM provides better quality. The filter instance is created by providing
the StereoMatcher instance that we intend to use. Another matcher instance is
returned by the createRightMatcher function. These two matcher instances are then
used to compute disparity maps for both the left and right views, which are required
by the filter.
-# **Perform filtering**
@snippet ximgproc/samples/disparity_filtering.cpp filtering
The disparity maps computed by the respective matcher instances, as well as the source left view,
are passed to the filter. Note that we use the original non-downscaled view to guide the
filtering process. The disparity map is automatically upscaled in an edge-aware fashion to match
the original view resolution. The result is stored in filtered_disp.
-# **Visualize the disparity maps**
@snippet ximgproc/samples/disparity_filtering.cpp visualization
We use the convenience function getDisparityVis to visualize the disparity maps. The second parameter
defines the contrast (all disparity values are scaled by this value in the visualization).
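
For reference, the individual steps above assemble into the following minimal end-to-end sketch.
This is a simplified approximation of the sample, not the sample itself: the file names, downscale
factor, disparity range, block size, lambda, sigma and visualization scale are illustrative
assumptions, and the filter call assumes the trailing ROI and right-view parameters keep their
defaults.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/ximgproc/disparity_filter.hpp>

using namespace cv;
using namespace cv::ximgproc;

int main()
{
    // Step 1: load the views (assumed file names).
    Mat left  = imread("ambush_5_left.jpg",  IMREAD_COLOR);
    Mat right = imread("ambush_5_right.jpg", IMREAD_COLOR);
    if(left.empty() || right.empty())
        return -1;

    // Step 2: downscale the views to speed up matching; the disparity
    // range scales together with the image width.
    int max_disp = 160, wsize = 15; // illustrative values
    max_disp /= 2;
    if(max_disp % 16 != 0)
        max_disp += 16 - (max_disp % 16); // StereoBM needs a multiple of 16
    Mat left_small, right_small;
    resize(left,  left_small,  Size(), 0.5, 0.5, INTER_AREA);
    resize(right, right_small, Size(), 0.5, 0.5, INTER_AREA);

    // Step 3: create the left/right matchers and the WLS filter.
    Ptr<StereoBM> left_matcher = StereoBM::create(max_disp, wsize);
    Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(left_matcher);
    Ptr<StereoMatcher> right_matcher = createRightMatcher(left_matcher);

    Mat left_gray, right_gray, left_disp, right_disp;
    cvtColor(left_small,  left_gray,  COLOR_BGR2GRAY); // StereoBM expects grayscale
    cvtColor(right_small, right_gray, COLOR_BGR2GRAY);
    left_matcher ->compute(left_gray,  right_gray, left_disp);
    right_matcher->compute(right_gray, left_gray,  right_disp);

    // Step 4: filter, guided by the original full-resolution left view.
    wls_filter->setLambda(8000.0);
    wls_filter->setSigmaColor(1.5);
    Mat filtered_disp;
    wls_filter->filter(left_disp, left, filtered_disp, right_disp);

    // Step 5: visualize (the scale factor stretches the contrast).
    double vis_mult = 1.0;
    Mat raw_vis, filtered_vis;
    getDisparityVis(left_disp, raw_vis, vis_mult);
    getDisparityVis(filtered_disp, filtered_vis, vis_mult);
    imshow("raw disparity", raw_vis);
    imshow("filtered disparity", filtered_vis);
    waitKey();
    return 0;
}
```

Keeping the full-resolution left view as the guide is what allows the filter to upscale the
half-resolution disparity map in an edge-aware fashion while filtering it.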
Results
-------
![Result of the StereoBM](images/ambush_5_bm.png)
![Result of the demonstrated pipeline (StereoBM on downscaled views with post-filtering)](images/ambush_5_bm_with_filter.png)
