diff --git a/modules/adas/tools/fcw_detect/fcw_detect.cpp b/modules/adas/tools/fcw_detect/fcw_detect.cpp
index 47030b061..1a68ca944 100644
--- a/modules/adas/tools/fcw_detect/fcw_detect.cpp
+++ b/modules/adas/tools/fcw_detect/fcw_detect.cpp
@@ -48,15 +48,29 @@ static Mat visualize(const Mat &image, const vector<Rect> &objects)
     }
     return img;
 }
+static bool read_window_size(const char *str, int *rows, int *cols) // parses "<rows>x<cols>", e.g. "40x40"
+{
+    int pos = 0; // number of characters consumed so far, stored by the %n directive
+    if( sscanf(str, "%dx%d%n", rows, cols, &pos) != 2 || str[pos] != '\0' || // need both numbers and nothing after them
+        *rows <= 0 || *cols <= 0) // both dimensions must be positive
+    {
+        return false;
+    }
+    return true;
+}
 
 int main(int argc, char *argv[])
 {
     const string keys =
-        "{help           |           | print this message}"
-        "{model_filename | model.xml | filename for reading model}"
-        "{image_path     |  test.png | path to image for detection}"
-        "{out_image_path |   out.png | path to image for output}"
-        "{threshold      |       0.0 | threshold for cascade}"
+        "{help            |           | print this message}"
+        "{model_filename  | model.xml | filename for reading model}"
+        "{image_path      |  test.png | path to image for detection}"
+        "{out_image_path  |   out.png | path to image for output}"
+        "{threshold       |       0.0 | threshold for cascade}"
+        "{step            |         8 | sliding window step}"
+        "{min_window_size |     40x40 | min window size in pixels}"
+        "{max_window_size |   300x300 | max window size in pixels}"
+        "{is_grayscale    |     false | read the image as grayscale}"
         ;
 
     CommandLineParser parser(argc, argv, keys);
@@ -71,7 +85,31 @@ int main(int argc, char *argv[])
     string model_filename = parser.get<string>("model_filename");
     string image_path = parser.get<string>("image_path");
     string out_image_path = parser.get<string>("out_image_path");
+    bool is_grayscale = parser.get<bool>("is_grayscale");
     float threshold = parser.get<float>("threshold");
+    // The step is a whole number of pixels, so parse it as int rather than float.
+    int step = parser.get<int>("step");
+    if( step <= 0 ) // the detector advances its window loops by step; <= 0 would never terminate
+    {
+        cerr << "step must be positive number" << endl;
+        return 1;
+    }
+
+    // Window sizes are written as ROWSxCOLS, e.g. 40x40.
+    int min_rows, min_cols, max_rows, max_cols;
+    string min_window_size = parser.get<string>("min_window_size");
+    if( !read_window_size(min_window_size.c_str(), &min_rows, &min_cols) )
+    {
+        cerr << "Error reading min window size from `" << min_window_size << "`" << endl;
+        return 1;
+    }
+    string max_window_size = parser.get<string>("max_window_size");
+    if( !read_window_size(max_window_size.c_str(), &max_rows, &max_cols) )
+    {
+        cerr << "Error reading max window size from `" << max_window_size << "`" << endl;
+        return 1;
+    }
+    const int color = is_grayscale ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR; // imread flag
 
 
     if( !parser.check() )
@@ -85,8 +123,10 @@ int main(int argc, char *argv[])
     detector.read(fs["icfdetector"]);
     fs.release();
     vector<Rect> objects;
-    Mat img = imread(image_path);
-    detector.detect(img, objects, 1.1f, Size(40, 40),
-        Size(300, 300), threshold);
+    Mat img = imread(image_path, color); // color is the imread flag selected from is_grayscale
+    std::vector<float> values; // receives the classifier value of every window reported in objects
+    detector.detect(img, objects, 1.1f, Size(min_cols, min_rows), Size(max_cols, max_rows), threshold, step, values);
     imwrite(out_image_path, visualize(img, objects));
+    
+    
 }
diff --git a/modules/adas/tools/fcw_train/fcw_train.cpp b/modules/adas/tools/fcw_train/fcw_train.cpp
index 2b51cd61a..3b4af4b97 100644
--- a/modules/adas/tools/fcw_train/fcw_train.cpp
+++ b/modules/adas/tools/fcw_train/fcw_train.cpp
@@ -27,6 +27,9 @@ using cv::imread;
 using cv::CommandLineParser;
 using cv::FileStorage;
 
+#include <ctime>        // std::time
+#include <cstdlib>      // std::rand, std::srand
+
 #include <opencv2/xobjdetect.hpp>
 
 using cv::xobjdetect::ICFDetectorParams;
@@ -46,8 +49,11 @@ static bool read_model_size(const char *str, int *rows, int *cols)
     return true;
 }
 
+static int randomPred (int i) { return std::rand()%i;} // index generator passed to std::random_shuffle: value in [0, i) for i > 0
+
 int main(int argc, char *argv[])
 {
+    
     const string keys =
         "{help           |           | print this message}"
         "{pos_path       |       pos | path to training object samples}"
@@ -57,8 +63,15 @@ int main(int argc, char *argv[])
         "{weak_count     |       100 | number of weak classifiers in cascade}"
         "{model_size     |     40x40 | model size in pixels}"
         "{model_filename | model.xml | filename for saving model}"
+        "{features_type  |       icf | features type, \"icf\" or \"acf\"}"
+        "{alpha          |      0.02 | alpha value}"
+        "{is_grayscale   |     false | read the image as grayscale}"
+        "{use_fast_log   |     false | use fast log function}"
+        "{limit_ps       |        -1 | limit to positive samples (-1 means all)}"
+        "{limit_bg       |        -1 | limit to negative samples (-1 means all)}"
         ;
 
+
     CommandLineParser parser(argc, argv, keys);
     parser.about("FCW trainer");
 
@@ -76,7 +89,14 @@ int main(int argc, char *argv[])
     params.feature_count = parser.get<int>("feature_count");
     params.weak_count = parser.get<int>("weak_count");
     params.bg_per_image = parser.get<int>("bg_per_image");
-
+    params.features_type = parser.get<string>("features_type");
+    params.alpha = parser.get<float>("alpha");
+    params.is_grayscale = parser.get<bool>("is_grayscale");
+    params.use_fast_log = parser.get<bool>("use_fast_log");
+    
+    int limit_ps = parser.get<int>("limit_ps");
+    int limit_bg = parser.get<int>("limit_bg");    
+    
     string model_size = parser.get<string>("model_size");
     if( !read_model_size(model_size.c_str(), &params.model_n_rows,
         &params.model_n_cols) )
@@ -97,20 +117,69 @@ int main(int argc, char *argv[])
         return 1;
     }
 
-    if( params.bg_per_image <= 0 )
+    if( params.features_type != "icf" &&  params.features_type != "acf" )
     {
-        cerr << "bg_per_image must be positive number" << endl;
+        cerr << "features_type must be \"icf\" or \"acf\"" << endl;
+        return 1;
+    }
+    if( params.alpha <= 0 )
+    {
+        cerr << "alpha must be positive float number" << endl;
         return 1;
     }
-
     if( !parser.check() )
     {
         parser.printErrors();
         return 1;
     }
+    // bg_per_image scales the number of negative samples, so it has to be positive.
+    if( params.bg_per_image <= 0 )
+    {
+        cerr << "bg_per_image must be positive number" << endl;
+        return 1;
+    }
+
+    std::vector<cv::String> pos_filenames;
+    glob(pos_path, pos_filenames);
+    std::vector<cv::String> bg_filenames;
+    glob(bg_path, bg_filenames);
+    // Any negative limit means "use all files"; only limits >= 0 truncate, so begin() + limit stays in range.
+    if(limit_ps >= 0 && pos_filenames.size() > (size_t)limit_ps)
+      pos_filenames.erase(pos_filenames.begin()+limit_ps, pos_filenames.end());
+    if(limit_bg >= 0 && bg_filenames.size() > (size_t)limit_bg)
+      bg_filenames.erase(bg_filenames.begin()+limit_bg, bg_filenames.end());
+    // Optional shuffling of the input lists; switched off by the flag below.
+    bool random_shuffle = false;
+    if(random_shuffle)
+    {
+      std::srand ( unsigned ( std::time(0) ) );
+      std::random_shuffle ( pos_filenames.begin(), pos_filenames.end(), randomPred );
+      std::random_shuffle ( bg_filenames.begin(), bg_filenames.end(), randomPred );
+    }
+
+    // One int is kept per (feature, sample) pair; the total byte count must fit in INT_MAX.
+    const size_t samples_size = (size_t)params.bg_per_image * bg_filenames.size() + pos_filenames.size();
+    const int features_size = params.feature_count;
+    if( samples_size == 0 || features_size <= 0 ) // also guards the divisions below against zero
+    {
+        cerr << "ERROR: no training samples found or no features requested" << endl;
+        return 1;
+    }
+    const size_t max_features_allowed = INT_MAX / (sizeof(int) * samples_size);
+    const size_t max_samples_allowed = INT_MAX / (sizeof(int) * (size_t)features_size);
+    if( samples_size > max_samples_allowed ) // equivalent to features_size > max_features_allowed
+    {
+        cerr << "ERROR: maximum number of samples (pos + neg) with " << features_size << " features is: " << max_samples_allowed << endl;
+        cerr << "ERROR: maximum number of features with " << samples_size << " samples is: " << max_features_allowed << endl;
+        return 1;
+    }
+
+    std::cout<<pos_filenames.size()<<std::endl;
+    std::cout<<bg_filenames.size()<<std::endl;
+
+    ICFDetector detector;
+    detector.train(pos_filenames, bg_filenames, params);
 
-    ICFDetector detector;
-    detector.train(pos_path, bg_path, params);
     FileStorage fs(model_filename, FileStorage::WRITE);
     fs << "icfdetector";
     detector.write(fs);
diff --git a/modules/xobjdetect/doc/integral_channel_features.rst b/modules/xobjdetect/doc/integral_channel_features.rst
index 41865c17e..7dc0b0519 100644
--- a/modules/xobjdetect/doc/integral_channel_features.rst
+++ b/modules/xobjdetect/doc/integral_channel_features.rst
@@ -165,9 +165,14 @@ Params for ICFDetector training.
         int model_n_rows;
         int model_n_cols;
         int bg_per_image;
+        std::string features_type;
+        float alpha;
+        bool is_grayscale;
+        bool use_fast_log;
 
         ICFDetectorParams(): feature_count(UINT_MAX), weak_count(100),
-            model_n_rows(56), model_n_cols(56), bg_per_image(5)
+            model_n_rows(56), model_n_cols(56), bg_per_image(5), 
+            alpha(0.02), is_grayscale(false), use_fast_log(false)
         {}
     };
 
@@ -181,7 +186,7 @@ ICFDetector::train
 
 Train detector.
 
-.. ocv:function:: void ICFDetector::train(const String& pos_path, const String& bg_path, ICFDetectorParams params = ICFDetectorParams())
+.. ocv:function:: void ICFDetector::train(const std::vector<String>& pos_filenames, const std::vector<String>& bg_filenames, ICFDetectorParams params = ICFDetectorParams())
 
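-    :param pos_path: path to folder with images of objects (wildcards like ``/my/path/*.png`` are allowed)
-    :param bg_path: path to folder with background images
+    :param pos_filenames: paths to images of objects
+    :param bg_filenames: paths to background images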
@@ -192,13 +197,20 @@ ICFDetector::detect
 
 Detect objects on image.
 
-.. ocv:function:: void ICFDetector::detect(const Mat& image, vector<Rect>& objects, float scaleFactor, Size minSize, Size maxSize, float threshold)
+.. ocv:function:: void ICFDetector::detect(const Mat& image, vector<Rect>& objects, float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector<float>& values)
+
+.. ocv:function:: void ICFDetector::detect(const Mat& image, std::vector<Rect>& objects, float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector<float>& values)
 
     :param image: image for detection
     :param objects: output array of bounding boxes
     :param scaleFactor: scale between layers in detection pyramid
     :param minSize: min size of objects in pixels
     :param maxSize: max size of objects in pixels
+    :param minScaleFactor: min factor by which the image will be resized
+    :param maxScaleFactor: max factor by which the image will be resized
+    :param factorStep: scaling factor is incremented each pyramid layer according to this parameter
+    :param slidingStep: sliding window step
+    :param values: output vector with the classifier value of each detected object
 
 ICFDetector::write
 ------------------
diff --git a/modules/xobjdetect/include/opencv2/xobjdetect.hpp b/modules/xobjdetect/include/opencv2/xobjdetect.hpp
index 6d86d7f7f..99cf1213e 100644
--- a/modules/xobjdetect/include/opencv2/xobjdetect.hpp
+++ b/modules/xobjdetect/include/opencv2/xobjdetect.hpp
@@ -43,6 +43,7 @@ the use of this software, even if advised of the possibility of such damage.
 #define __OPENCV_XOBJDETECT_XOBJDETECT_HPP__
 
 #include <opencv2/core.hpp>
+#include <opencv2/highgui.hpp>
 #include <vector>
 #include <string>
 
@@ -102,6 +103,8 @@ std::vector<std::vector<int> >
 generateFeatures(Size window_size, const std::string& type,
                  int count = INT_MAX, int channel_count = 10);
 
+//sort in-place of columns of the input matrix
+void sort_columns_without_copy(Mat& m, Mat indices = Mat());
 
 struct CV_EXPORTS WaldBoostParams
 {
@@ -127,8 +130,8 @@ public:
     Returns feature indices chosen for cascade.
     Feature enumeration starts from 0
     */
-    virtual std::vector<int> train(const Mat& /*data*/,
-                                   const Mat& /*labels*/) = 0;
+    virtual std::vector<int> train(Mat& /*data*/,
+                                   const Mat& /*labels*/, bool use_fast_log=false) = 0;
 
     /* Predict object class given object that can compute object features
 
@@ -157,9 +160,13 @@ struct CV_EXPORTS ICFDetectorParams
     int model_n_rows;
     int model_n_cols;
     int bg_per_image;
+    std::string features_type;
+    float alpha;
+    bool is_grayscale;
+    bool use_fast_log;
 
     ICFDetectorParams(): feature_count(UINT_MAX), weak_count(100),
-        model_n_rows(56), model_n_cols(56), bg_per_image(5)
+        model_n_rows(56), model_n_cols(56), bg_per_image(5), features_type("icf"), alpha(0.02f), is_grayscale(false), use_fast_log(false) // "icf" is the type train() used before features_type existed
     {}
 };
 
@@ -167,18 +174,18 @@ class CV_EXPORTS ICFDetector
 {
 public:
 
-    ICFDetector(): waldboost_(), features_() {}
+    ICFDetector(): waldboost_(), features_(), ftype_() {}
 
     /* Train detector
 
-        pos_path — path to folder with images of objects
+        pos_filenames — paths to images of objects
 
-        bg_path — path to folder with background images
+        bg_filenames — paths to background images
 
         params — parameters for detector training
     */
-    void train(const String& pos_path,
-               const String& bg_path,
+    void train(const std::vector<String>& pos_filenames,
+               const std::vector<String>& bg_filenames,
                ICFDetectorParams params = ICFDetectorParams());
 
     /* Detect object on image
@@ -192,9 +199,35 @@ public:
         minSize — min size of objects in pixels
 
         maxSize — max size of objects in pixels
+        
+        slidingStep — sliding window step
+        
+        values — output vector with values of positive samples 
+        
     */
+        
     void detect(const Mat& image, std::vector<Rect>& objects,
-        float scaleFactor, Size minSize, Size maxSize, float threshold);
+        float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector<float>& values);
+    
+    /* Detect object on image
+
+        image — image for detection
+
+        objects — output array of bounding boxes
+
+        minScaleFactor — min factor by which the image will be resized
+
+        maxScaleFactor — max factor by which the image will be resized
+
+        factorStep — amount added to the scaling factor for each pyramid layer
+
+        threshold — a window is reported only if its classifier value exceeds it
+
+        slidingStep — sliding window step
+
+        values — output vector with the classifier value of each detected object
+    */
+    void detect(const Mat& img, std::vector<Rect>& objects, float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector<float>& values);
 
     /* Write detector to FileStorage */
     void write(FileStorage &fs) const;
@@ -207,6 +240,7 @@ private:
     std::vector<std::vector<int> > features_;
     int model_n_rows_;
     int model_n_cols_;
+    std::string ftype_;
 };
 
 CV_EXPORTS void write(FileStorage& fs, String&, const ICFDetector& detector);
diff --git a/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp b/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp
index 3229e10f1..3873413db 100644
--- a/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp
+++ b/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp
@@ -33,10 +33,12 @@ public:
             {-1, +1}
 
         weights — matrix of sample weights, size 1 x N
+         
+        visited_features: vector of already visited features (ignored in successive calls)
 
     Returns chosen feature index. Feature enumeration starts from 0
     */
-    int train(const Mat& data, const Mat& labels, const Mat& weights);
+    int train(const Mat& data, const Mat& labels, const Mat& weights, const std::vector<int>& visited_features, bool use_fast_log = false);
 
     /* Predict object class given
 
diff --git a/modules/xobjdetect/src/acffeature.cpp b/modules/xobjdetect/src/acffeature.cpp
index fd3fd15da..f23902304 100644
--- a/modules/xobjdetect/src/acffeature.cpp
+++ b/modules/xobjdetect/src/acffeature.cpp
@@ -120,18 +120,14 @@ void ICFFeatureEvaluatorImpl::setChannels(InputArrayOfArrays channels)
     channels_.clear();
     vector<Mat> ch;
     channels.getMatVector(ch);
-    CV_Assert(ch.size() == 10);
 
     for( size_t i = 0; i < ch.size(); ++i )
     {
         const Mat &channel = ch[i];
         Mat integral_channel;
         integral(channel, integral_channel, CV_32F);
-        Mat_<int> chan(integral_channel.rows, integral_channel.cols);
-        for( int row = 0; row < integral_channel.rows; ++row )
-            for( int col = 0; col < integral_channel.cols; ++col )
-                chan(row, col) = (int)integral_channel.at<float>(row, col);
-        channels_.push_back(chan.clone());
+        integral_channel.convertTo(integral_channel, CV_32S);
+        channels_.push_back(integral_channel.clone());
     }
 }
 
@@ -140,11 +136,13 @@ void ICFFeatureEvaluatorImpl::setPosition(Size position)
     position_ = position;
 }
 
+
 int ICFFeatureEvaluatorImpl::evaluate(size_t feature_ind) const
 {
-    CV_Assert(channels_.size() == 10);
-    CV_Assert(feature_ind < features_.size());
-
+    /*
+    
+    //following return is equal to this commented code, left here for readability. The new code runs much faster.
+    * 
     const vector<int>& feature = features_[feature_ind];
     int x = feature[0] + position_.height;
     int y = feature[1] + position_.width;
@@ -153,6 +151,14 @@ int ICFFeatureEvaluatorImpl::evaluate(size_t feature_ind) const
     int n = feature[4];
     const Mat_<int>& ch = channels_[n];
     return ch(y_to + 1, x_to + 1) - ch(y, x_to + 1) - ch(y_to + 1, x) + ch(y, x);
+    */
+  
+    CV_Assert(feature_ind < features_.size());
+
+    return *(channels_[features_[feature_ind][4]].ptr<int>()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][3] + position_.width+1))+ features_[feature_ind][2] + position_.height + 1)) - 
+            *(channels_[features_[feature_ind][4]].ptr<int>()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][1] + position_.width))+ features_[feature_ind][2] + position_.height + 1)) -
+            *(channels_[features_[feature_ind][4]].ptr<int>()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][3] + position_.width+1))+ features_[feature_ind][0] + position_.height)) +
+            *(channels_[features_[feature_ind][4]].ptr<int>()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][1] + position_.width))+ features_[feature_ind][0] + position_.height));
 }
 
 class ACFFeatureEvaluatorImpl : public FeatureEvaluatorImpl
@@ -173,7 +179,6 @@ void ACFFeatureEvaluatorImpl::setChannels(InputArrayOfArrays channels)
     channels_.clear();
     vector<Mat> ch;
     channels.getMatVector(ch);
-    CV_Assert(ch.size() == 10);
 
     for( size_t i = 0; i < ch.size(); ++i )
     {
@@ -203,7 +208,6 @@ void ACFFeatureEvaluatorImpl::setPosition(Size position)
 
 int ACFFeatureEvaluatorImpl::evaluate(size_t feature_ind) const
 {
-    CV_Assert(channels_.size() == 10);
     CV_Assert(feature_ind < features_.size());
 
     const vector<int>& feature = features_[feature_ind];
@@ -271,13 +275,23 @@ vector<vector<int> > generateFeatures(Size window_size, const std::string& type,
 
 void computeChannels(InputArray image, vector<Mat>& channels)
 {
-    Mat src(image.getMat().rows, image.getMat().cols, CV_32FC3);
-    image.getMat().convertTo(src, CV_32FC3, 1./255);
-
     Mat_<float> grad;
-    Mat luv, gray;
-    cvtColor(src, gray, CV_RGB2GRAY);
-    cvtColor(src, luv, CV_RGB2Luv);
+    Mat luv, gray, src;
+    
+    if(image.getMat().channels() > 1)
+    {
+      src = Mat(image.getMat().rows, image.getMat().cols, CV_32FC3);
+      image.getMat().convertTo(src, CV_32FC3, 1./255);
+
+      cvtColor(src, gray, CV_RGB2GRAY);
+      cvtColor(src, luv, CV_RGB2Luv);
+    }
+    else
+    {
+      src = Mat(image.getMat().rows, image.getMat().cols, CV_32FC1);
+      image.getMat().convertTo(src, CV_32FC1, 1./255);
+      src.copyTo(gray);
+    }
 
     Mat_<float> row_der, col_der;
     Sobel(gray, row_der, CV_32F, 0, 1);
@@ -304,10 +318,13 @@ void computeChannels(InputArray image, vector<Mat>& channels)
 
     channels.clear();
 
-    Mat luv_channels[3];
-    split(luv, luv_channels);
-    for( int i = 0; i < 3; ++i )
-        channels.push_back(luv_channels[i]);
+    if(image.getMat().channels() > 1)
+    {
+      Mat luv_channels[3];
+      split(luv, luv_channels);
+      for( int i = 0; i < 3; ++i )
+          channels.push_back(luv_channels[i]);
+    }
 
     channels.push_back(grad);
 
diff --git a/modules/xobjdetect/src/icfdetector.cpp b/modules/xobjdetect/src/icfdetector.cpp
index 068998d54..2e103b4ff 100644
--- a/modules/xobjdetect/src/icfdetector.cpp
+++ b/modules/xobjdetect/src/icfdetector.cpp
@@ -58,34 +58,39 @@ using std::string;
 using std::min;
 using std::max;
 
+
 namespace cv
 {
+
 namespace xobjdetect
 {
 
-void ICFDetector::train(const String& pos_path,
-                        const String& bg_path,
+
+void ICFDetector::train(const vector<String>& pos_filenames,
+                        const vector<String>& bg_filenames,
                         ICFDetectorParams params)
 {
-    vector<String> pos_filenames;
-    glob(pos_path, pos_filenames);
-
-    vector<String> bg_filenames;
-    glob(bg_path, bg_filenames);
+  
+    int color;
+    if(params.is_grayscale == false)
+      color = IMREAD_COLOR;
+    else
+      color = IMREAD_GRAYSCALE;
 
     model_n_rows_ = params.model_n_rows;
     model_n_cols_ = params.model_n_cols;
+    ftype_ = params.features_type;
 
     Size model_size(params.model_n_cols, params.model_n_rows);
 
     vector<Mat> samples; /* positive samples + negative samples */
     Mat sample, resized_sample;
     int pos_count = 0;
-
+  
     for( size_t i = 0; i < pos_filenames.size(); ++i, ++pos_count )
     {
         cout << setw(6) << (i + 1) << "/" << pos_filenames.size() << "\r";
-        Mat img = imread(pos_filenames[i]);
+        Mat img = imread(pos_filenames[i], color);
         resize(img, resized_sample, model_size);
         samples.push_back(resized_sample.clone());
     }
@@ -96,18 +101,16 @@ void ICFDetector::train(const String& pos_path,
     for( size_t i = 0; i < bg_filenames.size(); ++i )
     {
         cout << setw(6) << (i + 1) << "/" << bg_filenames.size() << "\r";
-        Mat img = imread(bg_filenames[i]);
+        Mat img = imread(bg_filenames[i], color);
         for( int j = 0; j < params.bg_per_image; ++j, ++neg_count)
         {
             Rect r;
-            r.x = rng.uniform(0, img.cols);
-            r.width = rng.uniform(r.x + 1, img.cols);
-            r.y = rng.uniform(0, img.rows);
-            r.height = rng.uniform(r.y + 1, img.rows);
-
-            sample = img.colRange(r.x, r.width).rowRange(r.y, r.height);
-            resize(sample, resized_sample, model_size);
-            samples.push_back(resized_sample.clone());
+            r.width = min(model_size.width, img.cols);   // clamp: a background smaller than the model must not give a negative range
+            r.height = min(model_size.height, img.rows);
+            r.x = rng.uniform(0, img.cols - r.width);
+            r.y = rng.uniform(0, img.rows - r.height);
+            resize(img(r), resized_sample, model_size);  // a full-size crop is already model-sized; only clamped crops are rescaled
+            samples.push_back(resized_sample.clone());
         }
     }
     cout << "\n";
@@ -118,9 +121,15 @@ void ICFDetector::train(const String& pos_path,
     for( int i = pos_count; i < pos_count + neg_count; ++i )
         labels(0, i) = -1;
 
-    vector<vector<int> > features = generateFeatures(model_size, "icf",
-        params.feature_count);
-    Ptr<FeatureEvaluator> evaluator = createFeatureEvaluator(features, "icf");
+    
+    vector<vector<int> > features;
+    if(params.is_grayscale == false)
+      features = generateFeatures(model_size, params.features_type,  params.feature_count, 10);
+    else
+      features = generateFeatures(model_size, params.features_type,  params.feature_count, 7);
+    
+    Ptr<FeatureEvaluator> evaluator = createFeatureEvaluator(features, params.features_type);
+
 
     Mat_<int> data = Mat_<int>::zeros((int)features.size(), (int)samples.size());
     Mat_<int> feature_col(1, (int)samples.size());
@@ -141,13 +150,13 @@ void ICFDetector::train(const String& pos_path,
     }
     cout << "\n";
     samples.clear();
-
+        
     WaldBoostParams wparams;
     wparams.weak_count = params.weak_count;
-    wparams.alpha = 0.02f;
+    wparams.alpha = params.alpha;
 
     waldboost_ = createWaldBoost(wparams);
-    vector<int> indices = waldboost_->train(data, labels);
+    vector<int> indices = waldboost_->train(data, labels, params.use_fast_log);
     cout << "indices: ";
     for( size_t i = 0; i < indices.size(); ++i )
         cout << indices[i] << " ";
@@ -163,6 +172,7 @@ void ICFDetector::write(FileStorage& fs) const
     fs << "{";
     fs << "model_n_rows" << model_n_rows_;
     fs << "model_n_cols" << model_n_cols_;
+    fs << "ftype" << String(ftype_.c_str());
     fs << "waldboost";
     waldboost_->write(fs);
     fs << "features" << "[";
@@ -177,8 +187,11 @@ void ICFDetector::write(FileStorage& fs) const
 void ICFDetector::read(const FileNode& node)
 {
     waldboost_ = Ptr<WaldBoost>(createWaldBoost(WaldBoostParams()));
+    String f_temp;
     node["model_n_rows"] >> model_n_rows_;
     node["model_n_cols"] >> model_n_cols_;
+    f_temp = (String)node["ftype"];    // empty when the model file has no "ftype" entry
+    this->ftype_ = f_temp.empty() ? string("icf") : string(f_temp.c_str()); // fall back to "icf", the type train() used before "ftype" was stored
     waldboost_->read(node["waldboost"]);
     FileNode features = node["features"];
     features_.clear();
@@ -191,49 +204,99 @@ void ICFDetector::read(const FileNode& node)
 }
 
 void ICFDetector::detect(const Mat& img, vector<Rect>& objects,
-    float scaleFactor, Size minSize, Size maxSize, float threshold)
+    float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector<float>& values)
 {
+    if( slidingStep <= 0 ) CV_Error(CV_StsBadArg, "slidingStep must be > 0"); // the window loops below advance by slidingStep
+    values.clear(); // objects is cleared as well, so values[i] stays paired with objects[i]
     float scale_from = min(model_n_cols_ / (float)maxSize.width,
                            model_n_rows_ / (float)maxSize.height);
     float scale_to = max(model_n_cols_ / (float)minSize.width,
                          model_n_rows_ / (float)minSize.height);
     objects.clear();
-    Ptr<FeatureEvaluator> evaluator = createFeatureEvaluator(features_, "icf");
+    Ptr<FeatureEvaluator> evaluator = createFeatureEvaluator(features_, ftype_);
     Mat rescaled_image;
-    int step = 8;
     vector<Mat> channels;
+    
     for( float scale = scale_from; scale < scale_to + 0.001; scale *= scaleFactor )
     {
-        cout << "scale " << scale << endl;
         int new_width = int(img.cols * scale);
         new_width -= new_width % 4;
         int new_height = int(img.rows * scale);
         new_height -= new_height % 4;
-
+        
         resize(img, rescaled_image, Size(new_width, new_height));
         computeChannels(rescaled_image, channels);
         evaluator->setChannels(channels);
-        for( int row = 0; row <= rescaled_image.rows - model_n_rows_; row += step)
+        for( int row = 0; row <= rescaled_image.rows - model_n_rows_; row += slidingStep)
+        {
+            for( int col = 0; col <= rescaled_image.cols - model_n_cols_;
+                col += slidingStep )
+            {
+                evaluator->setPosition(Size(row, col));
+                float value = waldboost_->predict(evaluator);
+                if( value > threshold )
+                {
+                    values.push_back(value);
+                    int x = (int)(col / scale);
+                    int y = (int)(row / scale);
+                    int width = (int)(model_n_cols_ / scale);
+                    int height = (int)(model_n_rows_ / scale);
+                    objects.push_back(Rect(x, y, width, height));
+                }
+            }
+        }
+
+    }
+    
+}
+
+void ICFDetector::detect(const Mat& img, vector<Rect>& objects,
+    float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector<float>& values)
+{
+    // Both steps must be positive, otherwise the loops below would never terminate.
+    if(factorStep <= 0)
+      CV_Error(CV_StsBadArg, "factorStep must be > 0");
+    if(slidingStep <= 0)
+      CV_Error(CV_StsBadArg, "slidingStep must be > 0");
+
+    objects.clear();
+    values.clear(); // keep values[i] paired with objects[i]
+    Ptr<FeatureEvaluator> evaluator = createFeatureEvaluator(features_, ftype_);
+    Mat rescaled_image;
+    vector<Mat> channels;
+    
+    for( float scale = minScaleFactor; scale < maxScaleFactor + 0.001; scale += factorStep )
+    {
+        if(scale < 1.0)
+          resize(img, rescaled_image, Size(),scale, scale, INTER_AREA);
+        else if (scale > 1.0)
+          resize(img, rescaled_image, Size(),scale, scale, INTER_CUBIC);
+        else //scale == 1.0
+          img.copyTo(rescaled_image);
+          
+        computeChannels(rescaled_image, channels);
+        evaluator->setChannels(channels);
+        for( int row = 0; row <= rescaled_image.rows - model_n_rows_; row += slidingStep)
         {
             for( int col = 0; col <= rescaled_image.cols - model_n_cols_;
-                col += step )
+                col += slidingStep )
             {
                 evaluator->setPosition(Size(row, col));
                 float value = waldboost_->predict(evaluator);
                 if( value > threshold )
                 {
+                    values.push_back(value);
                     int x = (int)(col / scale);
                     int y = (int)(row / scale);
                     int width = (int)(model_n_cols_ / scale);
                     int height = (int)(model_n_rows_ / scale);
-                    cout << value << " " << x << " " << y << " " << width << " "
-                         << height << endl;
                     objects.push_back(Rect(x, y, width, height));
                 }
             }
         }
 
     }
+    
 }
 
 void write(FileStorage& fs, String&, const ICFDetector& detector)
diff --git a/modules/xobjdetect/src/stump.cpp b/modules/xobjdetect/src/stump.cpp
index c9c9911d8..221445a29 100644
--- a/modules/xobjdetect/src/stump.cpp
+++ b/modules/xobjdetect/src/stump.cpp
@@ -61,7 +61,27 @@ static void cumsum(const Mat_<float>& src, Mat_<float> dst)
     }
 }
 
-int Stump::train(const Mat& data, const Mat& labels, const Mat& weights)
+//fast log implementation. A bit less accurate but ~5x faster
+inline float fast_log2 (float val) // approximate base-2 logarithm of val, computed from its bit pattern
+{
+   int * const    exp_ptr = reinterpret_cast <int *> (&val); // NOTE(review): reads the float through an int pointer; presumably assumes 32-bit IEEE-754 float and 32-bit int - confirm (memcpy would avoid the aliasing question)
+   int            x = *exp_ptr;
+   const int      log_2 = ((x >> 23) & 255) - 128; // 8-bit field above the low 23 bits, offset by 128; added to the result below
+   x &= ~(255 << 23); // clear that 8-bit field ...
+   x += 127 << 23;    // ... and set it to 127
+   *exp_ptr = x;      // writes the modified bits back into val
+
+   val = ((-1.0f/3) * val + 2) * val - 2.0f/3;   // (1) quadratic polynomial in the rewritten val
+
+   return (val + log_2);
+} 
+
+inline float fast_log (const float &val) // approximate natural logarithm built on fast_log2
+{
+   return (fast_log2 (val) * 0.69314718f); // 0.69314718 ~ ln(2): converts the base-2 result to a natural log
+}
+
+int Stump::train(const Mat& data, const Mat& labels, const Mat& weights, const std::vector<int>& visited_features, bool use_fast_log)
 {
     CV_Assert(labels.rows == 1 && labels.cols == data.cols);
     CV_Assert(weights.rows == 1 && weights.cols == data.cols);
@@ -95,8 +115,11 @@ int Stump::train(const Mat& data, const Mat& labels, const Mat& weights)
     /* For every feature */
     for( int row = 0; row < data.rows; ++row )
     {
-        for( int col = 0; col < data.cols; ++col )
-            d(0, col) = data.at<int>(row, col);
+        if(std::find(visited_features.begin(), visited_features.end(), row) != visited_features.end()) {
+              //feature discarded
+              continue;
+        }
+        data.row(row).copyTo(d.row(0));
 
         sortIdx(d, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);
 
@@ -141,8 +164,16 @@ int Stump::train(const Mat& data, const Mat& labels, const Mat& weights)
 
             err = sqrt(pos_right * neg_wrong) + sqrt(pos_wrong * neg_right);
 
-            h_pos = .5f * log((pos_right + eps) / (pos_wrong + eps));
-            h_neg = .5f * log((neg_wrong + eps) / (neg_right + eps));
+            if(use_fast_log)
+            {
+              h_pos = .5f * fast_log((pos_right + eps) / (pos_wrong + eps));
+              h_neg = .5f * fast_log((neg_wrong + eps) / (neg_right + eps));
+            }
+            else
+            {
+              h_pos = .5f * log((pos_right + eps) / (pos_wrong + eps));
+              h_neg = .5f * log((neg_wrong + eps) / (neg_right + eps));
+            }
 
             if( err < min_err )
             {
diff --git a/modules/xobjdetect/src/waldboost.cpp b/modules/xobjdetect/src/waldboost.cpp
index 340aefb2c..09027ced6 100644
--- a/modules/xobjdetect/src/waldboost.cpp
+++ b/modules/xobjdetect/src/waldboost.cpp
@@ -50,10 +50,93 @@ using std::cout;
 using std::endl;
 
 
+
+
+
+
 namespace cv
 {
+
 namespace xobjdetect
 {
+  // Reorders the columns of m in place so that, on return, column j holds what used to be
+  // column indices(0, j). Whole columns are moved, so every row is permuted the same way.
+  //
+  //   m       - matrix whose columns are permuted; modified in place.
+  //   indices - the wanted column order, expected to be a permutation of 0..m.cols-1 such as
+  //             a row produced by sortIdx. When it is empty, the order is taken from
+  //             sortIdx applied to m itself (row 0 of that result is the one used).
+  //
+  // Example: with indices = [2, 0, 1] the columns [A B C] become [C A B]
+  // (column 0 receives old column 2, column 1 old column 0, column 2 old column 1).
+  //
+  // The permutation is applied cycle by cycle, so only two scratch columns are allocated
+  // instead of a second matrix as large as m. A matrix with fewer than two columns is
+  // left untouched.
+  void sort_columns_without_copy(Mat& m, Mat indices)
+  {
+    // zero or one column: nothing to reorder
+    if(m.cols < 2)
+      return;
+
+    if(indices.data == 0)
+      sortIdx(m, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);
+
+    // every column of m needs an entry, otherwise the lookups below would run out of bounds
+    CV_Assert(indices.cols == m.cols);
+
+    // destination(0, s) is the column where the current content of column s has to end up,
+    // i.e. the inverse of the permutation stored in indices
+    Mat destination;
+    sortIdx(indices, destination, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);
+
+    // placed[c] is true once column c holds its final content
+    std::vector<bool> placed(m.cols, false);
+
+    // column currently travelling to its destination
+    Mat carried;
+    // column that was sitting at that destination and has to move next
+    Mat displaced;
+
+    // Walk every cycle of the permutation exactly once. Cycle starts are visited from left
+    // to right, so looking for the next unplaced column never rescans from column 0 and
+    // never indexes placed past its end.
+    for(int start = 0; start < m.cols; start++)
+    {
+      if(placed[start])
+      {
+        // already handled as part of an earlier cycle
+        continue;
+      }
+      placed[start] = true;
+
+      int target = destination.at<int>(0, start);
+      if(target == start)
+      {
+        // value is already at the right place
+        continue;
+      }
+
+      // pick up the first column of the cycle; its slot is refilled when the cycle closes
+      (m.col(start)).copyTo(carried);
+
+      while(target != start)
+      {
+        // store the column that is about to be overwritten
+        (m.col(target)).copyTo(displaced);
+        // insert the carried column at the right place
+        carried.copyTo(m.col(target));
+        placed[target] = true;
+
+        // the displaced column is the next one to move
+        displaced.copyTo(carried);
+        target = destination.at<int>(0, target);
+      }
+
+      // close the cycle: the last displaced column belongs where the cycle started
+      carried.copyTo(m.col(start));
+    }
+  }
 
 class WaldBoostImpl : public WaldBoost
 {
@@ -63,8 +146,8 @@ public:
         params_(params)
     {}
 
-    virtual std::vector<int> train(const Mat& data,
-                                   const Mat& labels);
+    virtual std::vector<int> train(Mat& data,
+                                   const Mat& labels, bool use_fast_log=false);
 
     virtual float predict(
         const Ptr<FeatureEvaluator>& feature_evaluator) const;
@@ -138,13 +221,12 @@ void WaldBoostImpl::write(FileStorage& fs) const
 
 }
 
-vector<int> WaldBoostImpl::train(const Mat& data_, const Mat& labels_)
+vector<int> WaldBoostImpl::train(Mat& data, const Mat& labels_, bool use_fast_log)
 {
-    CV_Assert(labels_.rows == 1 && labels_.cols == data_.cols);
-    CV_Assert(data_.rows >= params_.weak_count);
+    CV_Assert(labels_.rows == 1 && labels_.cols == data.cols);    
+    CV_Assert(data.rows >= params_.weak_count);
 
-    Mat labels, data;
-    data_.copyTo(data);
+    Mat labels;
     labels_.copyTo(labels);
 
     bool null_data = true;
@@ -175,18 +257,18 @@ vector<int> WaldBoostImpl::train(const Mat& data_, const Mat& labels_)
         feature_indices_pool.push_back(ind);
 
     vector<int> feature_indices;
+    vector<int> visited_features;
     Mat_<float> trace = Mat_<float>::zeros(labels.rows, labels.cols);
     stumps_.clear();
     thresholds_.clear();
     for( int i = 0; i < params_.weak_count; ++i)
-    {
-        cout << "stage " << i << endl;
+    {        
         Stump s;
-        int feature_ind = s.train(data, labels, weights);
-        cout << "feature_ind " << feature_ind << endl;
+        int feature_ind = s.train(data, labels, weights, visited_features, use_fast_log);
         stumps_.push_back(s);
         int ind = feature_indices_pool[feature_ind];
-        feature_indices_pool.erase(feature_indices_pool.begin() + feature_ind);
+        //we don't need to erase the feature index anymore, because we ignore them if already visited
+        //feature_indices_pool.erase(feature_indices_pool.begin() + feature_ind);
         feature_indices.push_back(ind);
 
         // Recompute weights
@@ -198,12 +280,13 @@ vector<int> WaldBoostImpl::train(const Mat& data_, const Mat& labels_)
             weights.at<float>(0, col) *= exp(-label * h);
         }
 
-        // Erase row for feature in data
-        Mat fixed_data;
-        fixed_data.push_back(data.rowRange(0, feature_ind));
-        fixed_data.push_back(data.rowRange(feature_ind + 1, data.rows));
+        // set to zero row for feature in data
+        for(int jc = 0; jc<data.cols; jc++)
+        {
+          data.at<int>(feature_ind, jc) = 0;
+        }
+        visited_features.push_back(feature_ind);
 
-        data = fixed_data;
 
 
         // Normalize weights
@@ -218,7 +301,6 @@ vector<int> WaldBoostImpl::train(const Mat& data_, const Mat& labels_)
         sortIdx(trace, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);
         Mat new_weights = Mat_<float>::zeros(weights.rows, weights.cols);
         Mat new_labels = Mat_<int>::zeros(labels.rows, labels.cols);
-        Mat new_data = Mat_<int>::zeros(data.rows, data.cols);
         Mat new_trace;
         for( int col = 0; col < new_weights.cols; ++col )
         {
@@ -226,15 +308,12 @@ vector<int> WaldBoostImpl::train(const Mat& data_, const Mat& labels_)
                 weights.at<float>(0, indices.at<int>(0, col));
             new_labels.at<int>(0, col) =
                 labels.at<int>(0, indices.at<int>(0, col));
-            for( int row = 0; row < new_data.rows; ++row )
-            {
-                new_data.at<int>(row, col) =
-                    data.at<int>(row, indices.at<int>(0, col));
-            }
         }
+        
+        //sort in-place to save memory
+        sort_columns_without_copy(data, indices);
         sort(trace, new_trace, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);
 
-
         // Compute threshold for trace
         /*
         int col = 0;
@@ -262,19 +341,16 @@ vector<int> WaldBoostImpl::train(const Mat& data_, const Mat& labels_)
         }
 
         thresholds_.push_back(new_trace.at<float>(0, max_col));
-        cout << "threshold " << *(thresholds_.end() - 1) << endl;
-
-        cout << "col " << max_col << " size " << data.cols << endl;
 
         // Drop samples below threshold
-        new_data.colRange(max_col, new_data.cols).copyTo(data);
+        //uses Rois instead of copyTo to save memory
+        data = data(Rect(max_col, 0, data.cols - max_col, data.rows));
         new_trace.colRange(max_col, new_trace.cols).copyTo(trace);
         new_weights.colRange(max_col, new_weights.cols).copyTo(weights);
         new_labels.colRange(max_col, new_labels.cols).copyTo(labels);
 
         pos_count = count(labels, +1);
         neg_count = count(labels, -1);
-        cout << "pos_count " << pos_count << "; neg_count " << neg_count << endl;
 
         if( data.cols < 2 || neg_count == 0)
         {
@@ -293,6 +369,7 @@ float WaldBoostImpl::predict(
     {
         int value = feature_evaluator->evaluate(i);
         trace += stumps_[i].predict(value);
+        
         if( trace < thresholds_[i] )
             return -1;
     }