Increasing the dimension of the feature space in the SVMSGD::train function.

pull/6096/head
Marina Noskova 9 years ago
parent 40bf97c6d1
commit acd74037b3
  1. include/opencv2/opencv.hpp (1 line changed)
  2. modules/ml/include/opencv2/ml.hpp (115 lines changed)
  3. modules/ml/include/opencv2/ml/svmsgd.hpp (134 lines changed)
  4. modules/ml/src/precomp.hpp (1 line changed)
  5. modules/ml/src/svmsgd.cpp (297 lines changed)
  6. modules/ml/test/test_precomp.hpp (1 line changed)
  7. modules/ml/test/test_svmsgd.cpp (24 lines changed)
  8. samples/cpp/train_svmsgd.cpp (138 lines changed)

@ -75,7 +75,6 @@
#endif
#ifdef HAVE_OPENCV_ML
#include "opencv2/ml.hpp"
#include "opencv2/ml/svmsgd.hpp"
#endif
#endif

@ -1496,6 +1496,121 @@ public:
CV_WRAP static Ptr<LogisticRegression> create();
};
/****************************************************************************************\
* Stochastic Gradient Descent SVM Classifier *
\****************************************************************************************/
/*!
@brief Stochastic Gradient Descent SVM classifier
SVMSGD provides a fast and easy-to-use implementation of the SVM classifier using the Stochastic Gradient Descent approach, as presented in @cite bottou2010large.
Stochastic gradient descent shows strong performance for large-scale problems, reducing the computing time.
First, create the SVMSGD object. Set the model parameters (type, lambda, gamma0, c) using the functions setType, setLambda, setGamma0 and setC, or set them all at once with setOptimalParameters.
The recommended model type is ASGD.
Then the SVM model can be trained using the train features and the corresponding labels.
After that, the label of a new feature vector can be predicted using the predict function.
@code
// Create an empty model
Ptr<SVMSGD> svmsgd = SVMSGD::create();
// Set the recommended default parameters (ASGD)
svmsgd->setOptimalParameters();
// Train the Stochastic Gradient Descent SVM
svmsgd->train(trainFeatures, ROW_SAMPLE, labels);
// Predict the label of a new feature vector (1xM)
float predictedLabel = svmsgd->predict(newFeatureVector);
@endcode
*/
class CV_EXPORTS_W SVMSGD : public cv::ml::StatModel
{
public:
/** SVMSGD type.
ASGD is often the preferable choice. */
enum SvmsgdType
{
ILLEGAL_VALUE,
SGD, //!< Stochastic Gradient Descent
ASGD //!< Average Stochastic Gradient Descent
};
/**
* @return the weights of the trained model (decision function f(x) = weights * x + shift).
*/
CV_WRAP virtual Mat getWeights() = 0;
/**
* @return the shift of the trained model (decision function f(x) = weights * x + shift).
*/
CV_WRAP virtual float getShift() = 0;
/** Creates empty model.
Use StatModel::train to train the model. Since %SVMSGD has several parameters, you may want to
find the best parameters for your problem or use setOptimalParameters() to set some default parameters.
*/
CV_WRAP static Ptr<SVMSGD> create();
/** Sets optimal parameter values for the chosen SVMSGD model.
* If the chosen type is ASGD, the following parameter values are set:
* lambda = 0.00001;
* gamma0 = 0.05;
* c = 0.75;
* termCrit.maxCount = 100000;
* termCrit.epsilon = 0.00001;
*
* If the chosen type is SGD:
* lambda = 0.0001;
* gamma0 = 0.05;
* c = 1;
* termCrit.maxCount = 100000;
* termCrit.epsilon = 0.00001;
* @param type the type of the SVMSGD classifier. Legal values are SvmsgdType::SGD and SvmsgdType::ASGD.
* The recommended (and default) value is SvmsgdType::ASGD.
*/
CV_WRAP virtual void setOptimalParameters(int type = ASGD) = 0;
/** %Algorithm type, one of SVMSGD::SvmsgdType. */
/** @see setType */
CV_WRAP virtual int getType() const = 0;
/** @copybrief getType @see getType */
CV_WRAP virtual void setType(int type) = 0;
/** Parameter _Lambda_ of a %SVMSGD optimization problem. Default value is 0. */
/** @see setLambda */
CV_WRAP virtual float getLambda() const = 0;
/** @copybrief getLambda @see getLambda */
CV_WRAP virtual void setLambda(float lambda) = 0;
/** Parameter _Gamma0_ of a %SVMSGD optimization problem. Default value is 0. */
/** @see setGamma0 */
CV_WRAP virtual float getGamma0() const = 0;
/** @copybrief getGamma0 @see getGamma0 */
CV_WRAP virtual void setGamma0(float gamma0) = 0;
/** Parameter _C_ of a %SVMSGD optimization problem. Default value is 0. */
/** @see setC */
CV_WRAP virtual float getC() const = 0;
/** @copybrief getC @see getC */
CV_WRAP virtual void setC(float c) = 0;
/** @brief Termination criteria of the training algorithm.
You can specify the maximum number of iterations (maxCount) and/or how much the error could
change between the iterations to make the algorithm continue (epsilon).*/
/** @see setTermCriteria */
CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
/** @copybrief getTermCriteria @see getTermCriteria */
CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
};
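For orientation, a minimal end-to-end sketch against the interface declared above; the toy samples, labels, and query point are illustrative assumptions, not part of the patch:

#include "opencv2/ml.hpp"

using namespace cv;
using namespace cv::ml;

int main()
{
    // Four 2-D training points, two per class (made-up values)
    Mat samples = (Mat_<float>(4, 2) << 0, 0,  1, 0,  9, 9,  10, 9);
    Mat responses = (Mat_<float>(4, 1) << -1, -1, 1, 1);

    // Create an empty model and set the recommended ASGD defaults
    Ptr<SVMSGD> svmsgd = SVMSGD::create();
    svmsgd->setOptimalParameters(SVMSGD::ASGD);

    // StatModel::train with row-major samples
    svmsgd->train(samples, ROW_SAMPLE, responses);

    // Decision function: f(x) = weights * x + shift
    Mat query = (Mat_<float>(1, 2) << 8, 8);
    float predictedLabel = svmsgd->predict(query);  // expected: 1
    return (predictedLabel == 1.f) ? 0 : 1;
}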
/****************************************************************************************\
* Auxiliary functions declarations *
\****************************************************************************************/

@ -1,134 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_ML_SVMSGD_HPP__
#define __OPENCV_ML_SVMSGD_HPP__
#ifdef __cplusplus
#include "opencv2/ml.hpp"
namespace cv
{
namespace ml
{
/****************************************************************************************\
* Stochastic Gradient Descent SVM Classifier *
\****************************************************************************************/
/*!
@brief Stochastic Gradient Descent SVM classifier
SVMSGD provides a fast and easy-to-use implementation of the SVM classifier using the Stochastic Gradient Descent approach, as presented in @cite bottou2010large.
Stochastic gradient descent shows strong performance for large-scale problems, reducing the computing time. This allows a fast and reliable online update of the classifier for each new feature, which
is fundamental when dealing with variations of data over time (like weather and illumination changes in video surveillance, for example).
First, create the SVMSGD object. To enable the online update, a value for updateFrequency should be defined.
Then the SVM model can be trained using the train features and the corresponding labels.
After that, the label of a new feature vector can be predicted using the predict function. If the updateFrequency was defined in the constructor, the predict function will update the weights automatically.
@code
// Initialize object
SVMSGD SvmSgd;
// Train the Stochastic Gradient Descent SVM
SvmSgd.train(trainFeatures, labels);
// Predict label for the new feature vector (1xM)
predictedLabel = SvmSgd.predict(newFeatureVector);
@endcode
*/
class CV_EXPORTS_W SVMSGD : public cv::ml::StatModel
{
public:
enum SvmsgdType
{
ILLEGAL_VALUE,
SGD, //Stochastic Gradient Descent
ASGD //Average Stochastic Gradient Descent
};
/**
* @return the weights of the trained model.
*/
CV_WRAP virtual Mat getWeights() = 0;
CV_WRAP virtual float getShift() = 0;
CV_WRAP static Ptr<SVMSGD> create();
CV_WRAP virtual void setOptimalParameters(int type = ASGD) = 0;
CV_WRAP virtual int getType() const = 0;
CV_WRAP virtual void setType(int type) = 0;
CV_WRAP virtual float getLambda() const = 0;
CV_WRAP virtual void setLambda(float lambda) = 0;
CV_WRAP virtual float getGamma0() const = 0;
CV_WRAP virtual void setGamma0(float gamma0) = 0;
CV_WRAP virtual float getC() const = 0;
CV_WRAP virtual void setC(float c) = 0;
CV_WRAP virtual cv::TermCriteria getTermCriteria() const = 0;
CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
};
} //ml
} //cv
#endif // __cplusplus
#endif // __OPENCV_ML_SVMSGD_HPP__

@ -45,7 +45,6 @@
#include "opencv2/ml.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/utility.hpp"
#include "opencv2/ml/svmsgd.hpp"
#include "opencv2/core/private.hpp"
#include <assert.h>

@ -42,6 +42,12 @@
#include "precomp.hpp"
#include "limits"
//#include "math.h"
#include <iostream>
using std::cout;
using std::endl;
/****************************************************************************************\
* Stochastic Gradient Descent SVM Classifier *
@ -64,7 +70,7 @@ public:
virtual float predict( InputArray samples, OutputArray results=noArray(), int flags = 0 ) const;
virtual bool isClassifier() const { return params.svmsgdType == SGD || params.svmsgdType == ASGD; }
virtual bool isClassifier() const;
virtual bool isTrained() const;
@ -94,21 +100,28 @@ public:
CV_IMPL_PROPERTY_S(cv::TermCriteria, TermCriteria, params.termCrit)
private:
void updateWeights(InputArray sample, bool is_first_class, float gamma);
float calcShift(InputArray trainSamples, InputArray trainResponses) const;
void updateWeights(InputArray sample, bool isFirstClass, float gamma, Mat weights);
std::pair<bool,bool> areClassesEmpty(Mat responses);
void writeParams( FileStorage& fs ) const;
void readParams( const FileNode& fn );
static inline bool isFirstClass(float val) { return val > 0; }
static void normalizeSamples(Mat &matrix, Mat &multiplier, Mat &average);
float calcShift(InputArray _samples, InputArray _responses) const;
static void makeExtendedTrainSamples(const Mat trainSamples, Mat &extendedTrainSamples, Mat &multiplier);
// Vector with SVM weights
Mat weights_;
float shift_;
// Random index generation
RNG rng_;
// Parameters for learning
struct SVMSGDParams
{
@ -127,97 +140,88 @@ Ptr<SVMSGD> SVMSGD::create()
return makePtr<SVMSGDImpl>();
}
bool SVMSGDImpl::train(const Ptr<TrainData>& data, int)
std::pair<bool,bool> SVMSGDImpl::areClassesEmpty(Mat responses)
{
clear();
Mat trainSamples = data->getTrainSamples();
CV_Assert(responses.cols == 1);
std::pair<bool,bool> emptyInClasses(true, true);
int limit_index = responses.rows;
// Initialize varCount
int trainSamplesCount_ = trainSamples.rows;
int varCount = trainSamples.cols;
for(int index = 0; index < limit_index; index++)
{
if (isFirstClass(responses.at<float>(index)))
emptyInClasses.first = false;
else
emptyInClasses.second = false;
// Initialize weights vector with zeros
weights_ = Mat::zeros(1, varCount, CV_32F);
if (!emptyInClasses.first && ! emptyInClasses.second)
break;
}
Mat trainResponses = data->getTrainResponses(); // (trainSamplesCount x 1) matrix
return emptyInClasses;
}
std::pair<bool,bool> are_empty = areClassesEmpty(trainResponses);
void SVMSGDImpl::normalizeSamples(Mat &samples, Mat &multiplier, Mat &average)
{
int featuresCount = samples.cols;
int samplesCount = samples.rows;
if ( are_empty.first && are_empty.second )
average = Mat(1, featuresCount, samples.type());
for (int featureIndex = 0; featureIndex < featuresCount; featureIndex++)
{
weights_.release();
return false;
average.at<float>(featureIndex) = mean(samples.col(featureIndex))[0];
}
if ( are_empty.first || are_empty.second )
for (int sampleIndex = 0; sampleIndex < samplesCount; sampleIndex++)
{
shift_ = are_empty.first ? -1 : 1;
return true;
samples.row(sampleIndex) -= average;
}
Mat featureNorm(1, featuresCount, samples.type());
for (int featureIndex = 0; featureIndex < featuresCount; featureIndex++)
{
featureNorm.at<float>(featureIndex) = norm(samples.col(featureIndex));
}
Mat currentSample;
float gamma = 0;
Mat lastWeights = Mat::zeros(1, varCount, CV_32F); //weights vector for calculating terminal criterion
Mat averageWeights; //average weights vector for ASGD model
double err = DBL_MAX;
if (params.svmsgdType == ASGD)
multiplier = sqrt(samplesCount) / featureNorm;
for (int sampleIndex = 0; sampleIndex < samplesCount; sampleIndex++)
{
averageWeights = Mat::zeros(1, varCount, CV_32F);
samples.row(sampleIndex) = samples.row(sampleIndex).mul(multiplier);
}
}
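In symbols, normalizeSamples above centers every feature column and rescales it so that the centered column has Euclidean norm \sqrt{n} (unit root mean square):

x'_{ij} = \sqrt{n}\,\frac{x_{ij} - \mu_j}{\lVert x_{\cdot j} - \mu_j \rVert_2},
\qquad \mu_j = \frac{1}{n}\sum_{i=1}^{n} x_{ij}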
// Stochastic gradient descent SVM
for (int iter = 0; (iter < params.termCrit.maxCount)&&(err > params.termCrit.epsilon); iter++)
void SVMSGDImpl::makeExtendedTrainSamples(const Mat trainSamples, Mat &extendedTrainSamples, Mat &multiplier)
{
//generate sample number
int randomNumber = rng_.uniform(0, trainSamplesCount_);
Mat normalisedTrainSamples = trainSamples.clone();
int samplesCount = normalisedTrainSamples.rows;
currentSample = trainSamples.row(randomNumber);
Mat average;
//update gamma
gamma = params.gamma0 * std::pow((1 + params.lambda * params.gamma0 * (float)iter), (-params.c));
normalizeSamples(normalisedTrainSamples, multiplier, average);
bool is_first_class = isFirstClass(trainResponses.at<float>(randomNumber));
updateWeights( currentSample, is_first_class, gamma );
Mat onesCol = Mat::ones(samplesCount, 1, CV_32F);
cv::hconcat(normalisedTrainSamples, onesCol, extendedTrainSamples);
//average weights (only for ASGD model)
if (params.svmsgdType == ASGD)
{
averageWeights = ((float)iter/ (1 + (float)iter)) * averageWeights + weights_ / (1 + (float) iter);
//cout << "SVMSGDImpl::makeExtendedTrainSamples average: \n" << average << endl;
//cout << "SVMSGDImpl::makeExtendedTrainSamples multiplier: \n" << multiplier << endl;
}
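Appending the column of ones in makeExtendedTrainSamples folds the shift into the weight vector, so the SGD loop can optimize a single extended vector:

\tilde{x}_i = (x_i,\, 1), \qquad \tilde{w} \cdot \tilde{x}_i = w \cdot x_i + b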
err = norm(weights_ - lastWeights);
weights_.copyTo(lastWeights);
}
if (params.svmsgdType == ASGD)
void SVMSGDImpl::updateWeights(InputArray _sample, bool firstClass, float gamma, Mat weights)
{
weights_ = averageWeights;
}
shift_ = calcShift(trainSamples, trainResponses);
Mat sample = _sample.getMat();
return true;
}
int response = firstClass ? 1 : -1; // ensure that trainResponses are -1 or 1
std::pair<bool,bool> SVMSGDImpl::areClassesEmpty(Mat responses)
if ( sample.dot(weights) * response > 1)
{
std::pair<bool,bool> are_classes_empty(true, true);
int limit_index = responses.rows;
for(int index = 0; index < limit_index; index++)
{
if (isFirstClass(responses.at<float>(index,0)))
are_classes_empty.first = false;
// Not a support vector, only apply weight decay
weights *= (1.f - gamma * params.lambda);
}
else
are_classes_empty.second = false;
if (!are_classes_empty.first && ! are_classes_empty.second)
break;
{
// It's a support vector, add it to the weights
weights -= (gamma * params.lambda) * weights - (gamma * response) * sample;
}
return are_classes_empty;
}
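Both branches of updateWeights are the stochastic subgradient step on the regularized hinge loss of @cite bottou2010large, written out as:

E_i(w) = \frac{\lambda}{2}\lVert w\rVert^2 + \max\bigl(0,\; 1 - y_i\, w \cdot x_i\bigr),
\qquad
w \leftarrow
\begin{cases}
(1 - \gamma\lambda)\, w, & y_i\, w \cdot x_i > 1,\\
(1 - \gamma\lambda)\, w + \gamma\, y_i\, x_i, & \text{otherwise.}
\end{cases}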
float SVMSGDImpl::calcShift(InputArray _samples, InputArray _responses) const
@ -232,12 +236,12 @@ float SVMSGDImpl::calcShift(InputArray _samples, InputArray _responses) const
for (int samplesIndex = 0; samplesIndex < trainSamplesCount; samplesIndex++)
{
Mat currentSample = trainSamples.row(samplesIndex);
float scalar_product = currentSample.dot(weights_);
float dotProduct = currentSample.dot(weights_);
bool is_first_class = isFirstClass(trainResponses.at<float>(samplesIndex));
int index = is_first_class ? 0:1;
float sign_to_mul = is_first_class ? 1 : -1;
float cur_distance = scalar_product * sign_to_mul ;
bool firstClass = isFirstClass(trainResponses.at<float>(samplesIndex));
int index = firstClass ? 0:1;
float signToMul = firstClass ? 1 : -1;
float cur_distance = dotProduct * signToMul;
if (cur_distance < distance_to_classes[index])
{
@ -245,10 +249,109 @@ float SVMSGDImpl::calcShift(InputArray _samples, InputArray _responses) const
}
}
//todo: areClassesEmpty(); make const;
return -(distance_to_classes[0] - distance_to_classes[1]) / 2.f;
}
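Equivalently, calcShift places the decision boundary midway between the closest samples of the two classes along the direction of the weights:

\text{shift} = -\frac{\min_{y_i = +1} w \cdot x_i \;+\; \max_{y_i = -1} w \cdot x_i}{2}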
bool SVMSGDImpl::train(const Ptr<TrainData>& data, int)
{
//cout << "SVMSGDImpl::train begin" << endl;
clear();
CV_Assert( isClassifier() ); //toDo: consider
Mat trainSamples = data->getTrainSamples();
//cout << "SVMSGDImpl::train trainSamples: \n" << trainSamples << endl;
int featureCount = trainSamples.cols;
Mat trainResponses = data->getTrainResponses(); // (trainSamplesCount x 1) matrix
//cout << "SVMSGDImpl::train trainresponses: \n" << trainResponses << endl;
std::pair<bool,bool> areEmpty = areClassesEmpty(trainResponses);
//cout << "SVMSGDImpl::train areEmpty" << areEmpty.first << "," << areEmpty.second << endl;
if ( areEmpty.first && areEmpty.second )
{
return false;
}
if ( areEmpty.first || areEmpty.second )
{
weights_ = Mat::zeros(1, featureCount, CV_32F);
shift_ = areEmpty.first ? -1 : 1;
return true;
}
Mat extendedTrainSamples;
Mat multiplier;
makeExtendedTrainSamples(trainSamples, extendedTrainSamples, multiplier);
//cout << "SVMSGDImpl::train extendedTrainSamples: \n" << extendedTrainSamples << endl;
int extendedTrainSamplesCount = extendedTrainSamples.rows;
int extendedFeatureCount = extendedTrainSamples.cols;
Mat extendedWeights = Mat::zeros(1, extendedFeatureCount, CV_32F); // Initialize extendedWeights vector with zeros
Mat previousWeights = Mat::zeros(1, extendedFeatureCount, CV_32F); //extendedWeights vector for calculating terminal criterion
Mat averageExtendedWeights; //average extendedWeights vector for ASGD model
if (params.svmsgdType == ASGD)
{
averageExtendedWeights = Mat::zeros(1, extendedFeatureCount, CV_32F);
}
RNG rng(0);
int maxCount = (params.termCrit.type & TermCriteria::COUNT) ? params.termCrit.maxCount : INT_MAX;
double epsilon = (params.termCrit.type & TermCriteria::EPS) ? params.termCrit.epsilon : 0;
double err = DBL_MAX;
// Stochastic gradient descent SVM
for (int iter = 0; (iter < maxCount) && (err > epsilon); iter++)
{
int randomNumber = rng.uniform(0, extendedTrainSamplesCount); //generate sample number
Mat currentSample = extendedTrainSamples.row(randomNumber);
bool firstClass = isFirstClass(trainResponses.at<float>(randomNumber));
float gamma = params.gamma0 * std::pow((1 + params.lambda * params.gamma0 * (float)iter), (-params.c)); //update gamma
updateWeights( currentSample, firstClass, gamma, extendedWeights );
//average weights (only for ASGD model)
if (params.svmsgdType == ASGD)
{
averageExtendedWeights = ((float)iter/ (1 + (float)iter)) * averageExtendedWeights + extendedWeights / (1 + (float) iter);
err = norm(averageExtendedWeights - previousWeights);
averageExtendedWeights.copyTo(previousWeights);
}
else
{
err = norm(extendedWeights - previousWeights);
extendedWeights.copyTo(previousWeights);
}
}
if (params.svmsgdType == ASGD)
{
extendedWeights = averageExtendedWeights;
}
//cout << "SVMSGDImpl::train extendedWeights: \n" << extendedWeights << endl;
Rect roi(0, 0, featureCount, 1);
weights_ = extendedWeights(roi);
weights_ = weights_.mul(1/multiplier);
//cout << "SVMSGDImpl::train weights: \n" << weights_ << endl;
shift_ = calcShift(trainSamples, trainResponses);
//cout << "SVMSGDImpl::train shift = " << shift_ << endl;
return true;
}
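The step size and the ASGD running average in the training loop above follow the schedules:

\gamma_t = \gamma_0\,(1 + \lambda\,\gamma_0\, t)^{-c},
\qquad
\bar{w}_{t+1} = \frac{t}{t+1}\,\bar{w}_t + \frac{1}{t+1}\, w_{t+1} = \frac{1}{t+1}\sum_{k=1}^{t+1} w_k

so the ASGD weights returned after the loop are the running mean of all iterates.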
float SVMSGDImpl::predict( InputArray _samples, OutputArray _results, int ) const
{
float result = 0;
@ -269,37 +372,21 @@ float SVMSGDImpl::predict( InputArray _samples, OutputArray _results, int ) cons
results = Mat(1, 1, CV_32F, &result);
}
Mat currentSample;
float criterion = 0;
for (int sampleIndex = 0; sampleIndex < nSamples; sampleIndex++)
{
currentSample = samples.row(sampleIndex);
criterion = currentSample.dot(weights_) + shift_;
Mat currentSample = samples.row(sampleIndex);
float criterion = currentSample.dot(weights_) + shift_;
results.at<float>(sampleIndex) = (criterion >= 0) ? 1 : -1;
}
return result;
}
void SVMSGDImpl::updateWeights(InputArray _sample, bool is_first_class, float gamma)
{
Mat sample = _sample.getMat();
int response = is_first_class ? 1 : -1; // ensure that trainResponses are -1 or 1
if ( sample.dot(weights_) * response > 1)
{
// Not a support vector, only apply weight decay
weights_ *= (1.f - gamma * params.lambda);
}
else
bool SVMSGDImpl::isClassifier() const
{
// It's a support vector, add it to the weights
weights_ -= (gamma * params.lambda) * weights_ - gamma * response * sample;
//std::cout << "sample " << sample << std::endl;
//std::cout << "weights_ " << weights_ << std::endl;
}
return (params.svmsgdType == SGD || params.svmsgdType == ASGD)
&&
(params.lambda > 0) && (params.gamma0 > 0) && (params.c >= 0);
}
bool SVMSGDImpl::isTrained() const
@ -314,8 +401,8 @@ void SVMSGDImpl::write(FileStorage& fs) const
writeParams( fs );
fs << "shift" << shift_;
fs << "weights" << weights_;
fs << "shift" << shift_;
}
void SVMSGDImpl::writeParams( FileStorage& fs ) const
@ -359,8 +446,8 @@ void SVMSGDImpl::read(const FileNode& fn)
readParams(fn);
shift_ = (float) fn["shift"];
fn["weights"] >> weights_;
fn["shift"] >> shift_;
}
void SVMSGDImpl::readParams( const FileNode& fn )
@ -393,21 +480,19 @@ void SVMSGDImpl::readParams( const FileNode& fn )
(params.termCrit.maxCount > 0 ? TermCriteria::COUNT : 0);
}
else
params.termCrit = TermCriteria( TermCriteria::EPS + TermCriteria::COUNT, 1000, FLT_EPSILON );
params.termCrit = TermCriteria( TermCriteria::EPS + TermCriteria::COUNT, 100000, FLT_EPSILON );
}
void SVMSGDImpl::clear()
{
weights_.release();
shift_ = 0;
}
SVMSGDImpl::SVMSGDImpl()
{
clear();
rng_(0);
params.svmsgdType = ILLEGAL_VALUE;
@ -426,20 +511,20 @@ void SVMSGDImpl::setOptimalParameters(int type)
{
case SGD:
params.svmsgdType = SGD;
params.lambda = 0.00001;
params.lambda = 0.0001;
params.gamma0 = 0.05;
params.c = 1;
params.termCrit.maxCount = 50000;
params.termCrit.epsilon = 0.00000001;
params.termCrit.maxCount = 100000;
params.termCrit.epsilon = 0.00001;
break;
case ASGD:
params.svmsgdType = ASGD;
params.lambda = 0.00001;
params.gamma0 = 0.5;
params.gamma0 = 0.05;
params.c = 0.75;
params.termCrit.maxCount = 100000;
params.termCrit.epsilon = 0.000001;
params.termCrit.epsilon = 0.00001;
break;
default:

@ -13,7 +13,6 @@
#include <map>
#include "opencv2/ts.hpp"
#include "opencv2/ml.hpp"
#include "opencv2/ml/svmsgd.hpp"
#include "opencv2/core/core_c.h"
#define CV_NBAYES "nbayes"

@ -52,7 +52,7 @@ using cv::ml::TrainData;
class CV_SVMSGDTrainTest : public cvtest::BaseTest
{
public:
CV_SVMSGDTrainTest(Mat _weights, float _shift);
CV_SVMSGDTrainTest(Mat _weights, float shift);
private:
virtual void run( int start_from );
float decisionFunction(Mat sample, Mat weights, float shift);
@ -60,7 +60,7 @@ private:
cv::Ptr<TrainData> data;
cv::Mat testSamples;
cv::Mat testResponses;
static const int TEST_VALUE_LIMIT = 50;
static const int TEST_VALUE_LIMIT = 500;
};
CV_SVMSGDTrainTest::CV_SVMSGDTrainTest(Mat weights, float shift)
@ -81,6 +81,11 @@ CV_SVMSGDTrainTest::CV_SVMSGDTrainTest(Mat weights, float shift)
responses.at<float>( sampleIndex ) = decisionFunction(samples.row(sampleIndex), weights, shift) > 0 ? 1 : -1;
}
std::cout << "real weights\n" << weights/norm(weights) << "\n" << std::endl;
std::cout << "real shift \n" << shift/norm(weights) << "\n" << std::endl;
data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
int testSamplesCount = 100000;
@ -100,6 +105,7 @@ void CV_SVMSGDTrainTest::run( int /*start_from*/ )
cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
svmsgd->setOptimalParameters(SVMSGD::ASGD);
svmsgd->setTermCriteria(TermCriteria(TermCriteria::EPS, 0, 0.00005));
svmsgd->train(data);
@ -116,6 +122,12 @@ void CV_SVMSGDTrainTest::run( int /*start_from*/ )
errCount++;
}
float normW = norm(svmsgd->getWeights());
std::cout << "found weights\n" << svmsgd->getWeights()/normW << "\n" << std::endl;
std::cout << "found shift \n" << svmsgd->getShift()/normW << "\n" << std::endl;
float err = (float)errCount / testSamplesCount;
std::cout << "err " << err << std::endl;
@ -138,8 +150,8 @@ TEST(ML_SVMSGD, train0)
weights.create(1, varCount, CV_32FC1);
weights.at<float>(0) = 1;
weights.at<float>(1) = 0;
float shift = 5;
cv::RNG rng(1);
float shift = rng.uniform(-varCount, varCount);
CV_SVMSGDTrainTest test(weights, shift);
test.safe_run();
@ -157,7 +169,7 @@ TEST(ML_SVMSGD, train1)
cv::RNG rng(0);
rng.fill(weights, RNG::UNIFORM, lowerLimit, upperLimit);
float shift = rng.uniform(-5.f, 5.f);
float shift = rng.uniform(-varCount, varCount);
CV_SVMSGDTrainTest test(weights, shift);
test.safe_run();
@ -175,7 +187,7 @@ TEST(ML_SVMSGD, train2)
cv::RNG rng(0);
rng.fill(weights, RNG::UNIFORM, lowerLimit, upperLimit);
float shift = rng.uniform(-1000.f, 1000.f);
float shift = rng.uniform(-varCount, varCount);
CV_SVMSGDTrainTest test(weights,shift);
test.safe_run();

@ -12,10 +12,8 @@ using namespace cv::ml;
struct Data
{
Mat img;
Mat samples;
Mat responses;
RNG rng;
//Point points[2];
Mat samples; //Set of training samples. Contains points on the image
Mat responses; //Set of responses for the training samples
Data()
{
@ -24,22 +22,34 @@ struct Data
}
};
//Train with the SVMSGD algorithm
//(samples, responses) is the training set
//weights is the resulting vector of the SVMSGD decision function
bool doTrain(const Mat samples, const Mat responses, Mat &weights, float &shift);
bool findPointsForLine(const Mat &weights, float shift, Point (&points)[2]);
bool findCrossPoint(const Mat &weights, float shift, const std::pair<Point,Point> &segment, Point &crossPoint);
void fillSegments(std::vector<std::pair<Point,Point> > &segments);
//finds two points for drawing the line (w*x + shift = 0)
bool findPointsForLine(const Mat &weights, float shift, Point (&points)[2], int width, int height);
//finds the cross point of the line (w*x + shift = 0) with a border segment ( (y = HEIGHT, 0 <= x <= WIDTH) or (x = WIDTH, 0 <= y <= HEIGHT) )
bool findCrossPointWithBorders(const Mat &weights, float shift, const std::pair<Point,Point> &segment, Point &crossPoint);
//initializes the border segments ( (y = HEIGHT, 0 <= x <= WIDTH) and (x = WIDTH, 0 <= y <= HEIGHT) )
void fillSegments(std::vector<std::pair<Point,Point> > &segments, int width, int height);
//redraws the point set and the line (w*x + shift = 0)
void redraw(Data data, const Point points[2]);
void addPointsRetrainAndRedraw(Data &data, int x, int y);
//adds a point to the training set, retrains the SVMSGD model and redraws the results on the image
void addPointRetrainAndRedraw(Data &data, int x, int y);
bool doTrain( const Mat samples, const Mat responses, Mat &weights, float &shift)
{
cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
svmsgd->setOptimalParameters(SVMSGD::ASGD);
svmsgd->setTermCriteria(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 50000, 0.0000001));
svmsgd->setLambda(0.01);
svmsgd->setGamma0(1);
// svmsgd->setC(5);
svmsgd->setTermCriteria(TermCriteria(TermCriteria::EPS, 0, 0.00000001));
svmsgd->setLambda(0.00000001);
cv::Ptr<TrainData> train_data = TrainData::create(samples, cv::ml::ROW_SAMPLE, responses);
svmsgd->train( train_data );
@ -49,36 +59,39 @@ bool doTrain( const Mat samples, const Mat responses, Mat &weights, float &shift
weights = svmsgd->getWeights();
shift = svmsgd->getShift();
std::cout << weights << std::endl;
std::cout << shift << std::endl;
return true;
}
return false;
}
bool findCrossPoint(const Mat &weights, float shift, const std::pair<Point,Point> &segment, Point &crossPoint)
bool findCrossPointWithBorders(const Mat &weights, float shift, const std::pair<Point,Point> &segment, Point &crossPoint)
{
int x = 0;
int y = 0;
// the (0,0) corner case is handled badly
if (segment.first.x == segment.second.x && weights.at<float>(1) != 0)
int xMin = std::min(segment.first.x, segment.second.x);
int xMax = std::max(segment.first.x, segment.second.x);
int yMin = std::min(segment.first.y, segment.second.y);
int yMax = std::max(segment.first.y, segment.second.y);
CV_Assert(xMin == xMax || yMin == yMax);
if (xMin == xMax && weights.at<float>(1) != 0)
{
x = segment.first.x;
y = -(weights.at<float>(0) * x + shift) / weights.at<float>(1);
if (y >= 0 && y <= HEIGHT)
x = xMin;
y = std::floor( - (weights.at<float>(0) * x + shift) / weights.at<float>(1));
if (y >= yMin && y <= yMax)
{
crossPoint.x = x;
crossPoint.y = y;
return true;
}
}
else if (segment.first.y == segment.second.y && weights.at<float>(0) != 0)
else if (yMin == yMax && weights.at<float>(0) != 0)
{
y = segment.first.y;
x = - (weights.at<float>(1) * y + shift) / weights.at<float>(0);
if (x >= 0 && x <= WIDTH)
y = yMin;
x = std::floor( - (weights.at<float>(1) * y + shift) / weights.at<float>(0));
if (x >= xMin && x <= xMax)
{
crossPoint.x = x;
crossPoint.y = y;
@ -88,7 +101,7 @@ bool findCrossPoint(const Mat &weights, float shift, const std::pair<Point,Point
return false;
}
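On each border segment the cross point solves the line equation w \cdot x + \text{shift} = 0 for the free coordinate; for a vertical border x = x_0 and a horizontal border y = y_0:

y = -\frac{w_0\, x_0 + \text{shift}}{w_1},
\qquad
x = -\frac{w_1\, y_0 + \text{shift}}{w_0}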
bool findPointsForLine(const Mat &weights, float shift, Point (&points)[2])
bool findPointsForLine(const Mat &weights, float shift, Point (&points)[2], int width, int height)
{
if (weights.empty())
{
@ -97,42 +110,43 @@ bool findPointsForLine(const Mat &weights, float shift, Point (&points)[2])
int foundPointsCount = 0;
std::vector<std::pair<Point,Point> > segments;
fillSegments(segments);
fillSegments(segments, width, height);
for (int i = 0; i < 4; i++)
for (uint i = 0; i < segments.size(); i++)
{
if (findCrossPoint(weights, shift, segments[i], points[foundPointsCount]))
if (findCrossPointWithBorders(weights, shift, segments[i], points[foundPointsCount]))
foundPointsCount++;
if (foundPointsCount > 2)
if (foundPointsCount >= 2)
break;
}
return true;
}
void fillSegments(std::vector<std::pair<Point,Point> > &segments)
void fillSegments(std::vector<std::pair<Point,Point> > &segments, int width, int height)
{
std::pair<Point,Point> curSegment;
std::pair<Point,Point> currentSegment;
curSegment.first = Point(0,0);
curSegment.second = Point(0,HEIGHT);
segments.push_back(curSegment);
currentSegment.first = Point(width, 0);
currentSegment.second = Point(width, height);
segments.push_back(currentSegment);
curSegment.first = Point(0,0);
curSegment.second = Point(WIDTH,0);
segments.push_back(curSegment);
currentSegment.first = Point(0, height);
currentSegment.second = Point(width, height);
segments.push_back(currentSegment);
curSegment.first = Point(WIDTH,0);
curSegment.second = Point(WIDTH,HEIGHT);
segments.push_back(curSegment);
currentSegment.first = Point(0, 0);
currentSegment.second = Point(width, 0);
segments.push_back(currentSegment);
curSegment.first = Point(0,HEIGHT);
curSegment.second = Point(WIDTH,HEIGHT);
segments.push_back(curSegment);
currentSegment.first = Point(0, 0);
currentSegment.second = Point(0, height);
segments.push_back(currentSegment);
}
void redraw(Data data, const Point points[2])
{
data.img = Mat::zeros(HEIGHT, WIDTH, CV_8UC3);
data.img.setTo(0);
Point center;
int radius = 3;
Scalar color;
@ -148,43 +162,21 @@ void redraw(Data data, const Point points[2])
imshow("Train svmsgd", data.img);
}
void addPointsRetrainAndRedraw(Data &data, int x, int y)
void addPointRetrainAndRedraw(Data &data, int x, int y)
{
Mat currentSample(1, 2, CV_32F);
//start
/*
Mat _weights;
_weights.create(1, 2, CV_32FC1);
_weights.at<float>(0) = 1;
_weights.at<float>(1) = -1;
int _x, _y;
for (int i=0;i<199;i++)
{
_x = data.rng.uniform(0,800);
_y = data.rng.uniform(0,500);*/
currentSample.at<float>(0,0) = x;
currentSample.at<float>(0,1) = y;
//if (currentSample.dot(_weights) > 0)
//data.responses.push_back(1);
// else data.responses.push_back(-1);
//finish
data.samples.push_back(currentSample);
Mat weights(1, 2, CV_32F);
float shift = 0;
if (doTrain(data.samples, data.responses, weights, shift))
{
Point points[2];
shift = 0;
findPointsForLine(weights, shift, points);
findPointsForLine(weights, shift, points, data.img.cols, data.img.rows);
redraw(data, points);
}
@ -199,13 +191,13 @@ static void onMouse( int event, int x, int y, int, void* pData)
{
case CV_EVENT_LBUTTONUP:
data.responses.push_back(1);
addPointsRetrainAndRedraw(data, x, y);
addPointRetrainAndRedraw(data, x, y);
break;
case CV_EVENT_RBUTTONDOWN:
data.responses.push_back(-1);
addPointsRetrainAndRedraw(data, x, y);
addPointRetrainAndRedraw(data, x, y);
break;
}
@ -213,14 +205,10 @@ static void onMouse( int event, int x, int y, int, void* pData)
int main()
{
Data data;
setMouseCallback( "Train svmsgd", onMouse, &data );
waitKey();
return 0;
}
