From 891bcd8491ba3f35c2a64459cf73272826a04255 Mon Sep 17 00:00:00 2001
From: Alex Leontiev
Date: Tue, 24 Sep 2013 07:51:21 +0800
Subject: [PATCH] Finish implementing the Nonlinear Conjugate Gradient

Now everything is prepared for the pull request.
---
 modules/optim/doc/conjugate_gradient.rst      | 11 ---
 modules/optim/doc/downhill_simplex_method.rst | 13 ++--
 modules/optim/doc/optim.rst                   |  2 +-
 modules/optim/src/conjugate_gradient.cpp      | 78 +++++++++++++++++--
 .../optim/test/test_conjugate_gradient.cpp    | 23 +++---
 5 files changed, 93 insertions(+), 34 deletions(-)
 delete mode 100644 modules/optim/doc/conjugate_gradient.rst

diff --git a/modules/optim/doc/conjugate_gradient.rst b/modules/optim/doc/conjugate_gradient.rst
deleted file mode 100644
index cd96974652..0000000000
--- a/modules/optim/doc/conjugate_gradient.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-Conjugate Gradient
-=======================
-
-.. highlight:: cpp
-
-optim::ConjGradSolver
----------------------------------
-
-.. ocv:class:: optim::ConjGradSolver
-
-This class is used
diff --git a/modules/optim/doc/downhill_simplex_method.rst b/modules/optim/doc/downhill_simplex_method.rst
index 94d084c23f..bd0e194393 100644
--- a/modules/optim/doc/downhill_simplex_method.rst
+++ b/modules/optim/doc/downhill_simplex_method.rst
@@ -8,14 +8,15 @@ optim::DownhillSolver

 .. ocv:class:: optim::DownhillSolver

-This class is used to perform the non-linear non-constrained *minimization* of a function, given on an *n*-dimensional Euclidean space,
+This class is used to perform the non-linear non-constrained *minimization* of a function, defined on an *n*-dimensional Euclidean space,
 using the **Nelder-Mead method**, also known as **downhill simplex method**. The basic idea about the method can be obtained from
 (`http://en.wikipedia.org/wiki/Nelder-Mead_method <http://en.wikipedia.org/wiki/Nelder-Mead_method>`_). It should be noted, that
 this method, although deterministic, is rather a heuristic and therefore may converge to a local minima, not necessary a global one.
 It is iterative optimization technique, which at each step uses an information about the values of a function evaluated only at
 *n+1* points, arranged as a *simplex* in *n*-dimensional space (hence the second name of the method). At each step new point is
 chosen to evaluate function at, obtained value is compared with previous ones and based on this information simplex changes it's shape
-, slowly moving to the local minimum.
+, slowly moving to the local minimum. Thus this method uses *only* function values to make its decisions, in contrast to, say, the Nonlinear
+Conjugate Gradient method (which is also implemented in ``optim``).

 Algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
 vertices of simplex are within ``termcrit.epsilon`` range or simplex becomes so small that it
@@ -30,9 +31,9 @@ positive integer ``termcrit.maxCount`` and positive non-integer ``termcrit.epsil
        class CV_EXPORTS Function
        {
        public:
-           virtual ~Function() {}
-           //! ndim - dimensionality
-           virtual double calc(const double* x) const = 0;
+           virtual ~Function() {}
+           virtual double calc(const double* x) const = 0;
+           virtual void getGradient(const double* /*x*/,double* /*grad*/) {}
        };

        virtual Ptr<Function> getFunction() const = 0;
@@ -150,7 +151,7 @@ optim::createDownhillSolver
 This function returns the reference to the ready-to-use ``DownhillSolver`` object. All the parameters are optional, so this procedure can be called
 even without parameters at all. In this case, the default values will be used.
 As default value for terminal criteria are the only sensible ones, ``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``
 should be called upon the obtained object, if the respective parameters
-were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss the out and call the
+were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss them out and call the
 ``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``) are absolutely equivalent (and will drop the same errors in the
 same way, should invalid input be detected).
diff --git a/modules/optim/doc/optim.rst b/modules/optim/doc/optim.rst
index dfa4b65df9..113882eee0 100644
--- a/modules/optim/doc/optim.rst
+++ b/modules/optim/doc/optim.rst
@@ -10,4 +10,4 @@ optim. Generic numerical optimization
     linear_programming
     downhill_simplex_method
     primal_dual_algorithm
-    conjugate_gradient
+    nonlinear_conjugate_gradient
diff --git a/modules/optim/src/conjugate_gradient.cpp b/modules/optim/src/conjugate_gradient.cpp
index 7e555c6cc5..186d35a1f5 100644
--- a/modules/optim/src/conjugate_gradient.cpp
+++ b/modules/optim/src/conjugate_gradient.cpp
@@ -1,8 +1,11 @@
 #include "precomp.hpp"
+#undef ALEX_DEBUG
 #include "debug.hpp"

 namespace cv{namespace optim{
+#define SEC_METHOD_ITERATIONS 4
+#define INITIAL_SEC_METHOD_SIGMA 0.1

 class ConjGradSolverImpl : public ConjGradSolver
 {
 public:
@@ -16,9 +19,45 @@ namespace cv{namespace optim{
        Ptr<Solver::Function> _Function;
        TermCriteria _termcrit;
        Mat_<double> d,r,buf_x,r_old;
+       Mat_<double> minimizeOnTheLine_buf1,minimizeOnTheLine_buf2;
    private:
+       static void minimizeOnTheLine(Ptr<Solver::Function> _f,Mat_<double>& x,const Mat_<double>& d,Mat_<double>& buf1,Mat_<double>& buf2);
    };
+
+   void ConjGradSolverImpl::minimizeOnTheLine(Ptr<Solver::Function> _f,Mat_<double>& x,const Mat_<double>& d,Mat_<double>& buf1,
+           Mat_<double>& buf2){
+       double sigma=INITIAL_SEC_METHOD_SIGMA;
+       buf1=0.0;
+       buf2=0.0;
+
+       dprintf(("before minimizeOnTheLine\n"));
+       dprintf(("x:\n"));
+       print_matrix(x);
+       dprintf(("d:\n"));
+       print_matrix(d);
+
+       for(int i=0;i<SEC_METHOD_ITERATIONS;i++){
+           _f->getGradient((double*)x.data,(double*)buf1.data);
+           dprintf(("buf1:\n"));
+           print_matrix(buf1);
+           x=x+sigma*d;
+           _f->getGradient((double*)x.data,(double*)buf2.data);
+           dprintf(("buf2:\n"));
+           print_matrix(buf2);
+           double d1=buf1.dot(d), d2=buf2.dot(d);
+           if((d1-d2)==0){
+               break;
+           }
+           double alpha=-sigma*d1/(d2-d1);
+           dprintf(("(buf2.dot(d)-buf1.dot(d))=%f\nalpha=%f\n",(buf2.dot(d)-buf1.dot(d)),alpha));
+           x=x+(alpha-sigma)*d;
+           sigma=-alpha;
+       }
+
+       dprintf(("after minimizeOnTheLine\n"));
+       print_matrix(x);
+   }

    double ConjGradSolverImpl::minimize(InputOutputArray x){
        CV_Assert(_Function.empty()==false);
        dprintf(("termcrit:\n\ttype: %d\n\tmaxCount: %d\n\tEPS: %g\n",_termcrit.type,_termcrit.maxCount,_termcrit.epsilon));
@@ -28,9 +67,13 @@ namespace cv{namespace optim{
        int ndim=MAX(x_mat.rows,x_mat.cols);
        CV_Assert(x_mat.type()==CV_64FC1);

-       d.create(1,ndim);
-       r.create(1,ndim);
-       r_old.create(1,ndim);
+       if(d.cols!=ndim){
+           d.create(1,ndim);
+           r.create(1,ndim);
+           r_old.create(1,ndim);
+           minimizeOnTheLine_buf1.create(1,ndim);
+           minimizeOnTheLine_buf2.create(1,ndim);
+       }

        Mat_<double> proxy_x;
        if(x_mat.rows>1){
@@ -41,14 +84,40 @@ namespace cv{namespace optim{
        }else{
            proxy_x=x_mat;
        }
+       _Function->getGradient((double*)proxy_x.data,(double*)d.data);
+       if(true){
+           d*=-1.0;
+           d.copyTo(r);
+       }else{((double*)d.data)[1]=42.0;} //here everything goes. check that everything is set properly
+       dprintf(("proxy_x\n"));print_matrix(proxy_x);
+       dprintf(("d first time\n"));print_matrix(d);
+       dprintf(("r\n"));print_matrix(r);
+
+       double beta=0;
+       for(int count=0;count<_termcrit.maxCount;count++){
+           minimizeOnTheLine(_Function,proxy_x,d,minimizeOnTheLine_buf1,minimizeOnTheLine_buf2);
+           r.copyTo(r_old);
+           _Function->getGradient((double*)proxy_x.data,(double*)r.data);
+           r*=-1.0;
+           double r_norm_sq=norm(r);
+           if(_termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && r_norm_sq<_termcrit.epsilon){
+               break;
+           }
+           r_norm_sq=r_norm_sq*r_norm_sq;
+           beta=MAX(0.0,(r_norm_sq-r.dot(r_old))/r_norm_sq);
+           d=r+beta*d;
+       }
+
        if(x_mat.rows>1){
            Mat(ndim, 1, CV_64F, (double*)proxy_x.data).copyTo(x);
        }
-       return 0.0;
+       return _Function->calc((double*)proxy_x.data);
    }
+
    ConjGradSolverImpl::ConjGradSolverImpl(){
        _Function=Ptr<Function>();
    }
@@ -74,4 +143,3 @@ namespace cv{namespace optim{
        return Ptr<ConjGradSolver>(CG);
    }
 }}
-
diff --git a/modules/optim/test/test_conjugate_gradient.cpp b/modules/optim/test/test_conjugate_gradient.cpp
index 2456a5cab4..890d891df1 100644
--- a/modules/optim/test/test_conjugate_gradient.cpp
+++ b/modules/optim/test/test_conjugate_gradient.cpp
@@ -24,38 +24,39 @@ public:
        return x[0]*x[0]+x[1]*x[1]+x[2]*x[2]+x[3]*x[3];
    }
    void getGradient(const double* x,double* grad){
-       for(int i=0;i<4;i++,grad++,x++){
-           grad[0]=2*x[0];
+       for(int i=0;i<4;i++){
+           grad[i]=2*x[i];
        }
    }
 };
-//TODO: test transp/usual x
-/*class RosenbrockF:public cv::optim::Solver::Function{
+class RosenbrockF:public cv::optim::Solver::Function{
    double calc(const double* x)const{
        return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
    }
-};*/
+   void getGradient(const double* x,double* grad){
+       grad[0]=-2*(1-x[0])-400*(x[1]-x[0]*x[0])*x[0];
+       grad[1]=200*(x[1]-x[0]*x[0]);
+   }
+};

 TEST(Optim_ConjGrad, regression_basic){
    cv::Ptr<cv::optim::ConjGradSolver> solver=cv::optim::createConjGradSolver();
 #if 1
    {
        cv::Ptr<cv::optim::Solver::Function> ptr_F(new SphereF());
-       cv::Mat x=(cv::Mat_<double>(1,2)<<1.0,1.0),
-           etalon_x=(cv::Mat_<double>(1,2)<<0.0,0.0);
+       cv::Mat x=(cv::Mat_<double>(4,1)<<50.0,10.0,1.0,-10.0),
+           etalon_x=(cv::Mat_<double>(1,4)<<0.0,0.0,0.0,0.0);
        double etalon_res=0.0;
-       return;
        mytest(solver,ptr_F,x,etalon_x,etalon_res);
    }
 #endif
-#if 0
+#if 1
    {
        cv::Ptr<cv::optim::Solver::Function> ptr_F(new RosenbrockF());
        cv::Mat x=(cv::Mat_<double>(2,1)<<0.0,0.0),
-           step=(cv::Mat_<double>(2,1)<<0.5,+0.5),
            etalon_x=(cv::Mat_<double>(2,1)<<1.0,1.0);
        double etalon_res=0.0;
-       mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+       mytest(solver,ptr_F,x,etalon_x,etalon_res);
    }
 #endif
 }
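
Usage note (not part of the patch): the sketch below drives the new ``ConjGradSolver`` the same way the regression test above does — create the solver, hand it a ``Solver::Function`` that implements both ``calc()`` and ``getGradient()``, and call ``minimize()`` on a column-vector ``Mat`` that is updated in place. The header name ``opencv2/optim.hpp``, the ``main()`` wrapper and the printing are assumptions added for illustration; the solver API itself is the one introduced and tested by the diff.

    // Minimal usage sketch for the Nonlinear Conjugate Gradient solver added by this patch.
    // Assumption: the optim module headers are reachable as "opencv2/optim.hpp".
    #include "opencv2/optim.hpp"
    #include <iostream>

    // Rosenbrock function with its analytic gradient, mirroring RosenbrockF from the test.
    class Rosenbrock : public cv::optim::Solver::Function
    {
    public:
        double calc(const double* x) const
        {
            return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
        }
        void getGradient(const double* x, double* grad)
        {
            grad[0]=-2*(1-x[0])-400*(x[1]-x[0]*x[0])*x[0];
            grad[1]=200*(x[1]-x[0]*x[0]);
        }
    };

    int main()
    {
        // createConjGradSolver() without arguments uses the default termination criteria,
        // so the objective has to be set explicitly afterwards.
        cv::Ptr<cv::optim::ConjGradSolver> solver = cv::optim::createConjGradSolver();
        solver->setFunction(cv::Ptr<cv::optim::Solver::Function>(new Rosenbrock()));

        cv::Mat x = (cv::Mat_<double>(2,1) << 0.0, 0.0);  // starting point
        double res = solver->minimize(x);                 // x now holds the argmin, (1,1) expected

        std::cout << "minimum value: " << res << "\nargmin:\n" << x << std::endl;
        return 0;
    }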