From 891bcd8491ba3f35c2a64459cf73272826a04255 Mon Sep 17 00:00:00 2001
From: Alex Leontiev
Date: Tue, 24 Sep 2013 07:51:21 +0800
Subject: [PATCH] Finish implementing the Nonlinear Conjugate Gradient

Now everything is prepared for the pull request.
---
 modules/optim/doc/conjugate_gradient.rst      | 11 ---
 modules/optim/doc/downhill_simplex_method.rst | 13 ++--
 modules/optim/doc/optim.rst                   |  2 +-
 modules/optim/src/conjugate_gradient.cpp      | 78 +++++++++++++++++--
 .../optim/test/test_conjugate_gradient.cpp    | 23 +++---
 5 files changed, 93 insertions(+), 34 deletions(-)
 delete mode 100644 modules/optim/doc/conjugate_gradient.rst

diff --git a/modules/optim/doc/conjugate_gradient.rst b/modules/optim/doc/conjugate_gradient.rst
deleted file mode 100644
index cd96974652..0000000000
--- a/modules/optim/doc/conjugate_gradient.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-Conjugate Gradient
-=======================
-
-.. highlight:: cpp
-
-optim::ConjGradSolver
----------------------------------
-
-.. ocv:class:: optim::ConjGradSolver
-
-This class is used
diff --git a/modules/optim/doc/downhill_simplex_method.rst b/modules/optim/doc/downhill_simplex_method.rst
index 94d084c23f..bd0e194393 100644
--- a/modules/optim/doc/downhill_simplex_method.rst
+++ b/modules/optim/doc/downhill_simplex_method.rst
@@ -8,14 +8,15 @@ optim::DownhillSolver

 .. ocv:class:: optim::DownhillSolver

-This class is used to perform the non-linear non-constrained *minimization* of a function, given on an *n*-dimensional Euclidean space,
+This class is used to perform the non-linear non-constrained *minimization* of a function, defined on an *n*-dimensional Euclidean space,
 using the **Nelder-Mead method**, also known as **downhill simplex method**. The basic idea about the method can be obtained from
 (`http://en.wikipedia.org/wiki/Nelder-Mead_method <http://en.wikipedia.org/wiki/Nelder-Mead_method>`_). It should be noted, that
 this method, although deterministic, is rather a heuristic and therefore may converge to a local minima, not necessary a global one.
 It is iterative optimization technique, which at each step uses an information about the values of a function evaluated only at
 *n+1* points, arranged as a *simplex* in *n*-dimensional space (hence the second name of the method). At each step new point is
 chosen to evaluate function at, obtained value is compared with previous ones and based on this information simplex changes it's shape
-, slowly moving to the local minimum.
+, slowly moving to the local minimum. Thus this method uses *only* function values to make its decisions, in contrast to, say, the Nonlinear
+Conjugate Gradient method (which is also implemented in ``optim``).

 Algorithm stops when the number of function evaluations done exceeds ``termcrit.maxCount``, when the function values at the
 vertices of simplex are within ``termcrit.epsilon`` range or simplex becomes so small that it
@@ -30,9 +31,9 @@ positive integer ``termcrit.maxCount`` and positive non-integer ``termcrit.epsil
        class CV_EXPORTS Function
        {
        public:
-           virtual ~Function() {}
-           //! ndim - dimensionality
-           virtual double calc(const double* x) const = 0;
+           virtual ~Function() {}
+           virtual double calc(const double* x) const = 0;
+           virtual void getGradient(const double* /*x*/,double* /*grad*/) {}
        };

        virtual Ptr<Function> getFunction() const = 0;
@@ -150,7 +151,7 @@ optim::createDownhillSolver
 This function returns the reference to the ready-to-use ``DownhillSolver`` object. All the parameters are optional, so this procedure can be called
 even without parameters at all. In this case, the default values will be used.
 As default value for terminal criteria are the only sensible ones, ``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``
 should be called upon the obtained object, if the respective parameters
-were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss the out and call the
+were not given to ``createDownhillSolver()``. Otherwise, the two ways (give parameters to ``createDownhillSolver()`` or miss them out and call the
 ``DownhillSolver::setFunction()`` and ``DownhillSolver::setInitStep()``) are absolutely equivalent (and will drop the same errors in the
 same way, should invalid input be detected).
diff --git a/modules/optim/doc/optim.rst b/modules/optim/doc/optim.rst
index dfa4b65df9..113882eee0 100644
--- a/modules/optim/doc/optim.rst
+++ b/modules/optim/doc/optim.rst
@@ -10,4 +10,4 @@ optim. Generic numerical optimization
     linear_programming
     downhill_simplex_method
     primal_dual_algorithm
-    conjugate_gradient
+    nonlinear_conjugate_gradient
diff --git a/modules/optim/src/conjugate_gradient.cpp b/modules/optim/src/conjugate_gradient.cpp
index 7e555c6cc5..186d35a1f5 100644
--- a/modules/optim/src/conjugate_gradient.cpp
+++ b/modules/optim/src/conjugate_gradient.cpp
@@ -1,8 +1,11 @@
 #include "precomp.hpp"
+#undef ALEX_DEBUG
 #include "debug.hpp"

 namespace cv{namespace optim{
+#define SEC_METHOD_ITERATIONS 4
+#define INITIAL_SEC_METHOD_SIGMA 0.1

 class ConjGradSolverImpl : public ConjGradSolver
 {
 public:
@@ -16,9 +19,45 @@ namespace cv{namespace optim{
        Ptr<Solver::Function> _Function;
        TermCriteria _termcrit;
        Mat_<double> d,r,buf_x,r_old;
+       Mat_<double> minimizeOnTheLine_buf1,minimizeOnTheLine_buf2;
    private:
+       static void minimizeOnTheLine(Ptr<Solver::Function> _f,Mat_<double>& x,const Mat_<double>& d,Mat_<double>& buf1,Mat_<double>& buf2);
    };
+
+   void ConjGradSolverImpl::minimizeOnTheLine(Ptr<Solver::Function> _f,Mat_<double>& x,const Mat_<double>& d,Mat_<double>& buf1,
+           Mat_<double>& buf2){
+       double sigma=INITIAL_SEC_METHOD_SIGMA;
+       buf1=0.0;
+       buf2=0.0;
+
+       dprintf(("before minimizeOnTheLine\n"));
+       dprintf(("x:\n"));
+       print_matrix(x);
+       dprintf(("d:\n"));
+       print_matrix(d);
+
+       for(int i=0;i<SEC_METHOD_ITERATIONS;i++){
+           _f->getGradient((double*)x.data,(double*)buf1.data);
+           dprintf(("buf1:\n"));
+           print_matrix(buf1);
+           x=x+sigma*d;
+           _f->getGradient((double*)x.data,(double*)buf2.data);
+           dprintf(("buf2:\n"));
+           print_matrix(buf2);
+           double d1=buf1.dot(d), d2=buf2.dot(d);
+           if((d1-d2)==0){
+               break;
+           }
+           double alpha=-sigma*d1/(d2-d1);
+           dprintf(("(buf2.dot(d)-buf1.dot(d))=%f\nalpha=%f\n",(buf2.dot(d)-buf1.dot(d)),alpha));
+           x=x+(alpha-sigma)*d;
+           sigma=-alpha;
+       }
+
+       dprintf(("after minimizeOnTheLine\n"));
+       print_matrix(x);
+   }

    double ConjGradSolverImpl::minimize(InputOutputArray x){
        CV_Assert(_Function.empty()==false);
        dprintf(("termcrit:\n\ttype: %d\n\tmaxCount: %d\n\tEPS: %g\n",_termcrit.type,_termcrit.maxCount,_termcrit.epsilon));
@@ -28,9 +67,13 @@ namespace cv{namespace optim{
        int ndim=MAX(x_mat.rows,x_mat.cols);
        CV_Assert(x_mat.type()==CV_64FC1);

-       d.create(1,ndim);
-       r.create(1,ndim);
-       r_old.create(1,ndim);
+       if(d.cols!=ndim){
+           d.create(1,ndim);
+           r.create(1,ndim);
+           r_old.create(1,ndim);
+           minimizeOnTheLine_buf1.create(1,ndim);
+           minimizeOnTheLine_buf2.create(1,ndim);
+       }

        Mat_<double> proxy_x;
        if(x_mat.rows>1){
@@ -41,14 +84,40 @@ namespace cv{namespace optim{
        }else{
            proxy_x=x_mat;
        }
+       _Function->getGradient((double*)proxy_x.data,(double*)d.data);
+       if(true){
+           d*=-1.0;
+           d.copyTo(r);
+       }else{((double*)d.data)[1]=42.0;} //here everything goes. check that everything is set properly
+       dprintf(("proxy_x\n"));print_matrix(proxy_x);
+       dprintf(("d first time\n"));print_matrix(d);
+       dprintf(("r\n"));print_matrix(r);
+
+       double beta=0;
+       for(int count=0;count<_termcrit.maxCount;count++){
+           minimizeOnTheLine(_Function,proxy_x,d,minimizeOnTheLine_buf1,minimizeOnTheLine_buf2);
+           r.copyTo(r_old);
+           _Function->getGradient((double*)proxy_x.data,(double*)r.data);
+           r*=-1.0;
+           double r_norm_sq=norm(r);
+           if(_termcrit.type==(TermCriteria::MAX_ITER+TermCriteria::EPS) && r_norm_sq<_termcrit.epsilon){
+               break;
+           }
+           r_norm_sq=r_norm_sq*r_norm_sq;
+           beta=MAX(0.0,(r_norm_sq-r.dot(r_old))/r_norm_sq);
+           d=r+beta*d;
+       }
+
        if(x_mat.rows>1){
            Mat(ndim, 1, CV_64F, (double*)proxy_x.data).copyTo(x);
        }
-       return 0.0;
+       return _Function->calc((double*)proxy_x.data);
    }
+
    ConjGradSolverImpl::ConjGradSolverImpl(){
        _Function=Ptr<Function>();
    }
@@ -74,4 +143,3 @@ namespace cv{namespace optim{
        return Ptr<ConjGradSolver>(CG);
    }
 }}
-
diff --git a/modules/optim/test/test_conjugate_gradient.cpp b/modules/optim/test/test_conjugate_gradient.cpp
index 2456a5cab4..890d891df1 100644
--- a/modules/optim/test/test_conjugate_gradient.cpp
+++ b/modules/optim/test/test_conjugate_gradient.cpp
@@ -24,38 +24,39 @@ public:
        return x[0]*x[0]+x[1]*x[1]+x[2]*x[2]+x[3]*x[3];
    }
    void getGradient(const double* x,double* grad){
-       for(int i=0;i<4;i++,grad++,x++){
-           grad[0]=2*x[0];
+       for(int i=0;i<4;i++){
+           grad[i]=2*x[i];
        }
    }
 };
-//TODO: test transp/usual x
-/*class RosenbrockF:public cv::optim::Solver::Function{
+class RosenbrockF:public cv::optim::Solver::Function{
    double calc(const double* x)const{
        return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
    }
-};*/
+   void getGradient(const double* x,double* grad){
+       grad[0]=-2*(1-x[0])-400*(x[1]-x[0]*x[0])*x[0];
+       grad[1]=200*(x[1]-x[0]*x[0]);
+   }
+};

 TEST(Optim_ConjGrad, regression_basic){
    cv::Ptr<cv::optim::ConjGradSolver> solver=cv::optim::createConjGradSolver();
 #if 1
    {
        cv::Ptr<cv::optim::Solver::Function> ptr_F(new SphereF());
-       cv::Mat x=(cv::Mat_<double>(1,2)<<1.0,1.0),
-           etalon_x=(cv::Mat_<double>(1,2)<<0.0,0.0);
+       cv::Mat x=(cv::Mat_<double>(4,1)<<50.0,10.0,1.0,-10.0),
+           etalon_x=(cv::Mat_<double>(1,4)<<0.0,0.0,0.0,0.0);
        double etalon_res=0.0;
-       return;
        mytest(solver,ptr_F,x,etalon_x,etalon_res);
    }
 #endif
-#if 0
+#if 1
    {
        cv::Ptr<cv::optim::Solver::Function> ptr_F(new RosenbrockF());
        cv::Mat x=(cv::Mat_<double>(2,1)<<0.0,0.0),
-           step=(cv::Mat_<double>(2,1)<<0.5,+0.5),
            etalon_x=(cv::Mat_<double>(2,1)<<1.0,1.0);
        double etalon_res=0.0;
-       mytest(solver,ptr_F,x,step,etalon_x,etalon_res);
+       mytest(solver,ptr_F,x,etalon_x,etalon_res);
    }
 #endif
 }
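
Usage note (not part of the patch): the sketch below drives the new ``ConjGradSolver`` the same way the regression test above does — create the solver, hand it a ``Solver::Function`` that implements both ``calc()`` and ``getGradient()``, and call ``minimize()`` on a column-vector ``Mat`` that is updated in place. The header name ``opencv2/optim.hpp``, the ``main()`` wrapper and the printing are assumptions added for illustration; the solver API itself is the one introduced and tested by the diff.

    // Minimal usage sketch for the Nonlinear Conjugate Gradient solver added by this patch.
    // Assumption: the optim module headers are reachable as "opencv2/optim.hpp".
    #include "opencv2/optim.hpp"
    #include <iostream>

    // Rosenbrock function with its analytic gradient, mirroring RosenbrockF from the test.
    class Rosenbrock : public cv::optim::Solver::Function
    {
    public:
        double calc(const double* x) const
        {
            return 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])+(1-x[0])*(1-x[0]);
        }
        void getGradient(const double* x, double* grad)
        {
            grad[0]=-2*(1-x[0])-400*(x[1]-x[0]*x[0])*x[0];
            grad[1]=200*(x[1]-x[0]*x[0]);
        }
    };

    int main()
    {
        // createConjGradSolver() without arguments uses the default termination criteria,
        // so the objective has to be set explicitly afterwards.
        cv::Ptr<cv::optim::ConjGradSolver> solver = cv::optim::createConjGradSolver();
        solver->setFunction(cv::Ptr<cv::optim::Solver::Function>(new Rosenbrock()));

        cv::Mat x = (cv::Mat_<double>(2,1) << 0.0, 0.0);  // starting point
        double res = solver->minimize(x);                 // x now holds the argmin, (1,1) expected

        std::cout << "minimum value: " << res << "\nargmin:\n" << x << std::endl;
        return 0;
    }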