/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv { namespace ml {

ANN_MLP::Params::Params()
{
    layerSizes = Mat();
    activateFunc = SIGMOID_SYM;
    fparam1 = fparam2 = 0;
    termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
    trainMethod = RPROP;
    bpDWScale = bpMomentScale = 0.1;
    rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
    rpDWMin = FLT_EPSILON; rpDWMax = 50.;
}


ANN_MLP::Params::Params( const Mat& _layerSizes, int _activateFunc, double _fparam1, double _fparam2,
                         TermCriteria _termCrit, int _trainMethod, double _param1, double _param2 )
{
    layerSizes = _layerSizes;
    activateFunc = _activateFunc;
    fparam1 = _fparam1;
    fparam2 = _fparam2;
    termCrit = _termCrit;
    trainMethod = _trainMethod;
    bpDWScale = bpMomentScale = 0.1;
    rpDW0 = 1.; rpDWPlus = 1.2; rpDWMinus = 0.5;
    rpDWMin = FLT_EPSILON; rpDWMax = 50.;

    if( trainMethod == RPROP )
    {
        rpDW0 = _param1;
        if( rpDW0 < FLT_EPSILON )
            rpDW0 = 1.;
        rpDWMin = _param2;
        rpDWMin = std::max( rpDWMin, 0. );
    }
    else if( trainMethod == BACKPROP )
    {
        bpDWScale = _param1;
        if( bpDWScale <= 0 )
            bpDWScale = 0.1;
        bpDWScale = std::max( bpDWScale, 1e-3 );
        bpDWScale = std::min( bpDWScale, 1. );
        bpMomentScale = _param2;
        if( bpMomentScale < 0 )
            bpMomentScale = 0.1;
        bpMomentScale = std::min( bpMomentScale, 1. );
    }
    else
        trainMethod = RPROP;
}
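
// A minimal usage sketch (illustrative only, not part of the original file;
// the variable names are hypothetical). It fills the parameter set for a
// 2-10-1 network with the symmetric sigmoid and RPROP training, then builds
// the model via ANN_MLP::create() defined at the bottom of this file:
//
//     Mat layers = (Mat_<int>(1, 3) << 2, 10, 1);
//     ANN_MLP::Params p(layers, ANN_MLP::SIGMOID_SYM, 0, 0,
//                       TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01),
//                       ANN_MLP::Params::RPROP, 0.1, FLT_EPSILON);
//     Ptr<ANN_MLP> nn = ANN_MLP::create(p);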


class ANN_MLPImpl : public ANN_MLP
{
public:
    ANN_MLPImpl()
    {
        clear();
    }

    ANN_MLPImpl( const Params& p )
    {
        setParams(p);
    }

    virtual ~ANN_MLPImpl() {}

    void setParams(const Params& p)
    {
        params = p;
        create( params.layerSizes );
        set_activ_func( params.activateFunc, params.fparam1, params.fparam2 );
    }

    Params getParams() const
    {
        return params;
    }

    void clear()
    {
        min_val = max_val = min_val1 = max_val1 = 0.;
        rng = RNG((uint64)-1);
        weights.clear();
        trained = false;
        // max_buf_sz is read by predict() but was never assigned anywhere;
        // give it a sane default here so prediction does not depend on garbage.
        max_buf_sz = 1 << 12;
    }

    int layer_count() const { return (int)layer_sizes.size(); }

    void set_activ_func( int _activ_func, double _f_param1, double _f_param2 )
    {
        if( _activ_func < 0 || _activ_func > GAUSSIAN )
            CV_Error( CV_StsOutOfRange, "Unknown activation function" );

        activ_func = _activ_func;

        switch( activ_func )
        {
        case SIGMOID_SYM:
            max_val = 0.95; min_val = -max_val;
            max_val1 = 0.98; min_val1 = -max_val1;
            if( fabs(_f_param1) < FLT_EPSILON )
                _f_param1 = 2./3;
            if( fabs(_f_param2) < FLT_EPSILON )
                _f_param2 = 1.7159;
            break;
        case GAUSSIAN:
            max_val = 1.; min_val = 0.05;
            max_val1 = 1.; min_val1 = 0.02;
            if( fabs(_f_param1) < FLT_EPSILON )
                _f_param1 = 1.;
            if( fabs(_f_param2) < FLT_EPSILON )
                _f_param2 = 1.;
            break;
        default:
            min_val = max_val = min_val1 = max_val1 = 0.;
            _f_param1 = 1.;
            _f_param2 = 0.;
        }

        f_param1 = _f_param1;
        f_param2 = _f_param2;
    }
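
    // For reference, these parameters feed the activation functions evaluated
    // by calc_activ_func() below:
    //     SIGMOID_SYM: f(x) = f_param2*(1 - exp(-f_param1*x))/(1 + exp(-f_param1*x))
    //     GAUSSIAN:    f(x) = f_param2*exp(-f_param1*f_param1*x*x)
    // so the defaults substituted above (f_param1 = 2/3, f_param2 = 1.7159)
    // give the standard scaled symmetric sigmoid.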

    void init_weights()
    {
        int i, j, k, l_count = layer_count();

        for( i = 1; i < l_count; i++ )
        {
            int n1 = layer_sizes[i-1];
            int n2 = layer_sizes[i];
            double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
            double* w = weights[i].ptr<double>();

            // initialize weights using Nguyen-Widrow algorithm
            for( j = 0; j < n2; j++ )
            {
                double s = 0;
                for( k = 0; k <= n1; k++ )
                {
                    val = rng.uniform(0., 1.)*2-1.;
                    w[k*n2 + j] = val;
                    s += fabs(val);
                }

                if( i < l_count - 1 )
                {
                    s = 1./(s - fabs(val));
                    for( k = 0; k <= n1; k++ )
                        w[k*n2 + j] *= s;
                    w[n1*n2 + j] *= G*(-1+j*2./n2);
                }
            }
        }
    }
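
    // Note on the Nguyen-Widrow step above: each neuron's weights are drawn
    // uniformly from [-1, 1] and normalized by the L1 norm of its non-bias
    // weights; for hidden layers the bias w[n1*n2 + j] is then rescaled by a
    // factor swept evenly across [-G, G) (G = 0.7*n1^(1/(n2-1))), so the
    // active regions of the sigmoids tile the input range instead of
    // clustering around the origin.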

    void create( InputArray _layer_sizes )
    {
        clear();

        _layer_sizes.copyTo(layer_sizes);
        int l_count = layer_count();

        weights.resize(l_count + 2);
        max_lsize = 0;

        if( l_count > 0 )
        {
            for( int i = 0; i < l_count; i++ )
            {
                int n = layer_sizes[i];
                if( n < 1 + (0 < i && i < l_count-1))
                    CV_Error( CV_StsOutOfRange,
                             "there should be at least one input and one output "
                             "and every hidden layer must have more than 1 neuron" );
                max_lsize = std::max( max_lsize, n );
                if( i > 0 )
                    weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
            }

            int ninputs = layer_sizes.front();
            int noutputs = layer_sizes.back();
            weights[0].create(1, ninputs*2, CV_64F);
            weights[l_count].create(1, noutputs*2, CV_64F);
            weights[l_count+1].create(1, noutputs*2, CV_64F);
        }
    }
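
    // Layout of the weights vector set up by create():
    //     weights[0]            - input scale/shift pairs applied by scale_input()
    //     weights[1..l_count-1] - (n_prev+1) x n layer matrices; the extra row
    //                             holds the bias terms
    //     weights[l_count]      - output scale/shift pairs applied by scale_output()
    //     weights[l_count+1]    - the inverse output mapping, used during training
    //                             to bring target responses into network range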

    float predict( InputArray _inputs, OutputArray _outputs, int ) const
    {
        if( !trained )
            CV_Error( CV_StsError, "The network has not been trained or loaded" );

        Mat inputs = _inputs.getMat();
        int type = inputs.type(), l_count = layer_count();
        int n = inputs.rows, dn0 = n;

        CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
        int noutputs = layer_sizes[l_count-1];
        Mat outputs;

        int min_buf_sz = 2*max_lsize;
        int buf_sz = n*min_buf_sz;

        if( buf_sz > max_buf_sz )
        {
            dn0 = max_buf_sz/min_buf_sz;
            dn0 = std::max( dn0, 1 );
            buf_sz = dn0*min_buf_sz;
        }

        cv::AutoBuffer<double> _buf(buf_sz+noutputs);
        double* buf = _buf;

        if( !_outputs.needed() )
        {
            CV_Assert( n == 1 );
            outputs = Mat(n, noutputs, type, buf + buf_sz);
        }
        else
        {
            _outputs.create(n, noutputs, type);
            outputs = _outputs.getMat();
        }

        int dn = 0;
        for( int i = 0; i < n; i += dn )
        {
            dn = std::min( dn0, n - i );

            Mat layer_in = inputs.rowRange(i, i + dn);
            Mat layer_out( dn, layer_in.cols, CV_64F, buf);

            scale_input( layer_in, layer_out );
            layer_in = layer_out;

            for( int j = 1; j < l_count; j++ )
            {
                double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
                int cols = layer_sizes[j];

                layer_out = Mat(dn, cols, CV_64F, data);
                Mat w = weights[j].rowRange(0, layer_in.cols);
                gemm(layer_in, w, 1, noArray(), 0, layer_out);
                calc_activ_func( layer_out, weights[j] );

                layer_in = layer_out;
            }

            layer_out = outputs.rowRange(i, i + dn);
            scale_output( layer_in, layer_out );
        }

        if( n == 1 )
        {
            int maxIdx[] = {0, 0};
            minMaxIdx(outputs, 0, 0, 0, maxIdx);
            return (float)(maxIdx[0] + maxIdx[1]);
        }

        return 0.f;
    }
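
    // Illustrative call (hypothetical names): a single CV_32F sample row
    // produces one row of responses, and the return value is the index of
    // the largest response, which is how a classifier built on top of this
    // network would typically read it.
    //
    //     Mat sample = (Mat_<float>(1, 2) << 0.3f, -1.2f), response;
    //     float best = nn->predict(sample, response);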

    void scale_input( const Mat& _src, Mat& _dst ) const
    {
        int cols = _src.cols;
        const double* w = weights[0].ptr<double>();

        if( _src.type() == CV_32F )
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const float* src = _src.ptr<float>(i);
                double* dst = _dst.ptr<double>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = src[j]*w[j*2] + w[j*2+1];
            }
        }
        else
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                // this branch handles CV_64F input, so read doubles, not floats
                const double* src = _src.ptr<double>(i);
                double* dst = _dst.ptr<double>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = src[j]*w[j*2] + w[j*2+1];
            }
        }
    }

    void scale_output( const Mat& _src, Mat& _dst ) const
    {
        int cols = _src.cols;
        const double* w = weights[layer_count()].ptr<double>();

        if( _dst.type() == CV_32F )
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const double* src = _src.ptr<double>(i);
                float* dst = _dst.ptr<float>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
            }
        }
        else
        {
            for( int i = 0; i < _src.rows; i++ )
            {
                const double* src = _src.ptr<double>(i);
                double* dst = _dst.ptr<double>(i);
                for( int j = 0; j < cols; j++ )
                    dst[j] = src[j]*w[j*2] + w[j*2+1];
            }
        }
    }

    void calc_activ_func( Mat& sums, const Mat& w ) const
    {
        const double* bias = w.ptr<double>(w.rows-1);
        int i, j, n = sums.rows, cols = sums.cols;
        double scale = 0, scale2 = f_param2;

        switch( activ_func )
        {
        case IDENTITY:
            scale = 1.;
            break;
        case SIGMOID_SYM:
            scale = -f_param1;
            break;
        case GAUSSIAN:
            scale = -f_param1*f_param1;
            break;
        default:
            ;
        }

        CV_Assert( sums.isContinuous() );

        if( activ_func != GAUSSIAN )
        {
            for( i = 0; i < n; i++ )
            {
                double* data = sums.ptr<double>(i);
                for( j = 0; j < cols; j++ )
                    data[j] = (data[j] + bias[j])*scale;
            }

            if( activ_func == IDENTITY )
                return;
        }
        else
        {
            for( i = 0; i < n; i++ )
            {
                double* data = sums.ptr<double>(i);
                for( j = 0; j < cols; j++ )
                {
                    double t = data[j] + bias[j];
                    data[j] = t*t*scale;
                }
            }
        }

        exp( sums, sums );

        if( sums.isContinuous() )
        {
            cols *= n;
            n = 1;
        }

        switch( activ_func )
        {
        case SIGMOID_SYM:
            for( i = 0; i < n; i++ )
            {
                double* data = sums.ptr<double>(i);
                for( j = 0; j < cols; j++ )
                {
                    double t = scale2*(1. - data[j])/(1. + data[j]);
                    data[j] = t;
                }
            }
            break;

        case GAUSSIAN:
            for( i = 0; i < n; i++ )   // was "i < n; j++", which never advanced the row
            {
                double* data = sums.ptr<double>(i);
                for( j = 0; j < cols; j++ )
                    data[j] = scale2*data[j];
            }
            break;

        default:
            ;
        }
    }

    void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
    {
        const double* bias = w.ptr<double>(w.rows-1);
        int i, j, n = _xf.rows, cols = _xf.cols;

        if( activ_func == IDENTITY )
        {
            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    xf[j] += bias[j];
                    df[j] = 1;
                }
            }
        }
        else if( activ_func == GAUSSIAN )
        {
            double scale = -f_param1*f_param1;
            double scale2 = scale*f_param2;
            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    double t = xf[j] + bias[j];
                    df[j] = t*2*scale2;
                    xf[j] = t*t*scale;
                }
            }
            exp( _xf, _xf );

            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                    df[j] *= xf[j];
            }
        }
        else
        {
            double scale = f_param1;
            double scale2 = f_param2;

            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    xf[j] = (xf[j] + bias[j])*scale;
                    df[j] = -fabs(xf[j]);
                }
            }

            exp( _df, _df );

            // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
            // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
            // 2*a*exp(-ax)/(1+exp(-ax))^2
            scale *= 2*f_param2;
            for( i = 0; i < n; i++ )
            {
                double* xf = _xf.ptr<double>(i);
                double* df = _df.ptr<double>(i);

                for( j = 0; j < cols; j++ )
                {
                    int s0 = xf[j] > 0 ? 1 : -1;
                    double t0 = 1./(1. + df[j]);
                    double t1 = scale*df[j]*t0*t0;
                    t0 *= scale2*(1. - df[j])*s0;
                    df[j] = t1;
                    xf[j] = t0;
                }
            }
        }
    }

    void calc_input_scale( const Mat& inputs, int flags )
    {
        bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
        bool no_scale = (flags & NO_INPUT_SCALE) != 0;
        double* scale = weights[0].ptr<double>();
        int count = inputs.rows;

        if( reset_weights )
        {
            int i, j, vcount = layer_sizes[0];
            int type = inputs.type();
            double a = no_scale ? 1. : 0.;

            for( j = 0; j < vcount; j++ )
                scale[2*j] = a, scale[j*2+1] = 0.;

            if( no_scale )
                return;

            for( i = 0; i < count; i++ )
            {
                const uchar* p = inputs.ptr(i);
                const float* f = (const float*)p;
                const double* d = (const double*)p;
                for( j = 0; j < vcount; j++ )
                {
                    double t = type == CV_32F ? (double)f[j] : d[j];
                    scale[j*2] += t;
                    scale[j*2+1] += t*t;
                }
            }

            // turn the accumulated sums into a standardizing transform:
            // scale = 1/sigma, shift = -mean/sigma
            for( j = 0; j < vcount; j++ )
            {
                double s = scale[j*2], s2 = scale[j*2+1];
                double m = s/count, sigma2 = s2/count - m*m;
                scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
                scale[j*2+1] = -m*scale[j*2];
            }
        }
    }

    void calc_output_scale( const Mat& outputs, int flags )
    {
        int i, j, vcount = layer_sizes.back();
        int type = outputs.type();
        double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
        bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
        bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
        int l_count = layer_count();
        double* scale = weights[l_count].ptr<double>();
        double* inv_scale = weights[l_count+1].ptr<double>();
        int count = outputs.rows;

        if( reset_weights )
        {
            double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;

            for( j = 0; j < vcount; j++ )
            {
                scale[2*j] = inv_scale[2*j] = a0;
                scale[j*2+1] = inv_scale[2*j+1] = b0;
            }

            if( no_scale )
                return;
        }

        for( i = 0; i < count; i++ )
        {
            const uchar* p = outputs.ptr(i);
            const float* f = (const float*)p;
            const double* d = (const double*)p;

            for( j = 0; j < vcount; j++ )
            {
                double t = type == CV_32F ? (double)f[j] : d[j];

                if( reset_weights )
                {
                    double mj = scale[j*2], Mj = scale[j*2+1];
                    if( mj > t ) mj = t;
                    if( Mj < t ) Mj = t;

                    scale[j*2] = mj;
                    scale[j*2+1] = Mj;
                }
                else
                {
                    t = t*inv_scale[j*2] + inv_scale[2*j+1];
                    if( t < m1 || t > M1 )
                        CV_Error( CV_StsOutOfRange,
                                 "Some of the new output training vector components exceed the original range too much" );
                }
            }
        }

        if( reset_weights )
            for( j = 0; j < vcount; j++ )
            {
                // map mj..Mj to m..M
                double mj = scale[j*2], Mj = scale[j*2+1];
                double a, b;
                double delta = Mj - mj;
                if( delta < DBL_EPSILON )
                    a = 1, b = (M + m - Mj - mj)*0.5;
                else
                    a = (M - m)/delta, b = m - mj*a;
                inv_scale[j*2] = a; inv_scale[j*2+1] = b;
                a = 1./a; b = -b*a;
                scale[j*2] = a; scale[j*2+1] = b;
            }
    }

    void prepare_to_train( const Mat& inputs, const Mat& outputs,
                           Mat& sample_weights, int flags )
    {
        if( layer_sizes.empty() )
            CV_Error( CV_StsError,
                     "The network has not been created. Use method create or the appropriate constructor" );

        if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
            inputs.cols != layer_sizes[0] )
            CV_Error( CV_StsBadArg,
                     "input training data should be a floating-point matrix with "
                     "the number of rows equal to the number of training samples and "
                     "the number of columns equal to the size of 0-th (input) layer" );

        if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
            outputs.cols != layer_sizes.back() )
            CV_Error( CV_StsBadArg,
                     "output training data should be a floating-point matrix with "
                     "the number of rows equal to the number of training samples and "
                     "the number of columns equal to the size of last (output) layer" );

        if( inputs.rows != outputs.rows )
            CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );

        Mat temp;
        double s = sum(sample_weights)[0];
        sample_weights.convertTo(temp, CV_64F, 1./s);
        sample_weights = temp;

        calc_input_scale( inputs, flags );
        calc_output_scale( outputs, flags );
    }

    bool train( const Ptr<TrainData>& trainData, int flags )
    {
        const int MAX_ITER = 1000;
        const double DEFAULT_EPSILON = FLT_EPSILON;

        // initialize training data
        Mat inputs = trainData->getTrainSamples();
        Mat outputs = trainData->getTrainResponses();
        Mat sw = trainData->getTrainSampleWeights();
        prepare_to_train( inputs, outputs, sw, flags );

        // ... and link weights
        if( !(flags & UPDATE_WEIGHTS) )
            init_weights();

        TermCriteria termcrit;
        termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
        termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
        termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);

        int iter = params.trainMethod == Params::BACKPROP ?
            train_backprop( inputs, outputs, sw, termcrit ) :
            train_rprop( inputs, outputs, sw, termcrit );

        trained = iter > 0;
        return trained;
    }

    int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
    {
        int i, j, k;
        double prev_E = DBL_MAX*0.5, E = 0;
        int itype = inputs.type(), otype = outputs.type();

        int count = inputs.rows;

        int iter = -1, max_iter = termCrit.maxCount*count;
        double epsilon = termCrit.epsilon*count;

        int l_count = layer_count();
        int ivcount = layer_sizes[0];
        int ovcount = layer_sizes.back();

        // allocate buffers
        vector<vector<double> > x(l_count);
        vector<vector<double> > df(l_count);
        vector<Mat> dw(l_count);

        for( i = 0; i < l_count; i++ )
        {
            int n = layer_sizes[i];
            x[i].resize(n+1);   // +1 slot for the bias input appended in the backward pass
            df[i].resize(n);
            // dw must start at zero: gemm below accumulates the momentum term into it
            dw[i] = Mat::zeros(weights[i].size(), CV_64F);
        }

        Mat _idx_m(1, count, CV_32S);
        int* _idx = _idx_m.ptr<int>();
        for( i = 0; i < count; i++ )
            _idx[i] = i;

        AutoBuffer<double> _buf(max_lsize*2);
        double* buf[] = { _buf, (double*)_buf + max_lsize };

        const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();

        // run back-propagation loop
        /*
         y_i = w_i*x_{i-1}
         x_i = f(y_i)
         E = 1/2*||u - x_N||^2
         grad_N = (x_N - u)*f'(y_i)
         dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
         w_i(t+1) = w_i(t) + dw_i(t)
         grad_{i-1} = w_i^t*grad_i
        */
        for( iter = 0; iter < max_iter; iter++ )
        {
            int idx = iter % count;
            double sweight = sw ? count*sw[idx] : 1.;

            if( idx == 0 )
            {
                //printf("%d. E = %g\n", iter/count, E);
                if( fabs(prev_E - E) < epsilon )
                    break;
                prev_E = E;
                E = 0;

                // shuffle indices
                for( i = 0; i < count; i++ )
                {
                    j = rng.uniform(0, count);
                    k = rng.uniform(0, count);
                    std::swap(_idx[j], _idx[k]);
                }
            }

            idx = _idx[idx];

            const uchar* x0data_p = inputs.ptr(idx);
            const float* x0data_f = (const float*)x0data_p;
            const double* x0data_d = (const double*)x0data_p;

            double* w = weights[0].ptr<double>();
            for( j = 0; j < ivcount; j++ )
                x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];

            Mat x1( 1, ivcount, CV_64F, &x[0][0] );

            // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
            for( i = 1; i < l_count; i++ )
            {
                int n = layer_sizes[i];
                Mat x2(1, n, CV_64F, &x[i][0] );
                Mat _w = weights[i].rowRange(0, x1.cols);
                gemm(x1, _w, 1, noArray(), 0, x2);
                Mat _df(1, n, CV_64F, &df[i][0] );
                calc_activ_func_deriv( x2, _df, weights[i] );
                x1 = x2;
            }

            Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
            w = weights[l_count+1].ptr<double>();

            // calculate error
            const uchar* udata_p = outputs.ptr(idx);
            const float* udata_f = (const float*)udata_p;
            const double* udata_d = (const double*)udata_p;

            double* gdata = grad1.ptr<double>();
            for( k = 0; k < ovcount; k++ )
            {
                double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
                gdata[k] = t*sweight;
                E += t*t;
            }
            E *= sweight;

            // backward pass, update weights
            for( i = l_count-1; i > 0; i-- )
            {
                int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
                Mat _df(1, n2, CV_64F, &df[i][0]);
                multiply( grad1, _df, grad1 );
                Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
                x[i-1][n1] = 1.;
                gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
                add( weights[i], dw[i], weights[i] );
                if( i > 1 )
                {
                    Mat grad2(1, n1, CV_64F, buf[i&1]);
                    Mat _w = weights[i].rowRange(0, n1);
                    gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
                    grad1 = grad2;
                }
            }
        }

        iter /= count;
        return iter;
    }

    struct RPropLoop : public ParallelLoopBody
    {
        RPropLoop(ANN_MLPImpl* _ann,
                  const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
                  int _dcount0, vector<Mat>& _dEdw, double* _E)
        {
            ann = _ann;
            inputs = _inputs;
            outputs = _outputs;
            sw = _sw.ptr<double>();
            dcount0 = _dcount0;
            dEdw = &_dEdw;
            pE = _E;
        }

        ANN_MLPImpl* ann;
        vector<Mat>* dEdw;
        Mat inputs, outputs;
        const double* sw;
        int dcount0;
        double* pE;

        void operator()( const Range& range ) const
        {
            double inv_count = 1./inputs.rows;
            int ivcount = ann->layer_sizes.front();
            int ovcount = ann->layer_sizes.back();
            int itype = inputs.type(), otype = outputs.type();
            int count = inputs.rows;
            int i, j, k, l_count = ann->layer_count();
            vector<vector<double> > x(l_count);
            vector<vector<double> > df(l_count);
            vector<double> _buf(ann->max_lsize*dcount0*2);
            double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
            double E = 0;

            for( i = 0; i < l_count; i++ )
            {
                x[i].resize(ann->layer_sizes[i]*dcount0);
                df[i].resize(ann->layer_sizes[i]*dcount0);
            }

            for( int si = range.start; si < range.end; si++ )
            {
                int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
                int dcount = i1 - i0;
                const double* w = ann->weights[0].ptr<double>();

                // grab and preprocess input data
                for( i = 0; i < dcount; i++ )
                {
                    const uchar* x0data_p = inputs.ptr(i0 + i);
                    const float* x0data_f = (const float*)x0data_p;
                    const double* x0data_d = (const double*)x0data_p;

                    double* xdata = &x[0][i*ivcount];
                    for( j = 0; j < ivcount; j++ )
                        xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
                }
                Mat x1(dcount, ivcount, CV_64F, &x[0][0]);

                // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
                for( i = 1; i < l_count; i++ )
                {
                    Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
                    Mat _w = ann->weights[i].rowRange(0, x1.cols);
                    gemm( x1, _w, 1, noArray(), 0, x2 );
                    Mat _df( x2.size(), CV_64F, &df[i][0] );
                    ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
                    x1 = x2;
                }

                Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);

                w = ann->weights[l_count+1].ptr<double>();

                // calculate error
                for( i = 0; i < dcount; i++ )
                {
                    const uchar* udata_p = outputs.ptr(i0+i);
                    const float* udata_f = (const float*)udata_p;
                    const double* udata_d = (const double*)udata_p;

                    const double* xdata = &x[l_count-1][i*ovcount];
                    double* gdata = grad1.ptr<double>(i);
                    // index by the global sample number i0+i (si is only the chunk index)
                    double sweight = sw ? sw[i0+i] : inv_count, E1 = 0;

                    for( j = 0; j < ovcount; j++ )
                    {
                        double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
                        gdata[j] = t*sweight;
                        E1 += t*t;
                    }
                    E += sweight*E1;
                }

                for( i = l_count-1; i > 0; i-- )
                {
                    int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
                    Mat _df(dcount, n2, CV_64F, &df[i][0]);
                    multiply(grad1, _df, grad1);

                    {
                        AutoLock lock(ann->mtx);
                        Mat _dEdw = dEdw->at(i).rowRange(0, n1);
                        x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
                        gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);

                        // update bias part of dEdw
                        double* dst = dEdw->at(i).ptr<double>(n1);
                        for( k = 0; k < dcount; k++ )
                        {
                            const double* src = grad1.ptr<double>(k);
                            for( j = 0; j < n2; j++ )
                                dst[j] += src[j];
                        }
                    }

                    Mat grad2( dcount, n1, CV_64F, buf[i&1] );
                    if( i > 1 )
                    {
                        Mat _w = ann->weights[i].rowRange(0, n1);
                        gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
                    }
                    grad1 = grad2;
                }
            }
            {
                AutoLock lock(ann->mtx);
                *pE += E;
            }
        }
    };

    int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
    {
        const int max_buf_size = 1 << 16;
        int i, iter = -1, count = inputs.rows;

        double prev_E = DBL_MAX*0.5;

        int max_iter = termCrit.maxCount;
        double epsilon = termCrit.epsilon;
        double dw_plus = params.rpDWPlus;
        double dw_minus = params.rpDWMinus;
        double dw_min = params.rpDWMin;
        double dw_max = params.rpDWMax;

        int l_count = layer_count();

        // allocate buffers
        vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);

        int total = 0;
        for( i = 0; i < l_count; i++ )
        {
            total += layer_sizes[i];
            dw[i].create(weights[i].size(), CV_64F);
            dw[i].setTo(Scalar::all(params.rpDW0));
            prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
            dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
        }

        int dcount0 = max_buf_size/(2*total);
        dcount0 = std::max( dcount0, 1 );
        dcount0 = std::min( dcount0, count );
        int chunk_count = (count + dcount0 - 1)/dcount0;

        // run rprop loop
        /*
         y_i(t) = w_i(t)*x_{i-1}(t)
         x_i(t) = f(y_i(t))
         E = sum_over_all_samples(1/2*||u - x_N||^2)
         grad_N = (x_N - u)*f'(y_i)

                       / std::min(dw_i{jk}(t)*dw_plus, dw_max),   if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
         dw_i{jk}(t) = | std::max(dw_i{jk}(t)*dw_minus, dw_min),  if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
                       \ dw_i{jk}(t-1),                           otherwise

         if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
            dE/dw_i{jk}(t) <- 0
         else
            w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
         grad_{i-1}(t) = w_i^t(t)*grad_i(t)
        */
        for( iter = 0; iter < max_iter; iter++ )
        {
            double E = 0;

            for( i = 0; i < l_count; i++ )
                dEdw[i].setTo(Scalar::all(0));

            // first, iterate through all the samples and compute dEdw
            RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
            parallel_for_(Range(0, chunk_count), invoker);
            //invoker(Range(0, chunk_count));

            // now update weights
            for( i = 1; i < l_count; i++ )
            {
                int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
                for( int k = 0; k <= n1; k++ )
                {
                    CV_Assert(weights[i].size() == Size(n2, n1+1));
                    double* wk = weights[i].ptr<double>(k);
                    double* dwk = dw[i].ptr<double>(k);
                    double* dEdwk = dEdw[i].ptr<double>(k);
                    schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);

                    for( int j = 0; j < n2; j++ )
                    {
                        double Eval = dEdwk[j];
                        double dval = dwk[j];
                        double wval = wk[j];
                        int s = CV_SIGN(Eval);
                        int ss = prevEk[j]*s;
                        if( ss > 0 )
                        {
                            dval *= dw_plus;
                            dval = std::min( dval, dw_max );
                            dwk[j] = dval;
                            wk[j] = wval + dval*s;
                        }
                        else if( ss < 0 )
                        {
                            dval *= dw_minus;
                            dval = std::max( dval, dw_min );
                            prevEk[j] = 0;
                            dwk[j] = dval;
                            wk[j] = wval + dval*s;
                        }
                        else
                        {
                            prevEk[j] = (schar)s;
                            wk[j] = wval + dval*s;
                        }
                        dEdwk[j] = 0.;
                    }
                }
            }

            //printf("%d. E = %g\n", iter, E);
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
        }

        return iter;
    }

    void write_params( FileStorage& fs ) const
    {
        const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
                                      activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
                                      activ_func == GAUSSIAN ? "GAUSSIAN" : 0;

        if( activ_func_name )
            fs << "activation_function" << activ_func_name;
        else
            fs << "activation_function_id" << activ_func;

        if( activ_func != IDENTITY )
        {
            fs << "f_param1" << f_param1;
            fs << "f_param2" << f_param2;
        }

        fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;

        fs << "training_params" << "{";
        if( params.trainMethod == Params::BACKPROP )
        {
            fs << "train_method" << "BACKPROP";
            fs << "dw_scale" << params.bpDWScale;
            fs << "moment_scale" << params.bpMomentScale;
        }
        else if( params.trainMethod == Params::RPROP )
        {
            fs << "train_method" << "RPROP";
            fs << "dw0" << params.rpDW0;
            fs << "dw_plus" << params.rpDWPlus;
            fs << "dw_minus" << params.rpDWMinus;
            fs << "dw_min" << params.rpDWMin;
            fs << "dw_max" << params.rpDWMax;
        }
        else
            CV_Error(CV_StsError, "Unknown training method");

        fs << "term_criteria" << "{";
        if( params.termCrit.type & TermCriteria::EPS )
            fs << "epsilon" << params.termCrit.epsilon;
        if( params.termCrit.type & TermCriteria::COUNT )
            fs << "iterations" << params.termCrit.maxCount;
        fs << "}" << "}";
    }

    void write( FileStorage& fs ) const
    {
        if( layer_sizes.empty() )
            return;
        int i, l_count = layer_count();

        fs << "layer_sizes" << layer_sizes;

        write_params( fs );

        size_t esz = weights[0].elemSize();

        fs << "input_scale" << "[";
        fs.writeRaw("d", weights[0].data, weights[0].total()*esz);

        fs << "]" << "output_scale" << "[";
        fs.writeRaw("d", weights[l_count].data, weights[l_count].total()*esz);

        fs << "]" << "inv_output_scale" << "[";
        fs.writeRaw("d", weights[l_count+1].data, weights[l_count+1].total()*esz);

        fs << "]" << "weights" << "[";
        for( i = 1; i < l_count; i++ )
        {
            fs << "[";
            fs.writeRaw("d", weights[i].data, weights[i].total()*esz);
            fs << "]";
        }
        fs << "]";
    }

    void read_params( const FileNode& fn )
    {
        String activ_func_name = (String)fn["activation_function"];
        if( !activ_func_name.empty() )
        {
            activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
                         activ_func_name == "IDENTITY" ? IDENTITY :
                         activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
            CV_Assert( activ_func >= 0 );
        }
        else
            activ_func = (int)fn["activation_function_id"];

        f_param1 = (double)fn["f_param1"];
        f_param2 = (double)fn["f_param2"];

        set_activ_func( activ_func, f_param1, f_param2 );

        min_val = (double)fn["min_val"];
        max_val = (double)fn["max_val"];
        min_val1 = (double)fn["min_val1"];
        max_val1 = (double)fn["max_val1"];

        FileNode tpn = fn["training_params"];
        params = Params();

        if( !tpn.empty() )
        {
            String tmethod_name = (String)tpn["train_method"];

            if( tmethod_name == "BACKPROP" )
            {
                params.trainMethod = Params::BACKPROP;
                params.bpDWScale = (double)tpn["dw_scale"];
                params.bpMomentScale = (double)tpn["moment_scale"];
            }
            else if( tmethod_name == "RPROP" )
            {
                params.trainMethod = Params::RPROP;
                params.rpDW0 = (double)tpn["dw0"];
                params.rpDWPlus = (double)tpn["dw_plus"];
                params.rpDWMinus = (double)tpn["dw_minus"];
                params.rpDWMin = (double)tpn["dw_min"];
                params.rpDWMax = (double)tpn["dw_max"];
            }
            else
                CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");

            FileNode tcn = tpn["term_criteria"];
            if( !tcn.empty() )
            {
                FileNode tcn_e = tcn["epsilon"];
                FileNode tcn_i = tcn["iterations"];
                params.termCrit.type = 0;
                if( !tcn_e.empty() )
                {
                    params.termCrit.type |= TermCriteria::EPS;
                    params.termCrit.epsilon = (double)tcn_e;
                }
                if( !tcn_i.empty() )
                {
                    params.termCrit.type |= TermCriteria::COUNT;
                    params.termCrit.maxCount = (int)tcn_i;
                }
            }
        }
    }

    void read( const FileNode& fn )
    {
        clear();

        vector<int> _layer_sizes;
        fn["layer_sizes"] >> _layer_sizes;
        create( _layer_sizes );

        int i, l_count = layer_count();
        read_params(fn);

        size_t esz = weights[0].elemSize();

        FileNode w = fn["input_scale"];
        w.readRaw("d", weights[0].data, weights[0].total()*esz);

        w = fn["output_scale"];
        w.readRaw("d", weights[l_count].data, weights[l_count].total()*esz);

        w = fn["inv_output_scale"];
        w.readRaw("d", weights[l_count+1].data, weights[l_count+1].total()*esz);

        FileNodeIterator w_it = fn["weights"].begin();

        for( i = 1; i < l_count; i++, ++w_it )
            (*w_it).readRaw("d", weights[i].data, weights[i].total()*esz);
        trained = true;
    }

    Mat getLayerSizes() const
    {
        return Mat_<int>(layer_sizes, true);
    }

    Mat getWeights(int layerIdx) const
    {
        CV_Assert( 0 <= layerIdx && layerIdx < (int)weights.size() );
        return weights[layerIdx];
    }

    bool isTrained() const
    {
        return trained;
    }

    bool isClassifier() const
    {
        return false;
    }

    int getVarCount() const
    {
        return layer_sizes.empty() ? 0 : layer_sizes[0];
    }

    String getDefaultModelName() const
    {
        return "opencv_ml_ann_mlp";
    }

    vector<int> layer_sizes;
    vector<Mat> weights;
    double f_param1, f_param2;
    double min_val, max_val, min_val1, max_val1;
    int activ_func;
    int max_lsize, max_buf_sz;
    Params params;
    RNG rng;
    Mutex mtx;
    bool trained;
};


Ptr<ANN_MLP> ANN_MLP::create(const ANN_MLP::Params& params)
{
    Ptr<ANN_MLPImpl> ann = makePtr<ANN_MLPImpl>(params);
    return ann;
}
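
// End-to-end usage sketch (illustrative only; the sample data and variable
// names are hypothetical, but TrainData::create, train and predict are the
// public ml-module API this file implements against):
//
//     Mat samples(100, 2, CV_32F), responses(100, 1, CV_32F);
//     // ... fill samples/responses ...
//     Mat layers = (Mat_<int>(1, 3) << 2, 10, 1);
//     Ptr<ANN_MLP> nn = ANN_MLP::create(ANN_MLP::Params(layers,
//         ANN_MLP::SIGMOID_SYM, 0, 0,
//         TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 300, 0.01),
//         ANN_MLP::Params::RPROP, 0.1, FLT_EPSILON));
//     nn->train(TrainData::create(samples, ROW_SAMPLE, responses));
//     Mat out;
//     nn->predict(samples.row(0), out);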

}}

/* End of file. */