initial commit; ml has been refactored; it compiles and the tests run well; some other modules, apps and samples do not compile; to be fixed

pull/3032/head
Vadim Pisarevsky 11 years ago
parent dce1824a91
commit ba3783d205
  1. 4
      modules/core/src/lapack.cpp
  2. 2254
      modules/ml/include/opencv2/ml.hpp
  3. 1633
      modules/ml/src/ann_mlp.cpp
  4. 2277
      modules/ml/src/boost.cpp
  5. 1675
      modules/ml/src/cnn.cpp
  6. 1339
      modules/ml/src/data.cpp
  7. 450
      modules/ml/src/em.cpp
  8. 1859
      modules/ml/src/ertrees.cpp
  9. 728
      modules/ml/src/estimate.cpp
  10. 5
      modules/ml/src/gbt.cpp
  11. 1857
      modules/ml/src/inner_functions.cpp
  12. 542
      modules/ml/src/knearest.cpp
  13. 63
      modules/ml/src/ml_init.cpp
  14. 703
      modules/ml/src/nbayes.cpp
  15. 494
      modules/ml/src/precomp.hpp
  16. 969
      modules/ml/src/rtrees.cpp
  17. 3180
      modules/ml/src/svm.cpp
  18. 143
      modules/ml/src/testset.cpp
  19. 4648
      modules/ml/src/tree.cpp
  20. 98
      modules/ml/test/test_emknearestkmeans.cpp
  21. 4
      modules/ml/test/test_gbttest.cpp
  22. 9
      modules/ml/test/test_mltests.cpp
  23. 637
      modules/ml/test/test_mltests2.cpp
  24. 30
      modules/ml/test/test_precomp.hpp
  25. 45
      modules/ml/test/test_save_load.cpp

@ -1557,12 +1557,16 @@ static void _SVDcompute( InputArray _aarr, OutputArray _w,
{
if( !at )
{
if( _u.needed() )
transpose(temp_u, _u);
if( _vt.needed() )
temp_v.copyTo(_vt);
}
else
{
if( _u.needed() )
transpose(temp_v, _u);
if( _vt.needed() )
temp_u.copyTo(_vt);
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -43,26 +43,44 @@
namespace cv
{
namespace ml
{
const double minEigenValue = DBL_EPSILON;
///////////////////////////////////////////////////////////////////////////////////////////////////////
EM::EM(int _nclusters, int _covMatType, const TermCriteria& _termCrit)
EM::Params::Params(int _nclusters, int _covMatType, const TermCriteria& _termCrit)
{
nclusters = _nclusters;
covMatType = _covMatType;
maxIters = (_termCrit.type & TermCriteria::MAX_ITER) ? _termCrit.maxCount : DEFAULT_MAX_ITERS;
epsilon = (_termCrit.type & TermCriteria::EPS) ? _termCrit.epsilon : 0;
termCrit = _termCrit;
}
EM::~EM()
class CV_EXPORTS EMImpl : public EM
{
//clear();
}
public:
EMImpl(const Params& _params)
{
setParams(_params);
}
void EM::clear()
{
virtual ~EMImpl() {}
void setParams(const Params& _params)
{
params = _params;
CV_Assert(params.nclusters > 1);
CV_Assert(params.covMatType == COV_MAT_SPHERICAL ||
params.covMatType == COV_MAT_DIAGONAL ||
params.covMatType == COV_MAT_GENERIC);
}
Params getParams() const
{
return params;
}
void clear()
{
trainSamples.release();
trainProbs.release();
trainLogLikelihoods.release();
@ -77,27 +95,32 @@ void EM::clear()
covsRotateMats.clear();
logWeightDivDet.release();
}
}
bool train(const Ptr<TrainData>& data, int)
{
Mat samples = data->getTrainSamples(), labels;
return train_(samples, labels, noArray(), noArray());
}
bool EM::train(InputArray samples,
bool train_(InputArray samples,
OutputArray logLikelihoods,
OutputArray labels,
OutputArray probs)
{
{
Mat samplesMat = samples.getMat();
setTrainData(START_AUTO_STEP, samplesMat, 0, 0, 0, 0);
return doTrain(START_AUTO_STEP, logLikelihoods, labels, probs);
}
}
bool EM::trainE(InputArray samples,
bool trainE(InputArray samples,
InputArray _means0,
InputArray _covs0,
InputArray _weights0,
OutputArray logLikelihoods,
OutputArray labels,
OutputArray probs)
{
{
Mat samplesMat = samples.getMat();
std::vector<Mat> covs0;
_covs0.getMatVector(covs0);
@ -107,24 +130,52 @@ bool EM::trainE(InputArray samples,
setTrainData(START_E_STEP, samplesMat, 0, !_means0.empty() ? &means0 : 0,
!_covs0.empty() ? &covs0 : 0, !_weights0.empty() ? &weights0 : 0);
return doTrain(START_E_STEP, logLikelihoods, labels, probs);
}
}
bool EM::trainM(InputArray samples,
bool trainM(InputArray samples,
InputArray _probs0,
OutputArray logLikelihoods,
OutputArray labels,
OutputArray probs)
{
{
Mat samplesMat = samples.getMat();
Mat probs0 = _probs0.getMat();
setTrainData(START_M_STEP, samplesMat, !_probs0.empty() ? &probs0 : 0, 0, 0, 0);
return doTrain(START_M_STEP, logLikelihoods, labels, probs);
}
}
float predict(InputArray _inputs, OutputArray _outputs, int) const
{
bool needprobs = _outputs.needed();
Mat samples = _inputs.getMat(), probs, probsrow;
int ptype = CV_32F;
float firstres = 0.f;
int i, nsamples = samples.rows;
Vec2d EM::predict(InputArray _sample, OutputArray _probs) const
{
if( needprobs )
{
if( _outputs.fixedType() )
ptype = _outputs.type();
_outputs.create(samples.rows, params.nclusters, ptype);
}
else
nsamples = std::min(nsamples, 1);
for( i = 0; i < nsamples; i++ )
{
if( needprobs )
probsrow = probs.row(i);
Vec2d res = computeProbabilities(samples.row(i), needprobs ? &probsrow : 0, ptype);
if( i == 0 )
firstres = (float)res[1];
}
return firstres;
}
Vec2d predict2(InputArray _sample, OutputArray _probs) const
{
int ptype = CV_32F;
Mat sample = _sample.getMat();
CV_Assert(isTrained());
@ -135,29 +186,44 @@ Vec2d EM::predict(InputArray _sample, OutputArray _probs) const
sample.convertTo(tmp, CV_64FC1);
sample = tmp;
}
sample = sample.reshape(1, 1);
sample.reshape(1, 1);
Mat probs;
if( _probs.needed() )
{
_probs.create(1, nclusters, CV_64FC1);
if( _probs.fixedType() )
ptype = _probs.type();
_probs.create(1, params.nclusters, ptype);
probs = _probs.getMat();
}
return computeProbabilities(sample, !probs.empty() ? &probs : 0);
}
return computeProbabilities(sample, !probs.empty() ? &probs : 0, ptype);
}
bool EM::isTrained() const
{
bool isTrained() const
{
return !means.empty();
}
}
bool isClassifier() const
{
return true;
}
int getVarCount() const
{
return means.cols;
}
String getDefaultModelName() const
{
return "opencv_ml_em";
}
static
void checkTrainData(int startStep, const Mat& samples,
static void checkTrainData(int startStep, const Mat& samples,
int nclusters, int covMatType, const Mat* probs, const Mat* means,
const std::vector<Mat>* covs, const Mat* weights)
{
{
// Check samples.
CV_Assert(!samples.empty());
CV_Assert(samples.channels() == 1);
@ -168,12 +234,12 @@ void checkTrainData(int startStep, const Mat& samples,
// Check training params.
CV_Assert(nclusters > 0);
CV_Assert(nclusters <= nsamples);
CV_Assert(startStep == EM::START_AUTO_STEP ||
startStep == EM::START_E_STEP ||
startStep == EM::START_M_STEP);
CV_Assert(covMatType == EM::COV_MAT_GENERIC ||
covMatType == EM::COV_MAT_DIAGONAL ||
covMatType == EM::COV_MAT_SPHERICAL);
CV_Assert(startStep == START_AUTO_STEP ||
startStep == START_E_STEP ||
startStep == START_M_STEP);
CV_Assert(covMatType == COV_MAT_GENERIC ||
covMatType == COV_MAT_DIAGONAL ||
covMatType == COV_MAT_SPHERICAL);
CV_Assert(!probs ||
(!probs->empty() &&
@ -203,28 +269,26 @@ void checkTrainData(int startStep, const Mat& samples,
}
}
if(startStep == EM::START_E_STEP)
if(startStep == START_E_STEP)
{
CV_Assert(means);
}
else if(startStep == EM::START_M_STEP)
else if(startStep == START_M_STEP)
{
CV_Assert(probs);
}
}
}
static
void preprocessSampleData(const Mat& src, Mat& dst, int dstType, bool isAlwaysClone)
{
static void preprocessSampleData(const Mat& src, Mat& dst, int dstType, bool isAlwaysClone)
{
if(src.type() == dstType && !isAlwaysClone)
dst = src;
else
src.convertTo(dst, dstType);
}
}
static
void preprocessProbability(Mat& probs)
{
static void preprocessProbability(Mat& probs)
{
max(probs, 0., probs);
const double uniformProbability = (double)(1./probs.cols);
@ -239,55 +303,57 @@ void preprocessProbability(Mat& probs)
else
normalize(sampleProbs, sampleProbs, 1, 0, NORM_L1);
}
}
}
void EM::setTrainData(int startStep, const Mat& samples,
void setTrainData(int startStep, const Mat& samples,
const Mat* probs0,
const Mat* means0,
const std::vector<Mat>* covs0,
const Mat* weights0)
{
{
int nclusters = params.nclusters, covMatType = params.covMatType;
clear();
checkTrainData(startStep, samples, nclusters, covMatType, probs0, means0, covs0, weights0);
bool isKMeansInit = (startStep == EM::START_AUTO_STEP) || (startStep == EM::START_E_STEP && (covs0 == 0 || weights0 == 0));
bool isKMeansInit = (startStep == START_AUTO_STEP) || (startStep == START_E_STEP && (covs0 == 0 || weights0 == 0));
// Set checked data
preprocessSampleData(samples, trainSamples, isKMeansInit ? CV_32FC1 : CV_64FC1, false);
// set probs
if(probs0 && startStep == EM::START_M_STEP)
if(probs0 && startStep == START_M_STEP)
{
preprocessSampleData(*probs0, trainProbs, CV_64FC1, true);
preprocessProbability(trainProbs);
}
// set weights
if(weights0 && (startStep == EM::START_E_STEP && covs0))
if(weights0 && (startStep == START_E_STEP && covs0))
{
weights0->convertTo(weights, CV_64FC1);
weights = weights.reshape(1,1);
weights.reshape(1,1);
preprocessProbability(weights);
}
// set means
if(means0 && (startStep == EM::START_E_STEP/* || startStep == EM::START_AUTO_STEP*/))
if(means0 && (startStep == START_E_STEP/* || startStep == START_AUTO_STEP*/))
means0->convertTo(means, isKMeansInit ? CV_32FC1 : CV_64FC1);
// set covs
if(covs0 && (startStep == EM::START_E_STEP && weights0))
if(covs0 && (startStep == START_E_STEP && weights0))
{
covs.resize(nclusters);
for(size_t i = 0; i < covs0->size(); i++)
(*covs0)[i].convertTo(covs[i], CV_64FC1);
}
}
}
void EM::decomposeCovs()
{
void decomposeCovs()
{
int nclusters = params.nclusters, covMatType = params.covMatType;
CV_Assert(!covs.empty());
covsEigenValues.resize(nclusters);
if(covMatType == EM::COV_MAT_GENERIC)
if(covMatType == COV_MAT_GENERIC)
covsRotateMats.resize(nclusters);
invCovsEigenValues.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
@ -296,16 +362,16 @@ void EM::decomposeCovs()
SVD svd(covs[clusterIndex], SVD::MODIFY_A + SVD::FULL_UV);
if(covMatType == EM::COV_MAT_SPHERICAL)
if(covMatType == COV_MAT_SPHERICAL)
{
double maxSingularVal = svd.w.at<double>(0);
covsEigenValues[clusterIndex] = Mat(1, 1, CV_64FC1, Scalar(maxSingularVal));
}
else if(covMatType == EM::COV_MAT_DIAGONAL)
else if(covMatType == COV_MAT_DIAGONAL)
{
covsEigenValues[clusterIndex] = svd.w;
}
else //EM::COV_MAT_GENERIC
else //COV_MAT_GENERIC
{
covsEigenValues[clusterIndex] = svd.w;
covsRotateMats[clusterIndex] = svd.u;
@ -313,10 +379,11 @@ void EM::decomposeCovs()
max(covsEigenValues[clusterIndex], minEigenValue, covsEigenValues[clusterIndex]);
invCovsEigenValues[clusterIndex] = 1./covsEigenValues[clusterIndex];
}
}
}
void EM::clusterTrainSamples()
{
void clusterTrainSamples()
{
int nclusters = params.nclusters;
int nsamples = trainSamples.rows;
// Cluster samples, compute/update means
@ -336,7 +403,9 @@ void EM::clusterTrainSamples()
}
Mat labels;
kmeans(trainSamplesFlt, nclusters, labels, TermCriteria(TermCriteria::COUNT, means.empty() ? 10 : 1, 0.5), 10, KMEANS_PP_CENTERS, meansFlt);
kmeans(trainSamplesFlt, nclusters, labels,
TermCriteria(TermCriteria::COUNT, means.empty() ? 10 : 1, 0.5),
10, KMEANS_PP_CENTERS, meansFlt);
// Convert samples and means back to 64F.
CV_Assert(meansFlt.type() == CV_32FC1);
@ -370,10 +439,11 @@ void EM::clusterTrainSamples()
}
decomposeCovs();
}
}
void EM::computeLogWeightDivDet()
{
void computeLogWeightDivDet()
{
int nclusters = params.nclusters;
CV_Assert(!covsEigenValues.empty());
Mat logWeights;
@ -388,17 +458,18 @@ void EM::computeLogWeightDivDet()
double logDetCov = 0.;
const int evalCount = static_cast<int>(covsEigenValues[clusterIndex].total());
for(int di = 0; di < evalCount; di++)
logDetCov += std::log(covsEigenValues[clusterIndex].at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0));
logDetCov += std::log(covsEigenValues[clusterIndex].at<double>(params.covMatType != COV_MAT_SPHERICAL ? di : 0));
logWeightDivDet.at<double>(clusterIndex) = logWeights.at<double>(clusterIndex) - 0.5 * logDetCov;
}
}
}
bool EM::doTrain(int startStep, OutputArray logLikelihoods, OutputArray labels, OutputArray probs)
{
bool doTrain(int startStep, OutputArray logLikelihoods, OutputArray labels, OutputArray probs)
{
int nclusters = params.nclusters;
int dim = trainSamples.cols;
// Precompute the empty initial train data in the cases of EM::START_E_STEP and START_AUTO_STEP
if(startStep != EM::START_M_STEP)
// Precompute the empty initial train data in the cases of START_E_STEP and START_AUTO_STEP
if(startStep != START_M_STEP)
{
if(covs.empty())
{
@ -413,10 +484,14 @@ bool EM::doTrain(int startStep, OutputArray logLikelihoods, OutputArray labels,
decomposeCovs();
}
if(startStep == EM::START_M_STEP)
if(startStep == START_M_STEP)
mStep();
double trainLogLikelihood, prevTrainLogLikelihood = 0.;
int maxIters = (params.termCrit.type & TermCriteria::MAX_ITER) ?
params.termCrit.maxCount : DEFAULT_MAX_ITERS;
double epsilon = (params.termCrit.type & TermCriteria::EPS) ? params.termCrit.epsilon : 0.;
for(int iter = 0; ; iter++)
{
eStep();
@ -446,12 +521,12 @@ bool EM::doTrain(int startStep, OutputArray logLikelihoods, OutputArray labels,
covs.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(covMatType == EM::COV_MAT_SPHERICAL)
if(params.covMatType == COV_MAT_SPHERICAL)
{
covs[clusterIndex].create(dim, dim, CV_64FC1);
setIdentity(covs[clusterIndex], Scalar(covsEigenValues[clusterIndex].at<double>(0)));
}
else if(covMatType == EM::COV_MAT_DIAGONAL)
else if(params.covMatType == COV_MAT_DIAGONAL)
{
covs[clusterIndex] = Mat::diag(covsEigenValues[clusterIndex]);
}
@ -470,36 +545,50 @@ bool EM::doTrain(int startStep, OutputArray logLikelihoods, OutputArray labels,
trainLogLikelihoods.release();
return true;
}
}
Vec2d EM::computeProbabilities(const Mat& sample, Mat* probs) const
{
Vec2d computeProbabilities(const Mat& sample, Mat* probs, int ptype) const
{
// L_ik = log(weight_k) - 0.5 * log(|det(cov_k)|) - 0.5 *(x_i - mean_k)' cov_k^(-1) (x_i - mean_k)]
// q = arg(max_k(L_ik))
// probs_ik = exp(L_ik - L_iq) / (1 + sum_j!=q (exp(L_ij - L_iq))
// see Alex Smola's blog http://blog.smola.org/page/2 for
// details on the log-sum-exp trick
int nclusters = params.nclusters, covMatType = params.covMatType;
int stype = sample.type();
CV_Assert(!means.empty());
CV_Assert(sample.type() == CV_64FC1);
CV_Assert(sample.rows == 1);
CV_Assert(sample.cols == means.cols);
CV_Assert((stype == CV_32F || stype == CV_64F) && (ptype == CV_32F || ptype == CV_64F));
CV_Assert(sample.size() == Size(means.cols, 1));
int dim = sample.cols;
Mat L(1, nclusters, CV_64FC1);
int label = 0;
Mat L(1, nclusters, CV_64FC1), centeredSample(1, dim, CV_64F);
int i, label = 0;
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
const Mat centeredSample = sample - means.row(clusterIndex);
const double* mptr = means.ptr<double>(clusterIndex);
double* dptr = centeredSample.ptr<double>();
if( stype == CV_32F )
{
const float* sptr = sample.ptr<float>();
for( i = 0; i < dim; i++ )
dptr[i] = sptr[i] - mptr[i];
}
else
{
const double* sptr = sample.ptr<double>();
for( i = 0; i < dim; i++ )
dptr[i] = sptr[i] - mptr[i];
}
Mat rotatedCenteredSample = covMatType != EM::COV_MAT_GENERIC ?
Mat rotatedCenteredSample = covMatType != COV_MAT_GENERIC ?
centeredSample : centeredSample * covsRotateMats[clusterIndex];
double Lval = 0;
for(int di = 0; di < dim; di++)
{
double w = invCovsEigenValues[clusterIndex].at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0);
double w = invCovsEigenValues[clusterIndex].at<double>(covMatType != COV_MAT_SPHERICAL ? di : 0);
double val = rotatedCenteredSample.at<double>(di);
Lval += w * val * val;
}
@ -511,30 +600,28 @@ Vec2d EM::computeProbabilities(const Mat& sample, Mat* probs) const
}
double maxLVal = L.at<double>(label);
Mat expL_Lmax = L; // exp(L_ij - L_iq)
for(int i = 0; i < L.cols; i++)
expL_Lmax.at<double>(i) = std::exp(L.at<double>(i) - maxLVal);
double expDiffSum = sum(expL_Lmax)[0]; // sum_j(exp(L_ij - L_iq))
if(probs)
double expDiffSum = 0;
for( i = 0; i < L.cols; i++ )
{
probs->create(1, nclusters, CV_64FC1);
double factor = 1./expDiffSum;
expL_Lmax *= factor;
expL_Lmax.copyTo(*probs);
double v = std::exp(L.at<double>(i) - maxLVal);
L.at<double>(i) = v;
expDiffSum += v; // sum_j(exp(L_ij - L_iq))
}
if(probs)
L.convertTo(*probs, ptype, 1./expDiffSum);
Vec2d res;
res[0] = std::log(expDiffSum) + maxLVal - 0.5 * dim * CV_LOG2PI;
res[1] = label;
return res;
}
}
void EM::eStep()
{
void eStep()
{
// Compute probs_ik from means_k, covs_k and weights_k.
trainProbs.create(trainSamples.rows, nclusters, CV_64FC1);
trainProbs.create(trainSamples.rows, params.nclusters, CV_64FC1);
trainLabels.create(trainSamples.rows, 1, CV_32SC1);
trainLogLikelihoods.create(trainSamples.rows, 1, CV_64FC1);
@ -546,15 +633,17 @@ void EM::eStep()
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
{
Mat sampleProbs = trainProbs.row(sampleIndex);
Vec2d res = computeProbabilities(trainSamples.row(sampleIndex), &sampleProbs);
Vec2d res = computeProbabilities(trainSamples.row(sampleIndex), &sampleProbs, CV_64F);
trainLogLikelihoods.at<double>(sampleIndex) = res[0];
trainLabels.at<int>(sampleIndex) = static_cast<int>(res[1]);
}
}
}
void EM::mStep()
{
void mStep()
{
// Update means_k, covs_k and weights_k from probs_ik
int nclusters = params.nclusters;
int covMatType = params.covMatType;
int dim = trainSamples.cols;
// Update weights
@ -588,7 +677,7 @@ void EM::mStep()
// Update covsEigenValues and invCovsEigenValues
covs.resize(nclusters);
covsEigenValues.resize(nclusters);
if(covMatType == EM::COV_MAT_GENERIC)
if(covMatType == COV_MAT_GENERIC)
covsRotateMats.resize(nclusters);
invCovsEigenValues.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
@ -596,15 +685,15 @@ void EM::mStep()
if(weights.at<double>(clusterIndex) <= minPosWeight)
continue;
if(covMatType != EM::COV_MAT_SPHERICAL)
if(covMatType != COV_MAT_SPHERICAL)
covsEigenValues[clusterIndex].create(1, dim, CV_64FC1);
else
covsEigenValues[clusterIndex].create(1, 1, CV_64FC1);
if(covMatType == EM::COV_MAT_GENERIC)
if(covMatType == COV_MAT_GENERIC)
covs[clusterIndex].create(dim, dim, CV_64FC1);
Mat clusterCov = covMatType != EM::COV_MAT_GENERIC ?
Mat clusterCov = covMatType != COV_MAT_GENERIC ?
covsEigenValues[clusterIndex] : covs[clusterIndex];
clusterCov = Scalar(0);
@ -614,7 +703,7 @@ void EM::mStep()
{
centeredSample = trainSamples.row(sampleIndex) - means.row(clusterIndex);
if(covMatType == EM::COV_MAT_GENERIC)
if(covMatType == COV_MAT_GENERIC)
clusterCov += trainProbs.at<double>(sampleIndex, clusterIndex) * centeredSample.t() * centeredSample;
else
{
@ -622,18 +711,18 @@ void EM::mStep()
for(int di = 0; di < dim; di++ )
{
double val = centeredSample.at<double>(di);
clusterCov.at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0) += p*val*val;
clusterCov.at<double>(covMatType != COV_MAT_SPHERICAL ? di : 0) += p*val*val;
}
}
}
if(covMatType == EM::COV_MAT_SPHERICAL)
if(covMatType == COV_MAT_SPHERICAL)
clusterCov /= dim;
clusterCov /= weights.at<double>(clusterIndex);
// Update covsRotateMats for EM::COV_MAT_GENERIC only
if(covMatType == EM::COV_MAT_GENERIC)
// Update covsRotateMats for COV_MAT_GENERIC only
if(covMatType == COV_MAT_GENERIC)
{
SVD svd(covs[clusterIndex], SVD::MODIFY_A + SVD::FULL_UV);
covsEigenValues[clusterIndex] = svd.w;
@ -654,7 +743,7 @@ void EM::mStep()
means.row(minWeightClusterIndex).copyTo(clusterMean);
covs[minWeightClusterIndex].copyTo(covs[clusterIndex]);
covsEigenValues[minWeightClusterIndex].copyTo(covsEigenValues[clusterIndex]);
if(covMatType == EM::COV_MAT_GENERIC)
if(covMatType == COV_MAT_GENERIC)
covsRotateMats[minWeightClusterIndex].copyTo(covsRotateMats[clusterIndex]);
invCovsEigenValues[minWeightClusterIndex].copyTo(invCovsEigenValues[clusterIndex]);
}
@ -662,16 +751,131 @@ void EM::mStep()
// Normalize weights
weights /= trainSamples.rows;
}
}
void EM::read(const FileNode& fn)
{
Algorithm::read(fn);
void write_params(FileStorage& fs) const
{
fs << "nclusters" << params.nclusters;
fs << "cov_mat_type" << (params.covMatType == COV_MAT_SPHERICAL ? String("spherical") :
params.covMatType == COV_MAT_DIAGONAL ? String("diagonal") :
params.covMatType == COV_MAT_GENERIC ? String("generic") :
format("unknown_%d", params.covMatType));
writeTermCrit(fs, params.termCrit);
}
void write(FileStorage& fs) const
{
fs << "training_params" << "{";
write_params(fs);
fs << "}";
fs << "weights" << weights;
fs << "means" << means;
size_t i, n = covs.size();
fs << "covs" << "[";
for( i = 0; i < n; i++ )
fs << covs[i];
fs << "]";
}
void read_params(const FileNode& fn)
{
Params _params;
_params.nclusters = (int)fn["nclusters"];
String s = (String)fn["cov_mat_type"];
_params.covMatType = s == "spherical" ? COV_MAT_SPHERICAL :
s == "diagonal" ? COV_MAT_DIAGONAL :
s == "generic" ? COV_MAT_GENERIC : -1;
CV_Assert(_params.covMatType >= 0);
_params.termCrit = readTermCrit(fn);
setParams(_params);
}
void read(const FileNode& fn)
{
clear();
read_params(fn["training_params"]);
fn["weights"] >> weights;
fn["means"] >> means;
FileNode cfn = fn["covs"];
FileNodeIterator cfn_it = cfn.begin();
int i, n = (int)cfn.size();
covs.resize(n);
for( i = 0; i < n; i++, ++cfn_it )
(*cfn_it) >> covs[i];
decomposeCovs();
computeLogWeightDivDet();
}
Mat getWeights() const { return weights; }
Mat getMeans() const { return means; }
void getCovs(std::vector<Mat>& _covs) const
{
_covs.resize(covs.size());
std::copy(covs.begin(), covs.end(), _covs.begin());
}
Params params;
// all inner matrices have type CV_64FC1
Mat trainSamples;
Mat trainProbs;
Mat trainLogLikelihoods;
Mat trainLabels;
Mat weights;
Mat means;
std::vector<Mat> covs;
std::vector<Mat> covsEigenValues;
std::vector<Mat> covsRotateMats;
std::vector<Mat> invCovsEigenValues;
Mat logWeightDivDet;
};
Ptr<EM> EM::train(InputArray samples, OutputArray logLikelihoods,
OutputArray labels, OutputArray probs,
const EM::Params& params)
{
Ptr<EMImpl> em = makePtr<EMImpl>(params);
if(!em->train_(samples, logLikelihoods, labels, probs))
em.release();
return em;
}
Ptr<EM> EM::train_startWithE(InputArray samples, InputArray means0,
InputArray covs0, InputArray weights0,
OutputArray logLikelihoods, OutputArray labels,
OutputArray probs, const EM::Params& params)
{
Ptr<EMImpl> em = makePtr<EMImpl>(params);
if(!em->trainE(samples, means0, covs0, weights0, logLikelihoods, labels, probs))
em.release();
return em;
}
Ptr<EM> EM::train_startWithM(InputArray samples, InputArray probs0,
OutputArray logLikelihoods, OutputArray labels,
OutputArray probs, const EM::Params& params)
{
Ptr<EMImpl> em = makePtr<EMImpl>(params);
if(!em->trainM(samples, probs0, logLikelihoods, labels, probs))
em.release();
return em;
}
Ptr<EM> EM::create(const Params& params)
{
return makePtr<EMImpl>(params);
}
}
} // namespace cv
/* End of file. */

File diff suppressed because it is too large Load Diff

@ -1,728 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#if 0
ML_IMPL int
icvCmpIntegers (const void* a, const void* b) {return *(const int*)a - *(const int*)b;}
/****************************************************************************************\
* Cross-validation algorithms realizations *
\****************************************************************************************/
// Return pointer to trainIdx. Function DOES NOT FILL this matrix!
ML_IMPL
const CvMat* cvCrossValGetTrainIdxMatrix (const CvStatModel* estimateModel)
{
CvMat* result = NULL;
CV_FUNCNAME ("cvCrossValGetTrainIdxMatrix");
__BEGIN__
if (!CV_IS_CROSSVAL(estimateModel))
{
CV_ERROR (CV_StsBadArg, "Pointer point to not CvCrossValidationModel");
}
result = ((CvCrossValidationModel*)estimateModel)->sampleIdxTrain;
__END__
return result;
} // End of cvCrossValGetTrainIdxMatrix
/****************************************************************************************/
// Return pointer to checkIdx. Function DOES NOT FILL this matrix!
ML_IMPL
const CvMat* cvCrossValGetCheckIdxMatrix (const CvStatModel* estimateModel)
{
CvMat* result = NULL;
CV_FUNCNAME ("cvCrossValGetCheckIdxMatrix");
__BEGIN__
if (!CV_IS_CROSSVAL (estimateModel))
{
CV_ERROR (CV_StsBadArg, "Pointer point to not CvCrossValidationModel");
}
result = ((CvCrossValidationModel*)estimateModel)->sampleIdxEval;
__END__
return result;
} // End of cvCrossValGetCheckIdxMatrix
/****************************************************************************************/
// Create new Idx-matrix for next classifiers training and return code of result.
// Result is 0 if function can't make next step (error input or folds are finished),
// it is 1 if all was correct, and it is 2 if current fold wasn't' checked.
ML_IMPL
int cvCrossValNextStep (CvStatModel* estimateModel)
{
int result = 0;
CV_FUNCNAME ("cvCrossValGetNextTrainIdx");
__BEGIN__
CvCrossValidationModel* crVal = (CvCrossValidationModel*) estimateModel;
int k, fold;
if (!CV_IS_CROSSVAL (estimateModel))
{
CV_ERROR (CV_StsBadArg, "Pointer point to not CvCrossValidationModel");
}
fold = ++crVal->current_fold;
if (fold >= crVal->folds_all)
{
if (fold == crVal->folds_all)
EXIT;
else
{
CV_ERROR (CV_StsInternal, "All iterations has end long ago");
}
}
k = crVal->folds[fold + 1] - crVal->folds[fold];
crVal->sampleIdxTrain->data.i = crVal->sampleIdxAll + crVal->folds[fold + 1];
crVal->sampleIdxTrain->cols = crVal->samples_all - k;
crVal->sampleIdxEval->data.i = crVal->sampleIdxAll + crVal->folds[fold];
crVal->sampleIdxEval->cols = k;
if (crVal->is_checked)
{
crVal->is_checked = 0;
result = 1;
}
else
{
result = 2;
}
__END__
return result;
}
/****************************************************************************************/
// Do checking part of loop of cross-validations metod.
ML_IMPL
void cvCrossValCheckClassifier (CvStatModel* estimateModel,
const CvStatModel* model,
const CvMat* trainData,
int sample_t_flag,
const CvMat* trainClasses)
{
CV_FUNCNAME ("cvCrossValCheckClassifier ");
__BEGIN__
CvCrossValidationModel* crVal = (CvCrossValidationModel*) estimateModel;
int i, j, k;
int* data;
float* responses_fl;
int step;
float* responses_result;
int* responses_i;
double te, te1;
double sum_c, sum_p, sum_pp, sum_cp, sum_cc, sq_err;
// Check input data to correct values.
if (!CV_IS_CROSSVAL (estimateModel))
{
CV_ERROR (CV_StsBadArg,"First parameter point to not CvCrossValidationModel");
}
if (!CV_IS_STAT_MODEL (model))
{
CV_ERROR (CV_StsBadArg, "Second parameter point to not CvStatModel");
}
if (!CV_IS_MAT (trainData))
{
CV_ERROR (CV_StsBadArg, "Third parameter point to not CvMat");
}
if (!CV_IS_MAT (trainClasses))
{
CV_ERROR (CV_StsBadArg, "Fifth parameter point to not CvMat");
}
if (crVal->is_checked)
{
CV_ERROR (CV_StsInternal, "This iterations already was checked");
}
// Initialize.
k = crVal->sampleIdxEval->cols;
data = crVal->sampleIdxEval->data.i;
// Eval tested feature vectors.
CV_CALL (cvStatModelMultiPredict (model, trainData, sample_t_flag,
crVal->predict_results, NULL, crVal->sampleIdxEval));
// Count number if correct results.
responses_result = crVal->predict_results->data.fl;
if (crVal->is_regression)
{
sum_c = sum_p = sum_pp = sum_cp = sum_cc = sq_err = 0;
if (CV_MAT_TYPE (trainClasses->type) == CV_32FC1)
{
responses_fl = trainClasses->data.fl;
step = trainClasses->rows == 1 ? 1 : trainClasses->step / sizeof(float);
for (i = 0; i < k; i++)
{
te = responses_result[*data];
te1 = responses_fl[*data * step];
sum_c += te1;
sum_p += te;
sum_cc += te1 * te1;
sum_pp += te * te;
sum_cp += te1 * te;
te -= te1;
sq_err += te * te;
data++;
}
}
else
{
responses_i = trainClasses->data.i;
step = trainClasses->rows == 1 ? 1 : trainClasses->step / sizeof(int);
for (i = 0; i < k; i++)
{
te = responses_result[*data];
te1 = responses_i[*data * step];
sum_c += te1;
sum_p += te;
sum_cc += te1 * te1;
sum_pp += te * te;
sum_cp += te1 * te;
te -= te1;
sq_err += te * te;
data++;
}
}
// Fixing new internal values of accuracy.
crVal->sum_correct += sum_c;
crVal->sum_predict += sum_p;
crVal->sum_cc += sum_cc;
crVal->sum_pp += sum_pp;
crVal->sum_cp += sum_cp;
crVal->sq_error += sq_err;
}
else
{
if (CV_MAT_TYPE (trainClasses->type) == CV_32FC1)
{
responses_fl = trainClasses->data.fl;
step = trainClasses->rows == 1 ? 1 : trainClasses->step / sizeof(float);
for (i = 0, j = 0; i < k; i++)
{
if (cvRound (responses_result[*data]) == cvRound (responses_fl[*data * step]))
j++;
data++;
}
}
else
{
responses_i = trainClasses->data.i;
step = trainClasses->rows == 1 ? 1 : trainClasses->step / sizeof(int);
for (i = 0, j = 0; i < k; i++)
{
if (cvRound (responses_result[*data]) == responses_i[*data * step])
j++;
data++;
}
}
// Fixing new internal values of accuracy.
crVal->correct_results += j;
}
// Fixing that this fold already checked.
crVal->all_results += k;
crVal->is_checked = 1;
__END__
} // End of cvCrossValCheckClassifier
/****************************************************************************************/
// Return current accuracy.
ML_IMPL
float cvCrossValGetResult (const CvStatModel* estimateModel,
float* correlation)
{
float result = 0;
CV_FUNCNAME ("cvCrossValGetResult");
__BEGIN__
double te, te1;
CvCrossValidationModel* crVal = (CvCrossValidationModel*)estimateModel;
if (!CV_IS_CROSSVAL (estimateModel))
{
CV_ERROR (CV_StsBadArg, "Pointer point to not CvCrossValidationModel");
}
if (crVal->all_results)
{
if (crVal->is_regression)
{
result = ((float)crVal->sq_error) / crVal->all_results;
if (correlation)
{
te = crVal->all_results * crVal->sum_cp -
crVal->sum_correct * crVal->sum_predict;
te *= te;
te1 = (crVal->all_results * crVal->sum_cc -
crVal->sum_correct * crVal->sum_correct) *
(crVal->all_results * crVal->sum_pp -
crVal->sum_predict * crVal->sum_predict);
*correlation = (float)(te / te1);
}
}
else
{
result = ((float)crVal->correct_results) / crVal->all_results;
}
}
__END__
return result;
}
/****************************************************************************************/
// Reset cross-validation EstimateModel to state the same as it was immidiatly after
// its creating.
ML_IMPL
void cvCrossValReset (CvStatModel* estimateModel)
{
CV_FUNCNAME ("cvCrossValReset");
__BEGIN__
CvCrossValidationModel* crVal = (CvCrossValidationModel*)estimateModel;
if (!CV_IS_CROSSVAL (estimateModel))
{
CV_ERROR (CV_StsBadArg, "Pointer point to not CvCrossValidationModel");
}
crVal->current_fold = -1;
crVal->is_checked = 1;
crVal->all_results = 0;
crVal->correct_results = 0;
crVal->sq_error = 0;
crVal->sum_correct = 0;
crVal->sum_predict = 0;
crVal->sum_cc = 0;
crVal->sum_pp = 0;
crVal->sum_cp = 0;
__END__
}
/****************************************************************************************/
// This function is standart CvStatModel field to release cross-validation EstimateModel.
ML_IMPL
void cvReleaseCrossValidationModel (CvStatModel** model)
{
CvCrossValidationModel* pModel;
CV_FUNCNAME ("cvReleaseCrossValidationModel");
__BEGIN__
if (!model)
{
CV_ERROR (CV_StsNullPtr, "");
}
pModel = (CvCrossValidationModel*)*model;
if (!pModel)
{
return;
}
if (!CV_IS_CROSSVAL (pModel))
{
CV_ERROR (CV_StsBadArg, "");
}
cvFree (&pModel->sampleIdxAll);
cvFree (&pModel->folds);
cvReleaseMat (&pModel->sampleIdxEval);
cvReleaseMat (&pModel->sampleIdxTrain);
cvReleaseMat (&pModel->predict_results);
cvFree (model);
__END__
} // End of cvReleaseCrossValidationModel.
/****************************************************************************************/
// This function create cross-validation EstimateModel.
ML_IMPL CvStatModel*
cvCreateCrossValidationEstimateModel(
int samples_all,
const CvStatModelParams* estimateParams,
const CvMat* sampleIdx)
{
CvStatModel* model = NULL;
CvCrossValidationModel* crVal = NULL;
CV_FUNCNAME ("cvCreateCrossValidationEstimateModel");
__BEGIN__
int k_fold = 10;
int i, j, k, s_len;
int samples_selected;
CvRNG rng;
CvRNG* prng;
int* res_s_data;
int* te_s_data;
int* folds;
rng = cvRNG(cvGetTickCount());
cvRandInt (&rng); cvRandInt (&rng); cvRandInt (&rng); cvRandInt (&rng);
// Check input parameters.
if (estimateParams)
k_fold = ((CvCrossValidationParams*)estimateParams)->k_fold;
if (!k_fold)
{
CV_ERROR (CV_StsBadArg, "Error in parameters of cross-validation (k_fold == 0)!");
}
if (samples_all <= 0)
{
CV_ERROR (CV_StsBadArg, "<samples_all> should be positive!");
}
// Alloc memory and fill standart StatModel's fields.
CV_CALL (crVal = (CvCrossValidationModel*)cvCreateStatModel (
CV_STAT_MODEL_MAGIC_VAL | CV_CROSSVAL_MAGIC_VAL,
sizeof(CvCrossValidationModel),
cvReleaseCrossValidationModel,
NULL, NULL));
crVal->current_fold = -1;
crVal->folds_all = k_fold;
if (estimateParams && ((CvCrossValidationParams*)estimateParams)->is_regression)
crVal->is_regression = 1;
else
crVal->is_regression = 0;
if (estimateParams && ((CvCrossValidationParams*)estimateParams)->rng)
prng = ((CvCrossValidationParams*)estimateParams)->rng;
else
prng = &rng;
// Check and preprocess sample indices.
if (sampleIdx)
{
int s_step;
int s_type = 0;
if (!CV_IS_MAT (sampleIdx))
CV_ERROR (CV_StsBadArg, "Invalid sampleIdx array");
if (sampleIdx->rows != 1 && sampleIdx->cols != 1)
CV_ERROR (CV_StsBadSize, "sampleIdx array must be 1-dimensional");
s_len = sampleIdx->rows + sampleIdx->cols - 1;
s_step = sampleIdx->rows == 1 ?
1 : sampleIdx->step / CV_ELEM_SIZE(sampleIdx->type);
s_type = CV_MAT_TYPE (sampleIdx->type);
switch (s_type)
{
case CV_8UC1:
case CV_8SC1:
{
uchar* s_data = sampleIdx->data.ptr;
// sampleIdx is array of 1's and 0's -
// i.e. it is a mask of the selected samples
if( s_len != samples_all )
CV_ERROR (CV_StsUnmatchedSizes,
"Sample mask should contain as many elements as the total number of samples");
samples_selected = 0;
for (i = 0; i < s_len; i++)
samples_selected += s_data[i * s_step] != 0;
if (samples_selected == 0)
CV_ERROR (CV_StsOutOfRange, "No samples is selected!");
}
s_len = samples_selected;
break;
case CV_32SC1:
if (s_len > samples_all)
CV_ERROR (CV_StsOutOfRange,
"sampleIdx array may not contain more elements than the total number of samples");
samples_selected = s_len;
break;
default:
CV_ERROR (CV_StsUnsupportedFormat, "Unsupported sampleIdx array data type "
"(it should be 8uC1, 8sC1 or 32sC1)");
}
// Alloc additional memory for internal Idx and fill it.
/*!!*/ CV_CALL (res_s_data = crVal->sampleIdxAll =
(int*)cvAlloc (2 * s_len * sizeof(int)));
if (s_type < CV_32SC1)
{
uchar* s_data = sampleIdx->data.ptr;
for (i = 0; i < s_len; i++)
if (s_data[i * s_step])
{
*res_s_data++ = i;
}
res_s_data = crVal->sampleIdxAll;
}
else
{
int* s_data = sampleIdx->data.i;
int out_of_order = 0;
for (i = 0; i < s_len; i++)
{
res_s_data[i] = s_data[i * s_step];
if (i > 0 && res_s_data[i] < res_s_data[i - 1])
out_of_order = 1;
}
if (out_of_order)
qsort (res_s_data, s_len, sizeof(res_s_data[0]), icvCmpIntegers);
if (res_s_data[0] < 0 ||
res_s_data[s_len - 1] >= samples_all)
CV_ERROR (CV_StsBadArg, "There are out-of-range sample indices");
for (i = 1; i < s_len; i++)
if (res_s_data[i] <= res_s_data[i - 1])
CV_ERROR (CV_StsBadArg, "There are duplicated");
}
}
else // if (sampleIdx)
{
// Alloc additional memory for internal Idx and fill it.
s_len = samples_all;
CV_CALL (res_s_data = crVal->sampleIdxAll = (int*)cvAlloc (2 * s_len * sizeof(int)));
for (i = 0; i < s_len; i++)
{
*res_s_data++ = i;
}
res_s_data = crVal->sampleIdxAll;
} // if (sampleIdx) ... else
// Resort internal Idx.
te_s_data = res_s_data + s_len;
for (i = s_len; i > 1; i--)
{
j = cvRandInt (prng) % i;
k = *(--te_s_data);
*te_s_data = res_s_data[j];
res_s_data[j] = k;
}
// Duplicate resorted internal Idx.
// It will be used to simplify operation of getting trainIdx.
te_s_data = res_s_data + s_len;
for (i = 0; i < s_len; i++)
{
*te_s_data++ = *res_s_data++;
}
// Cut sampleIdxAll to parts.
if (k_fold > 0)
{
if (k_fold > s_len)
{
CV_ERROR (CV_StsBadArg,
"Error in parameters of cross-validation ('k_fold' > #samples)!");
}
folds = crVal->folds = (int*) cvAlloc ((k_fold + 1) * sizeof (int));
*folds++ = 0;
for (i = 1; i < k_fold; i++)
{
*folds++ = cvRound (i * s_len * 1. / k_fold);
}
*folds = s_len;
folds = crVal->folds;
crVal->max_fold_size = (s_len - 1) / k_fold + 1;
}
else
{
k = -k_fold;
crVal->max_fold_size = k;
if (k >= s_len)
{
CV_ERROR (CV_StsBadArg,
"Error in parameters of cross-validation (-'k_fold' > #samples)!");
}
crVal->folds_all = k = (s_len - 1) / k + 1;
folds = crVal->folds = (int*) cvAlloc ((k + 1) * sizeof (int));
for (i = 0; i < k; i++)
{
*folds++ = -i * k_fold;
}
*folds = s_len;
folds = crVal->folds;
}
// Prepare other internal fields to working.
CV_CALL (crVal->predict_results = cvCreateMat (1, samples_all, CV_32FC1));
CV_CALL (crVal->sampleIdxEval = cvCreateMatHeader (1, 1, CV_32SC1));
CV_CALL (crVal->sampleIdxTrain = cvCreateMatHeader (1, 1, CV_32SC1));
crVal->sampleIdxEval->cols = 0;
crVal->sampleIdxTrain->cols = 0;
crVal->samples_all = s_len;
crVal->is_checked = 1;
crVal->getTrainIdxMat = cvCrossValGetTrainIdxMatrix;
crVal->getCheckIdxMat = cvCrossValGetCheckIdxMatrix;
crVal->nextStep = cvCrossValNextStep;
crVal->check = cvCrossValCheckClassifier;
crVal->getResult = cvCrossValGetResult;
crVal->reset = cvCrossValReset;
model = (CvStatModel*)crVal;
__END__
if (!model)
{
cvReleaseCrossValidationModel ((CvStatModel**)&crVal);
}
return model;
} // End of cvCreateCrossValidationEstimateModel
/****************************************************************************************\
* Extended interface with backcalls for models *
\****************************************************************************************/
ML_IMPL float
cvCrossValidation (const CvMat* trueData,
int tflag,
const CvMat* trueClasses,
CvStatModel* (*createClassifier) (const CvMat*,
int,
const CvMat*,
const CvClassifierTrainParams*,
const CvMat*,
const CvMat*,
const CvMat*,
const CvMat*),
const CvClassifierTrainParams* estimateParams,
const CvClassifierTrainParams* trainParams,
const CvMat* compIdx,
const CvMat* sampleIdx,
CvStatModel** pCrValModel,
const CvMat* typeMask,
const CvMat* missedMeasurementMask)
{
CvCrossValidationModel* crVal = NULL;
float result = 0;
CvStatModel* pClassifier = NULL;
CV_FUNCNAME ("cvCrossValidation");
__BEGIN__
const CvMat* trainDataIdx;
int samples_all;
// checking input data
if ((createClassifier) == NULL)
{
CV_ERROR (CV_StsNullPtr, "Null pointer to functiion which create classifier");
}
if (pCrValModel && *pCrValModel && !CV_IS_CROSSVAL(*pCrValModel))
{
CV_ERROR (CV_StsBadArg,
"<pCrValModel> point to not cross-validation model");
}
// initialization
if (pCrValModel && *pCrValModel)
{
crVal = (CvCrossValidationModel*)*pCrValModel;
crVal->reset ((CvStatModel*)crVal);
}
else
{
samples_all = ((tflag) ? trueData->rows : trueData->cols);
CV_CALL (crVal = (CvCrossValidationModel*)
cvCreateCrossValidationEstimateModel (samples_all, estimateParams, sampleIdx));
}
CV_CALL (trainDataIdx = crVal->getTrainIdxMat ((CvStatModel*)crVal));
// operation loop
for (; crVal->nextStep((CvStatModel*)crVal) != 0; )
{
CV_CALL (pClassifier = createClassifier (trueData, tflag, trueClasses,
trainParams, compIdx, trainDataIdx, typeMask, missedMeasurementMask));
CV_CALL (crVal->check ((CvStatModel*)crVal, pClassifier,
trueData, tflag, trueClasses));
pClassifier->release (&pClassifier);
}
// Get result and fill output field.
CV_CALL (result = crVal->getResult ((CvStatModel*)crVal, 0));
if (pCrValModel && !*pCrValModel)
*pCrValModel = (CvStatModel*)crVal;
__END__
// Free all memory that should be freed.
if (pClassifier)
pClassifier->release (&pClassifier);
if (crVal && (!pCrValModel || !*pCrValModel))
crVal->release ((CvStatModel**)&crVal);
return result;
} // End of cvCrossValidation
#endif
/* End of file */

@ -2,6 +2,8 @@
#include "precomp.hpp"
#include <time.h>
#if 0
#define pCvSeq CvSeq*
#define pCvDTreeNode CvDTreeNode*
@ -1359,3 +1361,6 @@ float CvGBTrees::predict( const cv::Mat& sample, const cv::Mat& _missing,
return predict(&_sample, _missing.empty() ? 0 : &miss, 0,
slice==cv::Range::all() ? CV_WHOLE_SEQ : cvSlice(slice.start, slice.end), k);
}
#endif

File diff suppressed because it is too large Load Diff

@ -7,9 +7,11 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2014, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
@ -22,7 +24,7 @@
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
@ -44,439 +46,311 @@
* K-Nearest Neighbors Classifier *
\****************************************************************************************/
// k Nearest Neighbors
CvKNearest::CvKNearest()
{
samples = 0;
clear();
}
CvKNearest::~CvKNearest()
{
clear();
}
namespace cv {
namespace ml {
CvKNearest::CvKNearest( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _sample_idx, bool _is_regression, int _max_k )
class KNearestImpl : public KNearest
{
samples = 0;
train( _train_data, _responses, _sample_idx, _is_regression, _max_k, false );
}
void CvKNearest::clear()
{
while( samples )
public:
KNearestImpl(bool __isClassifier=true)
{
CvVectors* next_samples = samples->next;
cvFree( &samples->data.fl );
cvFree( &samples );
samples = next_samples;
defaultK = 3;
_isClassifier = __isClassifier;
}
var_count = 0;
total = 0;
max_k = 0;
}
virtual ~KNearestImpl() {}
int CvKNearest::get_max_k() const { return max_k; }
bool isClassifier() const { return _isClassifier; }
bool isTrained() const { return !samples.empty(); }
int CvKNearest::get_var_count() const { return var_count; }
String getDefaultModelName() const { return "opencv_ml_knn"; }
bool CvKNearest::is_regression() const { return regression; }
int CvKNearest::get_sample_count() const { return total; }
bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _sample_idx, bool _is_regression,
int _max_k, bool _update_base )
{
bool ok = false;
CvMat* responses = 0;
void clear()
{
samples.release();
responses.release();
}
CV_FUNCNAME( "CvKNearest::train" );
int getVarCount() const { return samples.cols; }
__BEGIN__;
bool train( const Ptr<TrainData>& data, int flags )
{
Mat new_samples = data->getTrainSamples(ROW_SAMPLE);
Mat new_responses;
data->getTrainResponses().convertTo(new_responses, CV_32F);
bool update = (flags & UPDATE_MODEL) != 0 && !samples.empty();
CvVectors* _samples = 0;
float** _data = 0;
int _count = 0, _dims = 0, _dims_all = 0, _rsize = 0;
CV_Assert( new_samples.type() == CV_32F );
if( !_update_base )
if( !update )
{
clear();
// Prepare training data and related parameters.
// Treat categorical responses as ordered - to prevent class label compression and
// to enable entering new classes in the updates
CV_CALL( cvPrepareTrainData( "CvKNearest::train", _train_data, CV_ROW_SAMPLE,
_responses, CV_VAR_ORDERED, 0, _sample_idx, true, (const float***)&_data,
&_count, &_dims, &_dims_all, &responses, 0, 0 ));
if( !responses )
CV_ERROR( CV_StsNoMem, "Could not allocate memory for responses" );
if( _update_base && _dims != var_count )
CV_ERROR( CV_StsBadArg, "The newly added data have different dimensionality" );
if( !_update_base )
}
else
{
if( _max_k < 1 )
CV_ERROR( CV_StsOutOfRange, "max_k must be a positive number" );
regression = _is_regression;
var_count = _dims;
max_k = _max_k;
CV_Assert( new_samples.cols == samples.cols &&
new_responses.cols == responses.cols );
}
_rsize = _count*sizeof(float);
CV_CALL( _samples = (CvVectors*)cvAlloc( sizeof(*_samples) + _rsize ));
_samples->next = samples;
_samples->type = CV_32F;
_samples->data.fl = _data;
_samples->count = _count;
total += _count;
samples.push_back(new_samples);
responses.push_back(new_responses);
samples = _samples;
memcpy( _samples + 1, responses->data.fl, _rsize );
ok = true;
__END__;
if( responses && responses->data.ptr != _responses->data.ptr )
cvReleaseMat(&responses);
return ok;
}
return true;
}
void findNearestCore( const Mat& _samples, int k0, const Range& range,
Mat* results, Mat* neighbor_responses,
Mat* dists, float* presult ) const
{
int testidx, baseidx, i, j, d = samples.cols, nsamples = samples.rows;
int testcount = range.end - range.start;
int k = std::min(k0, nsamples);
AutoBuffer<float> buf(testcount*k*2);
float* dbuf = buf;
float* rbuf = dbuf + testcount*k;
void CvKNearest::find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
float* neighbor_responses, const float** neighbors, float* dist ) const
{
int i, j, count = end - start, k1 = 0, k2 = 0, d = var_count;
CvVectors* s = samples;
const float* rptr = responses.ptr<float>();
for( ; s != 0; s = s->next )
for( testidx = 0; testidx < testcount; testidx++ )
{
int n = s->count;
for( j = 0; j < n; j++ )
for( i = 0; i < k; i++ )
{
for( i = 0; i < count; i++ )
dbuf[testidx*k + i] = FLT_MAX;
rbuf[testidx*k + i] = 0.f;
}
}
for( baseidx = 0; baseidx < nsamples; baseidx++ )
{
double sum = 0;
Cv32suf si;
const float* v = s->data.fl[j];
const float* u = (float*)(_samples->data.ptr + _samples->step*(start + i));
Cv32suf* dd = (Cv32suf*)(dist + i*k);
float* nr;
const float** nn;
int t, ii, ii1;
for( testidx = 0; testidx < testcount; testidx++ )
{
const float* v = samples.ptr<float>(baseidx);
const float* u = _samples.ptr<float>(testidx + range.start);
for( t = 0; t <= d - 4; t += 4 )
float s = 0;
for( i = 0; i <= d - 4; i += 4 )
{
double t0 = u[t] - v[t], t1 = u[t+1] - v[t+1];
double t2 = u[t+2] - v[t+2], t3 = u[t+3] - v[t+3];
sum += t0*t0 + t1*t1 + t2*t2 + t3*t3;
float t0 = u[i] - v[i], t1 = u[i+1] - v[i+1];
float t2 = u[i+2] - v[i+2], t3 = u[i+3] - v[i+3];
s += t0*t0 + t1*t1 + t2*t2 + t3*t3;
}
for( ; t < d; t++ )
for( ; i < d; i++ )
{
double t0 = u[t] - v[t];
sum += t0*t0;
float t0 = u[i] - v[i];
s += t0*t0;
}
si.f = (float)sum;
for( ii = k1-1; ii >= 0; ii-- )
if( si.i > dd[ii].i )
Cv32suf si;
si.f = (float)s;
Cv32suf* dd = (Cv32suf*)(&dbuf[testidx*k]);
float* nr = &rbuf[testidx*k];
for( i = k; i > 0; i-- )
if( si.i >= dd[i-1].i )
break;
if( ii >= k-1 )
if( i >= k )
continue;
nr = neighbor_responses + i*k;
nn = neighbors ? neighbors + (start + i)*k : 0;
for( ii1 = k2 - 1; ii1 > ii; ii1-- )
for( j = k-2; j >= i; j-- )
{
dd[ii1+1].i = dd[ii1].i;
nr[ii1+1] = nr[ii1];
if( nn ) nn[ii1+1] = nn[ii1];
}
dd[ii+1].i = si.i;
nr[ii+1] = ((float*)(s + 1))[j];
if( nn )
nn[ii+1] = v;
dd[j+1].i = dd[j].i;
nr[j+1] = nr[j];
}
k1 = MIN( k1+1, k );
k2 = MIN( k1, k-1 );
dd[i].i = si.i;
nr[i] = rptr[baseidx];
}
}
}
float CvKNearest::write_results( int k, int k1, int start, int end,
const float* neighbor_responses, const float* dist,
CvMat* _results, CvMat* _neighbor_responses,
CvMat* _dist, Cv32suf* sort_buf ) const
{
float result = 0.f;
int i, j, j1, count = end - start;
double inv_scale = 1./k1;
int rstep = _results && !CV_IS_MAT_CONT(_results->type) ? _results->step/sizeof(result) : 1;
float inv_scale = 1./k;
for( i = 0; i < count; i++ )
for( testidx = 0; testidx < testcount; testidx++ )
{
const Cv32suf* nr = (const Cv32suf*)(neighbor_responses + i*k);
float* dst;
float r;
if( _results || start+i == 0 )
if( neighbor_responses )
{
if( regression )
float* nr = neighbor_responses->ptr<float>(testidx + range.start);
for( j = 0; j < k; j++ )
nr[j] = rbuf[testidx*k + j];
for( ; j < k0; j++ )
nr[j] = 0.f;
}
if( dists )
{
double s = 0;
for( j = 0; j < k1; j++ )
s += nr[j].f;
r = (float)(s*inv_scale);
float* dptr = dists->ptr<float>(testidx + range.start);
for( j = 0; j < k; j++ )
dptr[j] = dbuf[testidx*k + j];
for( ; j < k0; j++ )
dptr[j] = 0.f;
}
if( results || testidx+range.start == 0 )
{
if( !_isClassifier || k == 1 )
{
float s = 0.f;
for( j = 0; j < k; j++ )
s += rbuf[testidx*k + j];
result = (float)(s*inv_scale);
}
else
{
int prev_start = 0, best_count = 0, cur_count;
Cv32suf best_val;
for( j = 0; j < k1; j++ )
sort_buf[j].i = nr[j].i;
for( j = k1-1; j > 0; j-- )
float* rp = rbuf + testidx*k;
for( j = k-1; j > 0; j-- )
{
bool swap_fl = false;
for( j1 = 0; j1 < j; j1++ )
if( sort_buf[j1].i > sort_buf[j1+1].i )
for( i = 0; i < j; i++ )
{
int t;
CV_SWAP( sort_buf[j1].i, sort_buf[j1+1].i, t );
if( rp[i] > rp[i+1] )
{
std::swap(rp[i], rp[i+1]);
swap_fl = true;
}
}
if( !swap_fl )
break;
}
best_val.i = 0;
for( j = 1; j <= k1; j++ )
if( j == k1 || sort_buf[j].i != sort_buf[j-1].i )
result = rp[0];
int prev_start = 0;
int best_count = 0;
for( j = 1; j <= k; j++ )
{
cur_count = j - prev_start;
if( best_count < cur_count )
if( j == k || rp[j] != rp[j-1] )
{
best_count = cur_count;
best_val.i = sort_buf[j-1].i;
int count = j - prev_start;
if( best_count < count )
{
best_count = count;
result = rp[j-1];
}
prev_start = j;
}
r = best_val.f;
}
if( start+i == 0 )
result = r;
if( _results )
_results->data.fl[(start + i)*rstep] = r;
}
if( _neighbor_responses )
{
dst = (float*)(_neighbor_responses->data.ptr +
(start + i)*_neighbor_responses->step);
for( j = 0; j < k1; j++ )
dst[j] = nr[j].f;
for( ; j < k; j++ )
dst[j] = 0.f;
if( results )
results->at<float>(testidx + range.start) = result;
if( presult && testidx+range.start == 0 )
*presult = result;
}
if( _dist )
{
dst = (float*)(_dist->data.ptr + (start + i)*_dist->step);
for( j = 0; j < k1; j++ )
dst[j] = dist[j + i*k];
for( ; j < k; j++ )
dst[j] = 0.f;
}
}
return result;
}
struct P1 : cv::ParallelLoopBody {
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
struct findKNearestInvoker : public ParallelLoopBody
{
findKNearestInvoker(const KNearestImpl* _p, int _k, const Mat& __samples,
Mat* __results, Mat* __neighbor_responses, Mat* __dists, float* _presult)
{
pointer = _pointer;
p = _p;
k = _k;
_samples = __samples;
_neighbors = __neighbors;
k1 = _k1;
_samples = &__samples;
_results = __results;
_neighbor_responses = __neighbor_responses;
_dist = __dist;
result = _result;
buf_sz = _buf_sz;
_dists = __dists;
presult = _presult;
}
const CvKNearest* pointer;
int k;
const CvMat* _samples;
const float** _neighbors;
int k1;
CvMat* _results;
CvMat* _neighbor_responses;
CvMat* _dist;
float* result;
int buf_sz;
void operator()( const cv::Range& range ) const
void operator()( const Range& range ) const
{
cv::AutoBuffer<float> buf(buf_sz);
for(int i = range.start; i < range.end; i += 1 )
int delta = std::min(range.end - range.start, 256);
for( int start = range.start; start < range.end; start += delta )
{
float* neighbor_responses = &buf[0];
float* dist = neighbor_responses + 1*k;
Cv32suf* sort_buf = (Cv32suf*)(dist + 1*k);
pointer->find_neighbors_direct( _samples, k, i, i + 1,
neighbor_responses, _neighbors, dist );
float r = pointer->write_results( k, k1, i, i + 1, neighbor_responses, dist,
_results, _neighbor_responses, _dist, sort_buf );
if( i == 0 )
*result = r;
p->findNearestCore( *_samples, k, Range(start, std::min(start + delta, range.end)),
_results, _neighbor_responses, _dists, presult );
}
}
};
float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
const float** _neighbors, CvMat* _neighbor_responses, CvMat* _dist ) const
{
const KNearestImpl* p;
int k;
const Mat* _samples;
Mat* _results;
Mat* _neighbor_responses;
Mat* _dists;
float* presult;
};
float findNearest( InputArray _samples, int k,
OutputArray _results,
OutputArray _neighborResponses,
OutputArray _dists ) const
{
float result = 0.f;
const int max_blk_count = 128, max_buf_sz = 1 << 12;
if( !samples )
CV_Error( CV_StsError, "The search tree must be constructed first using train method" );
if( !CV_IS_MAT(_samples) ||
CV_MAT_TYPE(_samples->type) != CV_32FC1 ||
_samples->cols != var_count )
CV_Error( CV_StsBadArg, "Input samples must be floating-point matrix (<num_samples>x<var_count>)" );
CV_Assert( 0 < k );
if( _results && (!CV_IS_MAT(_results) ||
(_results->cols != 1 && _results->rows != 1) ||
_results->cols + _results->rows - 1 != _samples->rows) )
CV_Error( CV_StsBadArg,
"The results must be 1d vector containing as much elements as the number of samples" );
Mat test_samples = _samples.getMat();
CV_Assert( test_samples.type() == CV_32F && test_samples.cols == samples.cols );
int testcount = test_samples.rows;
if( _results && CV_MAT_TYPE(_results->type) != CV_32FC1 &&
(CV_MAT_TYPE(_results->type) != CV_32SC1 || regression))
CV_Error( CV_StsUnsupportedFormat,
"The results must be floating-point or integer (in case of classification) vector" );
if( k < 1 || k > max_k )
CV_Error( CV_StsOutOfRange, "k must be within 1..max_k range" );
if( _neighbor_responses )
if( testcount == 0 )
{
if( !CV_IS_MAT(_neighbor_responses) || CV_MAT_TYPE(_neighbor_responses->type) != CV_32FC1 ||
_neighbor_responses->rows != _samples->rows || _neighbor_responses->cols != k )
CV_Error( CV_StsBadArg,
"The neighbor responses (if present) must be floating-point matrix of <num_samples> x <k> size" );
_results.release();
_neighborResponses.release();
_dists.release();
return 0.f;
}
if( _dist )
Mat res, nr, d, *pres = 0, *pnr = 0, *pd = 0;
if( _results.needed() )
{
if( !CV_IS_MAT(_dist) || CV_MAT_TYPE(_dist->type) != CV_32FC1 ||
_dist->rows != _samples->rows || _dist->cols != k )
CV_Error( CV_StsBadArg,
"The distances from the neighbors (if present) must be floating-point matrix of <num_samples> x <k> size" );
_results.create(testcount, 1, CV_32F);
pres = &(res = _results.getMat());
}
if( _neighborResponses.needed() )
{
_neighborResponses.create(testcount, k, CV_32F);
pnr = &(nr = _neighborResponses.getMat());
}
if( _dists.needed() )
{
_dists.create(testcount, k, CV_32F);
pd = &(d = _dists.getMat());
}
int count = _samples->rows;
int count_scale = k*2;
int blk_count0 = MIN( count, max_blk_count );
int buf_sz = MIN( blk_count0 * count_scale, max_buf_sz );
blk_count0 = MAX( buf_sz/count_scale, 1 );
blk_count0 += blk_count0 % 2;
blk_count0 = MIN( blk_count0, count );
buf_sz = blk_count0 * count_scale + k;
int k1 = get_sample_count();
k1 = MIN( k1, k );
cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
_results, _neighbor_responses, _dist, &result)
);
findKNearestInvoker invoker(this, k, test_samples, pres, pnr, pd, &result);
parallel_for_(Range(0, testcount), invoker);
//invoker(Range(0, testcount));
return result;
}
using namespace cv;
CvKNearest::CvKNearest( const Mat& _train_data, const Mat& _responses,
const Mat& _sample_idx, bool _is_regression, int _max_k )
{
samples = 0;
train(_train_data, _responses, _sample_idx, _is_regression, _max_k, false );
}
bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
const Mat& _sample_idx, bool _is_regression,
int _max_k, bool _update_base )
{
CvMat tdata = _train_data, responses = _responses, sidx = _sample_idx;
return train(&tdata, &responses, sidx.data.ptr ? &sidx : 0, _is_regression, _max_k, _update_base );
}
float CvKNearest::find_nearest( const Mat& _samples, int k, Mat* _results,
const float** _neighbors, Mat* _neighbor_responses,
Mat* _dist ) const
{
CvMat s = _samples, results, *presults = 0, nresponses, *pnresponses = 0, dist, *pdist = 0;
}
if( _results )
float predict(InputArray inputs, OutputArray outputs, int) const
{
if(!(_results->data && (_results->type() == CV_32F ||
(_results->type() == CV_32S && regression)) &&
(_results->cols == 1 || _results->rows == 1) &&
_results->cols + _results->rows - 1 == _samples.rows) )
_results->create(_samples.rows, 1, CV_32F);
presults = &(results = *_results);
return findNearest( inputs, defaultK, outputs, noArray(), noArray() );
}
if( _neighbor_responses )
void write( FileStorage& fs ) const
{
if(!(_neighbor_responses->data && _neighbor_responses->type() == CV_32F &&
_neighbor_responses->cols == k && _neighbor_responses->rows == _samples.rows) )
_neighbor_responses->create(_samples.rows, k, CV_32F);
pnresponses = &(nresponses = *_neighbor_responses);
fs << "is_classifier" << (int)_isClassifier;
fs << "samples" << samples;
fs << "responses" << responses;
}
if( _dist )
void read( const FileNode& fn )
{
if(!(_dist->data && _dist->type() == CV_32F &&
_dist->cols == k && _dist->rows == _samples.rows) )
_dist->create(_samples.rows, k, CV_32F);
pdist = &(dist = *_dist);
clear();
_isClassifier = (int)fn["is_classifier"] != 0;
fn["samples"] >> samples;
fn["responses"] >> responses;
}
return find_nearest(&s, k, presults, _neighbors, pnresponses, pdist );
}
void setDefaultK(int _k) { defaultK = _k; }
int getDefaultK() const { return defaultK; }
Mat samples;
Mat responses;
bool _isClassifier;
int defaultK;
};
float CvKNearest::find_nearest( const cv::Mat& _samples, int k, CV_OUT cv::Mat& results,
CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const
Ptr<KNearest> KNearest::create(bool isClassifier)
{
return find_nearest(_samples, k, &results, 0, &neighborResponses, &dists);
return makePtr<KNearestImpl>(isClassifier);
}
}
}
/* End of file */

@ -1,63 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
namespace cv
{
CV_INIT_ALGORITHM(EM, "StatModel.EM",
obj.info()->addParam(obj, "nclusters", obj.nclusters);
obj.info()->addParam(obj, "covMatType", obj.covMatType);
obj.info()->addParam(obj, "maxIters", obj.maxIters);
obj.info()->addParam(obj, "epsilon", obj.epsilon);
obj.info()->addParam(obj, "weights", obj.weights, true);
obj.info()->addParam(obj, "means", obj.means, true);
obj.info()->addParam(obj, "covs", obj.covs, true))
bool initModule_ml(void)
{
Ptr<Algorithm> em = createEM_ptr_hidden();
return em->info() != 0;
}
}

@ -40,207 +40,123 @@
#include "precomp.hpp"
CvNormalBayesClassifier::CvNormalBayesClassifier()
{
var_count = var_all = 0;
var_idx = 0;
cls_labels = 0;
count = 0;
sum = 0;
productsum = 0;
avg = 0;
inv_eigen_values = 0;
cov_rotate_mats = 0;
c = 0;
default_model_name = "my_nb";
}
namespace cv {
namespace ml {
NormalBayesClassifier::~NormalBayesClassifier() {}
void CvNormalBayesClassifier::clear()
class NormalBayesClassifierImpl : public NormalBayesClassifier
{
if( cls_labels )
public:
NormalBayesClassifierImpl()
{
for( int cls = 0; cls < cls_labels->cols; cls++ )
{
cvReleaseMat( &count[cls] );
cvReleaseMat( &sum[cls] );
cvReleaseMat( &productsum[cls] );
cvReleaseMat( &avg[cls] );
cvReleaseMat( &inv_eigen_values[cls] );
cvReleaseMat( &cov_rotate_mats[cls] );
}
nallvars = 0;
}
cvReleaseMat( &cls_labels );
cvReleaseMat( &var_idx );
cvReleaseMat( &c );
cvFree( &count );
}
CvNormalBayesClassifier::~CvNormalBayesClassifier()
{
clear();
}
CvNormalBayesClassifier::CvNormalBayesClassifier(
const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx, const CvMat* _sample_idx )
{
var_count = var_all = 0;
var_idx = 0;
cls_labels = 0;
count = 0;
sum = 0;
productsum = 0;
avg = 0;
inv_eigen_values = 0;
cov_rotate_mats = 0;
c = 0;
default_model_name = "my_nb";
train( _train_data, _responses, _var_idx, _sample_idx );
}
bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx, const CvMat* _sample_idx, bool update )
{
bool train( const Ptr<TrainData>& trainData, int flags )
{
const float min_variation = FLT_EPSILON;
bool result = false;
CvMat* responses = 0;
const float** train_data = 0;
CvMat* __cls_labels = 0;
CvMat* __var_idx = 0;
CvMat* cov = 0;
Mat responses = trainData->getNormCatResponses();
Mat __cls_labels = trainData->getClassLabels();
Mat __var_idx = trainData->getVarIdx();
Mat samples = trainData->getTrainSamples();
int nclasses = (int)__cls_labels.total();
CV_FUNCNAME( "CvNormalBayesClassifier::train" );
int nvars = trainData->getNVars();
int s, c1, c2, cls;
__BEGIN__;
int cls, nsamples = 0, _var_count = 0, _var_all = 0, nclasses = 0;
int s, c1, c2;
const int* responses_data;
CV_CALL( cvPrepareTrainData( 0,
_train_data, CV_ROW_SAMPLE, _responses, CV_VAR_CATEGORICAL,
_var_idx, _sample_idx, false, &train_data,
&nsamples, &_var_count, &_var_all, &responses,
&__cls_labels, &__var_idx ));
int __nallvars = trainData->getNAllVars();
bool update = (flags & UPDATE_MODEL) != 0;
if( !update )
{
const size_t mat_size = sizeof(CvMat*);
size_t data_size;
nallvars = __nallvars;
count.resize(nclasses);
sum.resize(nclasses);
productsum.resize(nclasses);
avg.resize(nclasses);
inv_eigen_values.resize(nclasses);
cov_rotate_mats.resize(nclasses);
clear();
for( cls = 0; cls < nclasses; cls++ )
{
count[cls] = Mat::zeros( 1, nvars, CV_32SC1 );
sum[cls] = Mat::zeros( 1, nvars, CV_64FC1 );
productsum[cls] = Mat::zeros( nvars, nvars, CV_64FC1 );
avg[cls] = Mat::zeros( 1, nvars, CV_64FC1 );
inv_eigen_values[cls] = Mat::zeros( 1, nvars, CV_64FC1 );
cov_rotate_mats[cls] = Mat::zeros( nvars, nvars, CV_64FC1 );
}
var_idx = __var_idx;
cls_labels = __cls_labels;
__var_idx = __cls_labels = 0;
var_count = _var_count;
var_all = _var_all;
nclasses = cls_labels->cols;
data_size = nclasses*6*mat_size;
CV_CALL( count = (CvMat**)cvAlloc( data_size ));
memset( count, 0, data_size );
sum = count + nclasses;
productsum = sum + nclasses;
avg = productsum + nclasses;
inv_eigen_values= avg + nclasses;
cov_rotate_mats = inv_eigen_values + nclasses;
CV_CALL( c = cvCreateMat( 1, nclasses, CV_64FC1 ));
for( cls = 0; cls < nclasses; cls++ )
{
CV_CALL(count[cls] = cvCreateMat( 1, var_count, CV_32SC1 ));
CV_CALL(sum[cls] = cvCreateMat( 1, var_count, CV_64FC1 ));
CV_CALL(productsum[cls] = cvCreateMat( var_count, var_count, CV_64FC1 ));
CV_CALL(avg[cls] = cvCreateMat( 1, var_count, CV_64FC1 ));
CV_CALL(inv_eigen_values[cls] = cvCreateMat( 1, var_count, CV_64FC1 ));
CV_CALL(cov_rotate_mats[cls] = cvCreateMat( var_count, var_count, CV_64FC1 ));
CV_CALL(cvZero( count[cls] ));
CV_CALL(cvZero( sum[cls] ));
CV_CALL(cvZero( productsum[cls] ));
CV_CALL(cvZero( avg[cls] ));
CV_CALL(cvZero( inv_eigen_values[cls] ));
CV_CALL(cvZero( cov_rotate_mats[cls] ));
}
c.create(1, nclasses, CV_64FC1);
}
else
{
// check that the new training data has the same dimensionality etc.
if( _var_count != var_count || _var_all != var_all || !((!_var_idx && !var_idx) ||
(_var_idx && var_idx && cvNorm(_var_idx,var_idx,CV_C) < DBL_EPSILON)) )
CV_ERROR( CV_StsBadArg,
"The new training data is inconsistent with the original training data" );
if( cls_labels->cols != __cls_labels->cols ||
cvNorm(cls_labels, __cls_labels, CV_C) > DBL_EPSILON )
CV_ERROR( CV_StsNotImplemented,
"In the current implementation the new training data must have absolutely "
"the same set of class labels as used in the original training data" );
nclasses = cls_labels->cols;
if( nallvars != __nallvars ||
var_idx.size() != __var_idx.size() ||
norm(var_idx, __var_idx, NORM_INF) != 0 ||
cls_labels.size() != __cls_labels.size() ||
norm(cls_labels, __cls_labels, NORM_INF) != 0 )
CV_Error( CV_StsBadArg,
"The new training data is inconsistent with the original training data; varIdx and the class labels should be the same" );
}
responses_data = responses->data.i;
CV_CALL( cov = cvCreateMat( _var_count, _var_count, CV_64FC1 ));
Mat cov( nvars, nvars, CV_64FC1 );
int nsamples = samples.rows;
/* process train data (count, sum , productsum) */
// process train data (count, sum , productsum)
for( s = 0; s < nsamples; s++ )
{
cls = responses_data[s];
int* count_data = count[cls]->data.i;
double* sum_data = sum[cls]->data.db;
double* prod_data = productsum[cls]->data.db;
const float* train_vec = train_data[s];
cls = responses.at<int>(s);
int* count_data = count[cls].ptr<int>();
double* sum_data = sum[cls].ptr<double>();
double* prod_data = productsum[cls].ptr<double>();
const float* train_vec = samples.ptr<float>(s);
for( c1 = 0; c1 < _var_count; c1++, prod_data += _var_count )
for( c1 = 0; c1 < nvars; c1++, prod_data += nvars )
{
double val1 = train_vec[c1];
sum_data[c1] += val1;
count_data[c1]++;
for( c2 = c1; c2 < _var_count; c2++ )
for( c2 = c1; c2 < nvars; c2++ )
prod_data[c2] += train_vec[c2]*val1;
}
}
cvReleaseMat( &responses );
responses = 0;
/* calculate avg, covariance matrix, c */
Mat vt;
// calculate avg, covariance matrix, c
for( cls = 0; cls < nclasses; cls++ )
{
double det = 1;
int i, j;
CvMat* w = inv_eigen_values[cls];
int* count_data = count[cls]->data.i;
double* avg_data = avg[cls]->data.db;
double* sum1 = sum[cls]->data.db;
Mat& w = inv_eigen_values[cls];
int* count_data = count[cls].ptr<int>();
double* avg_data = avg[cls].ptr<double>();
double* sum1 = sum[cls].ptr<double>();
cvCompleteSymm( productsum[cls], 0 );
completeSymm(productsum[cls], 0);
for( j = 0; j < _var_count; j++ )
for( j = 0; j < nvars; j++ )
{
int n = count_data[j];
avg_data[j] = n ? sum1[j] / n : 0.;
}
count_data = count[cls]->data.i;
avg_data = avg[cls]->data.db;
sum1 = sum[cls]->data.db;
count_data = count[cls].ptr<int>();
avg_data = avg[cls].ptr<double>();
sum1 = sum[cls].ptr<double>();
for( i = 0; i < _var_count; i++ )
for( i = 0; i < nvars; i++ )
{
double* avg2_data = avg[cls]->data.db;
double* sum2 = sum[cls]->data.db;
double* prod_data = productsum[cls]->data.db + i*_var_count;
double* cov_data = cov->data.db + i*_var_count;
double* avg2_data = avg[cls].ptr<double>();
double* sum2 = sum[cls].ptr<double>();
double* prod_data = productsum[cls].ptr<double>(i);
double* cov_data = cov.ptr<double>(i);
double s1val = sum1[i];
double avg1 = avg_data[i];
int _count = count_data[i];
@ -254,109 +170,104 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
}
}
CV_CALL( cvCompleteSymm( cov, 1 ));
CV_CALL( cvSVD( cov, w, cov_rotate_mats[cls], 0, CV_SVD_U_T ));
CV_CALL( cvMaxS( w, min_variation, w ));
for( j = 0; j < _var_count; j++ )
det *= w->data.db[j];
completeSymm( cov, 1 );
CV_CALL( cvDiv( NULL, w, w ));
c->data.db[cls] = det > 0 ? log(det) : -700;
}
SVD::compute(cov, w, cov_rotate_mats[cls], noArray());
transpose(cov_rotate_mats[cls], cov_rotate_mats[cls]);
cv::max(w, min_variation, w);
for( j = 0; j < nvars; j++ )
det *= w.at<double>(j);
result = true;
__END__;
if( !result || cvGetErrStatus() < 0 )
clear();
cvReleaseMat( &cov );
cvReleaseMat( &__cls_labels );
cvReleaseMat( &__var_idx );
cvFree( &train_data );
divide(1., w, w);
c.at<double>(cls) = det > 0 ? log(det) : -700;
}
return result;
}
return true;
}
struct predict_body : cv::ParallelLoopBody {
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
CvMat* _results, float* _value, int _var_count1, CvMat* _results_prob
)
class NBPredictBody : public ParallelLoopBody
{
public:
NBPredictBody( const Mat& _c, const vector<Mat>& _cov_rotate_mats,
const vector<Mat>& _inv_eigen_values,
const vector<Mat>& _avg,
const Mat& _samples, const Mat& _vidx, const Mat& _cls_labels,
Mat& _results, Mat& _results_prob, bool _rawOutput )
{
c = _c;
cov_rotate_mats = _cov_rotate_mats;
inv_eigen_values = _inv_eigen_values;
avg = _avg;
samples = _samples;
vidx = _vidx;
cls_labels = _cls_labels;
results = _results;
value = _value;
var_count1 = _var_count1;
results_prob = _results_prob;
c = &_c;
cov_rotate_mats = &_cov_rotate_mats;
inv_eigen_values = &_inv_eigen_values;
avg = &_avg;
samples = &_samples;
vidx = &_vidx;
cls_labels = &_cls_labels;
results = &_results;
results_prob = _results_prob.data ? &_results_prob : 0;
rawOutput = _rawOutput;
}
CvMat* c;
CvMat** cov_rotate_mats;
CvMat** inv_eigen_values;
CvMat** avg;
const CvMat* samples;
const int* vidx;
CvMat* cls_labels;
const Mat* c;
const vector<Mat>* cov_rotate_mats;
const vector<Mat>* inv_eigen_values;
const vector<Mat>* avg;
const Mat* samples;
const Mat* vidx;
const Mat* cls_labels;
CvMat* results_prob;
CvMat* results;
Mat* results_prob;
Mat* results;
float* value;
int var_count1;
bool rawOutput;
void operator()( const cv::Range& range ) const
void operator()( const Range& range ) const
{
int cls = -1;
int rtype = 0, rstep = 0, rptype = 0, rpstep = 0;
int nclasses = cls_labels->cols;
int _var_count = avg[0]->cols;
int rtype = 0, rptype = 0;
size_t rstep = 0, rpstep = 0;
int nclasses = (int)cls_labels->total();
int nvars = avg->at(0).cols;
double probability = 0;
const int* vptr = vidx && !vidx->empty() ? vidx->ptr<int>() : 0;
if (results)
{
rtype = CV_MAT_TYPE(results->type);
rstep = CV_IS_MAT_CONT(results->type) ? 1 : results->step/CV_ELEM_SIZE(rtype);
rtype = results->type();
rstep = results->isContinuous() ? 1 : results->step/results->elemSize();
}
if (results_prob)
{
rptype = CV_MAT_TYPE(results_prob->type);
rpstep = CV_IS_MAT_CONT(results_prob->type) ? 1 : results_prob->step/CV_ELEM_SIZE(rptype);
rptype = results_prob->type();
rpstep = results_prob->isContinuous() ? 1 : results_prob->step/results_prob->elemSize();
}
// allocate memory and initializing headers for calculating
cv::AutoBuffer<double> buffer(nclasses + var_count1);
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
cv::AutoBuffer<double> _buffer(nvars*2);
double* _diffin = _buffer;
double* _diffout = _buffer + nvars;
Mat diffin( 1, nvars, CV_64FC1, _diffin );
Mat diffout( 1, nvars, CV_64FC1, _diffout );
for(int k = range.start; k < range.end; k += 1 )
for(int k = range.start; k < range.end; k++ )
{
int ival;
double opt = FLT_MAX;
for(int i = 0; i < nclasses; i++ )
{
double cur = c->data.db[i];
CvMat* u = cov_rotate_mats[i];
CvMat* w = inv_eigen_values[i];
double cur = c->at<double>(i);
const Mat& u = cov_rotate_mats->at(i);
const Mat& w = inv_eigen_values->at(i);
const double* avg_data = avg[i]->data.db;
const float* x = (const float*)(samples->data.ptr + samples->step*k);
const double* avg_data = avg->at(i).ptr<double>();
const float* x = samples->ptr<float>(k);
// cov = u w u' --> cov^(-1) = u w^(-1) u'
for(int j = 0; j < _var_count; j++ )
diff.data.db[j] = avg_data[j] - x[vidx ? vidx[j] : j];
for(int j = 0; j < nvars; j++ )
_diffin[j] = avg_data[j] - x[vptr ? vptr[j] : j];
cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T );
for(int j = 0; j < _var_count; j++ )
gemm( diffin, u, 1, noArray(), 0, diffout, GEMM_2_T );
for(int j = 0; j < nvars; j++ )
{
double d = diff.data.db[j];
cur += d*d*w->data.db[j];
double d = _diffout[j];
cur += d*d*w.ptr<double>()[j];
}
if( cur < opt )
@ -364,298 +275,190 @@ struct predict_body : cv::ParallelLoopBody {
cls = i;
opt = cur;
}
/* probability = exp( -0.5 * cur ) */
probability = exp( -0.5 * cur );
}
ival = cls_labels->data.i[cls];
if( results )
if( results_prob )
{
if( rtype == CV_32SC1 )
results->data.i[k*rstep] = ival;
if ( rptype == CV_32FC1 )
results_prob->ptr<float>()[k*rpstep + i] = (float)probability;
else
results->data.fl[k*rstep] = (float)ival;
results_prob->ptr<double>()[k*rpstep + i] = probability;
}
}
if ( results_prob )
int ival = rawOutput ? cls : cls_labels->at<int>(cls);
if( results )
{
if ( rptype == CV_32FC1 )
results_prob->data.fl[k*rpstep] = (float)probability;
if( rtype == CV_32SC1 )
results->ptr<int>()[k*rstep] = ival;
else
results_prob->data.db[k*rpstep] = probability;
results->ptr<float>()[k*rstep] = (float)ival;
}
if( k == 0 )
*value = (float)ival;
}
}
};
};
float predict( InputArray _samples, OutputArray _results, int flags ) const
{
return predictProb(_samples, _results, noArray(), flags);
}
float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results, CvMat* results_prob ) const
{
float value = 0;
float predictProb( InputArray _samples, OutputArray _results, OutputArray _resultsProb, int flags ) const
{
int value=0;
Mat samples = _samples.getMat(), results, resultsProb;
int nsamples = samples.rows, nclasses = (int)cls_labels.total();
bool rawOutput = (flags & RAW_OUTPUT) != 0;
if( !CV_IS_MAT(samples) || CV_MAT_TYPE(samples->type) != CV_32FC1 || samples->cols != var_all )
if( samples.type() != CV_32F || samples.cols != nallvars )
CV_Error( CV_StsBadArg,
"The input samples must be 32f matrix with the number of columns = var_all" );
"The input samples must be 32f matrix with the number of columns = nallvars" );
if( samples->rows > 1 && !results )
if( samples.rows > 1 && _results.needed() )
CV_Error( CV_StsNullPtr,
"When the number of input samples is >1, the output vector of results must be passed" );
if( results )
if( _results.needed() )
{
if( !CV_IS_MAT(results) || (CV_MAT_TYPE(results->type) != CV_32FC1 &&
CV_MAT_TYPE(results->type) != CV_32SC1) ||
(results->cols != 1 && results->rows != 1) ||
results->cols + results->rows - 1 != samples->rows )
CV_Error( CV_StsBadArg, "The output array must be integer or floating-point vector "
"with the number of elements = number of rows in the input matrix" );
_results.create(nsamples, 1, CV_32S);
results = _results.getMat();
}
else
results = Mat(1, 1, CV_32S, &value);
if( results_prob )
if( _resultsProb.needed() )
{
if( !CV_IS_MAT(results_prob) || (CV_MAT_TYPE(results_prob->type) != CV_32FC1 &&
CV_MAT_TYPE(results_prob->type) != CV_64FC1) ||
(results_prob->cols != 1 && results_prob->rows != 1) ||
results_prob->cols + results_prob->rows - 1 != samples->rows )
CV_Error( CV_StsBadArg, "The output array must be double or float vector "
"with the number of elements = number of rows in the input matrix" );
_resultsProb.create(nsamples, nclasses, CV_32F);
resultsProb = _resultsProb.getMat();
}
const int* vidx = var_idx ? var_idx->data.i : 0;
cv::parallel_for_(cv::Range(0, samples->rows),
predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
vidx, cls_labels, results, &value, var_count, results_prob));
return value;
}
void CvNormalBayesClassifier::write( CvFileStorage* fs, const char* name ) const
{
CV_FUNCNAME( "CvNormalBayesClassifier::write" );
cv::parallel_for_(cv::Range(0, nsamples),
NBPredictBody(c, cov_rotate_mats, inv_eigen_values, avg, samples,
var_idx, cls_labels, results, resultsProb, rawOutput));
__BEGIN__;
int nclasses, i;
nclasses = cls_labels->cols;
return (float)value;
}
cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_NBAYES );
void write( FileStorage& fs ) const
{
int nclasses = (int)cls_labels.total(), i;
CV_CALL( cvWriteInt( fs, "var_count", var_count ));
CV_CALL( cvWriteInt( fs, "var_all", var_all ));
fs << "var_count" << (var_idx.empty() ? nallvars : (int)var_idx.total());
fs << "var_all" << nallvars;
if( var_idx )
CV_CALL( cvWrite( fs, "var_idx", var_idx ));
CV_CALL( cvWrite( fs, "cls_labels", cls_labels ));
if( !var_idx.empty() )
fs << "var_idx" << var_idx;
fs << "cls_labels" << cls_labels;
CV_CALL( cvStartWriteStruct( fs, "count", CV_NODE_SEQ ));
fs << "count" << "[";
for( i = 0; i < nclasses; i++ )
CV_CALL( cvWrite( fs, NULL, count[i] ));
CV_CALL( cvEndWriteStruct( fs ));
fs << count[i];
CV_CALL( cvStartWriteStruct( fs, "sum", CV_NODE_SEQ ));
fs << "]" << "sum" << "[";
for( i = 0; i < nclasses; i++ )
CV_CALL( cvWrite( fs, NULL, sum[i] ));
CV_CALL( cvEndWriteStruct( fs ));
fs << sum[i];
CV_CALL( cvStartWriteStruct( fs, "productsum", CV_NODE_SEQ ));
fs << "]" << "productsum" << "[";
for( i = 0; i < nclasses; i++ )
CV_CALL( cvWrite( fs, NULL, productsum[i] ));
CV_CALL( cvEndWriteStruct( fs ));
fs << productsum[i];
CV_CALL( cvStartWriteStruct( fs, "avg", CV_NODE_SEQ ));
fs << "]" << "avg" << "[";
for( i = 0; i < nclasses; i++ )
CV_CALL( cvWrite( fs, NULL, avg[i] ));
CV_CALL( cvEndWriteStruct( fs ));
fs << avg[i];
CV_CALL( cvStartWriteStruct( fs, "inv_eigen_values", CV_NODE_SEQ ));
fs << "]" << "inv_eigen_values" << "[";
for( i = 0; i < nclasses; i++ )
CV_CALL( cvWrite( fs, NULL, inv_eigen_values[i] ));
CV_CALL( cvEndWriteStruct( fs ));
fs << inv_eigen_values[i];
CV_CALL( cvStartWriteStruct( fs, "cov_rotate_mats", CV_NODE_SEQ ));
fs << "]" << "cov_rotate_mats" << "[";
for( i = 0; i < nclasses; i++ )
CV_CALL( cvWrite( fs, NULL, cov_rotate_mats[i] ));
CV_CALL( cvEndWriteStruct( fs ));
fs << cov_rotate_mats[i];
CV_CALL( cvWrite( fs, "c", c ));
fs << "]";
cvEndWriteStruct( fs );
fs << "c" << c;
}
__END__;
}
void read( const FileNode& fn )
{
clear();
fn["var_all"] >> nallvars;
void CvNormalBayesClassifier::read( CvFileStorage* fs, CvFileNode* root_node )
{
bool ok = false;
CV_FUNCNAME( "CvNormalBayesClassifier::read" );
if( nallvars <= 0 )
CV_Error( CV_StsParseError,
"The field \"var_count\" of NBayes classifier is missing or non-positive" );
__BEGIN__;
fn["var_idx"] >> var_idx;
fn["cls_labels"] >> cls_labels;
int nclasses, i;
size_t data_size;
CvFileNode* node;
CvSeq* seq;
CvSeqReader reader;
int nclasses = (int)cls_labels.total(), i;
clear();
if( cls_labels.empty() || nclasses < 1 )
CV_Error( CV_StsParseError, "No or invalid \"cls_labels\" in NBayes classifier" );
CV_CALL( var_count = cvReadIntByName( fs, root_node, "var_count", -1 ));
CV_CALL( var_all = cvReadIntByName( fs, root_node, "var_all", -1 ));
CV_CALL( var_idx = (CvMat*)cvReadByName( fs, root_node, "var_idx" ));
CV_CALL( cls_labels = (CvMat*)cvReadByName( fs, root_node, "cls_labels" ));
if( !cls_labels )
CV_ERROR( CV_StsParseError, "No \"cls_labels\" in NBayes classifier" );
if( cls_labels->cols < 1 )
CV_ERROR( CV_StsBadArg, "Number of classes is less 1" );
if( var_count <= 0 )
CV_ERROR( CV_StsParseError,
"The field \"var_count\" of NBayes classifier is missing" );
nclasses = cls_labels->cols;
data_size = nclasses*6*sizeof(CvMat*);
CV_CALL( count = (CvMat**)cvAlloc( data_size ));
memset( count, 0, data_size );
sum = count + nclasses;
productsum = sum + nclasses;
avg = productsum + nclasses;
inv_eigen_values = avg + nclasses;
cov_rotate_mats = inv_eigen_values + nclasses;
CV_CALL( node = cvGetFileNodeByName( fs, root_node, "count" ));
seq = node->data.seq;
if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
CV_ERROR( CV_StsBadArg, "" );
CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
for( i = 0; i < nclasses; i++ )
{
CV_CALL( count[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
}
FileNodeIterator
count_it = fn["count"].begin(),
sum_it = fn["sum"].begin(),
productsum_it = fn["productsum"].begin(),
avg_it = fn["avg"].begin(),
inv_eigen_values_it = fn["inv_eigen_values"].begin(),
cov_rotate_mats_it = fn["cov_rotate_mats"].begin();
CV_CALL( node = cvGetFileNodeByName( fs, root_node, "sum" ));
seq = node->data.seq;
if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
CV_ERROR( CV_StsBadArg, "" );
CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
for( i = 0; i < nclasses; i++ )
{
CV_CALL( sum[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
}
count.resize(nclasses);
sum.resize(nclasses);
productsum.resize(nclasses);
avg.resize(nclasses);
inv_eigen_values.resize(nclasses);
cov_rotate_mats.resize(nclasses);
CV_CALL( node = cvGetFileNodeByName( fs, root_node, "productsum" ));
seq = node->data.seq;
if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
CV_ERROR( CV_StsBadArg, "" );
CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
for( i = 0; i < nclasses; i++ )
for( i = 0; i < nclasses; i++, ++count_it, ++sum_it, ++productsum_it, ++avg_it,
++inv_eigen_values_it, ++cov_rotate_mats_it )
{
CV_CALL( productsum[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
*count_it >> count[i];
*sum_it >> sum[i];
*productsum_it >> productsum[i];
*avg_it >> avg[i];
*inv_eigen_values_it >> inv_eigen_values[i];
*cov_rotate_mats_it >> cov_rotate_mats[i];
}
CV_CALL( node = cvGetFileNodeByName( fs, root_node, "avg" ));
seq = node->data.seq;
if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
CV_ERROR( CV_StsBadArg, "" );
CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
for( i = 0; i < nclasses; i++ )
{
CV_CALL( avg[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
fn["c"] >> c;
}
CV_CALL( node = cvGetFileNodeByName( fs, root_node, "inv_eigen_values" ));
seq = node->data.seq;
if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
CV_ERROR( CV_StsBadArg, "" );
CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
for( i = 0; i < nclasses; i++ )
void clear()
{
CV_CALL( inv_eigen_values[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
count.clear();
sum.clear();
productsum.clear();
avg.clear();
inv_eigen_values.clear();
cov_rotate_mats.clear();
var_idx.release();
cls_labels.release();
c.release();
nallvars = 0;
}
CV_CALL( node = cvGetFileNodeByName( fs, root_node, "cov_rotate_mats" ));
seq = node->data.seq;
if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
CV_ERROR( CV_StsBadArg, "" );
CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
for( i = 0; i < nclasses; i++ )
{
CV_CALL( cov_rotate_mats[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
}
CV_CALL( c = (CvMat*)cvReadByName( fs, root_node, "c" ));
ok = true;
__END__;
bool isTrained() const { return !avg.empty(); }
bool isClassifier() const { return true; }
int getVarCount() const { return nallvars; }
String getDefaultModelName() const { return "opencv_ml_nbayes"; }
if( !ok )
clear();
}
int nallvars;
Mat var_idx, cls_labels, c;
vector<Mat> count, sum, productsum, avg, inv_eigen_values, cov_rotate_mats;
};
using namespace cv;
CvNormalBayesClassifier::CvNormalBayesClassifier( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx, const Mat& _sample_idx )
Ptr<NormalBayesClassifier> NormalBayesClassifier::create()
{
var_count = var_all = 0;
var_idx = 0;
cls_labels = 0;
count = 0;
sum = 0;
productsum = 0;
avg = 0;
inv_eigen_values = 0;
cov_rotate_mats = 0;
c = 0;
default_model_name = "my_nb";
CvMat tdata = _train_data, responses = _responses, vidx = _var_idx, sidx = _sample_idx;
train(&tdata, &responses, vidx.data.ptr ? &vidx : 0,
sidx.data.ptr ? &sidx : 0);
Ptr<NormalBayesClassifierImpl> p = makePtr<NormalBayesClassifierImpl>();
return p;
}
bool CvNormalBayesClassifier::train( const Mat& _train_data, const Mat& _responses,
const Mat& _var_idx, const Mat& _sample_idx, bool update )
{
CvMat tdata = _train_data, responses = _responses, vidx = _var_idx, sidx = _sample_idx;
return train(&tdata, &responses, vidx.data.ptr ? &vidx : 0,
sidx.data.ptr ? &sidx : 0, update);
}
float CvNormalBayesClassifier::predict( const Mat& _samples, Mat* _results, Mat* _results_prob ) const
{
CvMat samples = _samples, results, *presults = 0, results_prob, *presults_prob = 0;
if( _results )
{
if( !(_results->data && _results->type() == CV_32F &&
(_results->cols == 1 || _results->rows == 1) &&
_results->cols + _results->rows - 1 == _samples.rows) )
_results->create(_samples.rows, 1, CV_32F);
presults = &(results = *_results);
}
if( _results_prob )
{
if( !(_results_prob->data && _results_prob->type() == CV_64F &&
(_results_prob->cols == 1 || _results_prob->rows == 1) &&
_results_prob->cols + _results_prob->rows - 1 == _samples.rows) )
_results_prob->create(_samples.rows, 1, CV_64F);
presults_prob = &(results_prob = *_results_prob);
}
return predict(&samples, presults, presults_prob);
}
/* End of file. */

@ -38,8 +38,8 @@
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#ifndef __OPENCV_ML_PRECOMP_HPP__
#define __OPENCV_ML_PRECOMP_HPP__
#include "opencv2/core.hpp"
#include "opencv2/ml.hpp"
@ -56,321 +56,217 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <vector>
#define ML_IMPL CV_IMPL
#define __BEGIN__ __CV_BEGIN__
#define __END__ __CV_END__
#define EXIT __CV_EXIT__
#define CV_MAT_ELEM_FLAG( mat, type, comp, vect, tflag ) \
(( tflag == CV_ROW_SAMPLE ) \
? (CV_MAT_ELEM( mat, type, comp, vect )) \
: (CV_MAT_ELEM( mat, type, vect, comp )))
/* Convert matrix to vector */
#define ICV_MAT2VEC( mat, vdata, vstep, num ) \
if( MIN( (mat).rows, (mat).cols ) != 1 ) \
CV_ERROR( CV_StsBadArg, "" ); \
(vdata) = ((mat).data.ptr); \
if( (mat).rows == 1 ) \
{ \
(vstep) = CV_ELEM_SIZE( (mat).type ); \
(num) = (mat).cols; \
} \
else \
{ \
(vstep) = (mat).step; \
(num) = (mat).rows; \
}
/****************************************************************************************\
* Main struct definitions *
\****************************************************************************************/
/* get raw data */
#define ICV_RAWDATA( mat, flags, rdata, sstep, cstep, m, n ) \
(rdata) = (mat).data.ptr; \
if( CV_IS_ROW_SAMPLE( flags ) ) \
{ \
(sstep) = (mat).step; \
(cstep) = CV_ELEM_SIZE( (mat).type ); \
(m) = (mat).rows; \
(n) = (mat).cols; \
} \
else \
{ \
(cstep) = (mat).step; \
(sstep) = CV_ELEM_SIZE( (mat).type ); \
(n) = (mat).rows; \
(m) = (mat).cols; \
}
/* log(2*PI) */
#define CV_LOG2PI (1.8378770664093454835606594728112)
#define ICV_IS_MAT_OF_TYPE( mat, mat_type) \
(CV_IS_MAT( mat ) && CV_MAT_TYPE( mat->type ) == (mat_type) && \
(mat)->cols > 0 && (mat)->rows > 0)
/*
uchar* data; int sstep, cstep; - trainData->data
uchar* classes; int clstep; int ncl;- trainClasses
uchar* tmask; int tmstep; int ntm; - typeMask
uchar* missed;int msstep, mcstep; -missedMeasurements...
int mm, mn; == m,n == size,dim
uchar* sidx;int sistep; - sampleIdx
uchar* cidx;int cistep; - compIdx
int k, l; == n,m == dim,size (length of cidx, sidx)
int m, n; == size,dim
*/
#define ICV_DECLARE_TRAIN_ARGS() \
uchar* data; \
int sstep, cstep; \
uchar* classes; \
int clstep; \
int ncl; \
uchar* tmask; \
int tmstep; \
int ntm; \
uchar* missed; \
int msstep, mcstep; \
int mm, mn; \
uchar* sidx; \
int sistep; \
uchar* cidx; \
int cistep; \
int k, l; \
int m, n; \
\
data = classes = tmask = missed = sidx = cidx = NULL; \
sstep = cstep = clstep = ncl = tmstep = ntm = msstep = mcstep = mm = mn = 0; \
sistep = cistep = k = l = m = n = 0;
#define ICV_TRAIN_DATA_REQUIRED( param, flags ) \
if( !ICV_IS_MAT_OF_TYPE( (param), CV_32FC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_RAWDATA( *(param), (flags), data, sstep, cstep, m, n ); \
k = n; \
l = m; \
}
namespace cv
{
namespace ml
{
using std::vector;
#define ICV_TRAIN_CLASSES_REQUIRED( param ) \
if( !ICV_IS_MAT_OF_TYPE( (param), CV_32FC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_MAT2VEC( *(param), classes, clstep, ncl ); \
if( m != ncl ) \
{ \
CV_ERROR( CV_StsBadArg, "Unmatched sizes" ); \
} \
}
#define CV_DTREE_CAT_DIR(idx,subset) \
(2*((subset[(idx)>>5]&(1 << ((idx) & 31)))==0)-1)
#define ICV_ARG_NULL( param ) \
if( (param) != NULL ) \
{ \
CV_ERROR( CV_StsBadArg, #param " parameter must be NULL" ); \
template<typename _Tp> struct cmp_lt_idx
{
cmp_lt_idx(const _Tp* _arr) : arr(_arr) {}
bool operator ()(int a, int b) const { return arr[a] < arr[b]; }
const _Tp* arr;
};
template<typename _Tp> struct cmp_lt_ptr
{
cmp_lt_ptr() {}
bool operator ()(const _Tp* a, const _Tp* b) const { return *a < *b; }
};
static inline void setRangeVector(std::vector<int>& vec, int n)
{
vec.resize(n);
for( int i = 0; i < n; i++ )
vec[i] = i;
}
#define ICV_MISSED_MEASUREMENTS_OPTIONAL( param, flags ) \
if( param ) \
{ \
if( !ICV_IS_MAT_OF_TYPE( param, CV_8UC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_RAWDATA( *(param), (flags), missed, msstep, mcstep, mm, mn ); \
if( mm != m || mn != n ) \
{ \
CV_ERROR( CV_StsBadArg, "Unmatched sizes" ); \
} \
} \
static inline void writeTermCrit(FileStorage& fs, const TermCriteria& termCrit)
{
if( (termCrit.type & TermCriteria::EPS) != 0 )
fs << "epsilon" << termCrit.epsilon;
if( (termCrit.type & TermCriteria::COUNT) != 0 )
fs << "iterations" << termCrit.maxCount;
}
#define ICV_COMP_IDX_OPTIONAL( param ) \
if( param ) \
{ \
if( !ICV_IS_MAT_OF_TYPE( param, CV_32SC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_MAT2VEC( *(param), cidx, cistep, k ); \
if( k > n ) \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
static inline TermCriteria readTermCrit(const FileNode& fn)
{
TermCriteria termCrit;
double epsilon = (double)fn["epsilon"];
if( epsilon > 0 )
{
termCrit.type |= TermCriteria::EPS;
termCrit.epsilon = epsilon;
}
int iters = (double)fn["iterations"];
if( iters > 0 )
{
termCrit.type |= TermCriteria::COUNT;
termCrit.maxCount = iters;
}
return termCrit;
}
#define ICV_SAMPLE_IDX_OPTIONAL( param ) \
if( param ) \
{ \
if( !ICV_IS_MAT_OF_TYPE( param, CV_32SC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_MAT2VEC( *sampleIdx, sidx, sistep, l ); \
if( l > m ) \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
class DTreesImpl : public DTrees
{
public:
struct WNode
{
WNode()
{
class_idx = sample_count = depth = complexity = 0;
parent = left = right = split = defaultDir = -1;
Tn = INT_MAX;
value = maxlr = alpha = node_risk = tree_risk = tree_error = 0.;
}
/****************************************************************************************/
#define ICV_CONVERT_FLOAT_ARRAY_TO_MATRICE( array, matrice ) \
{ \
CvMat a, b; \
int dims = (matrice)->cols; \
int nsamples = (matrice)->rows; \
int type = CV_MAT_TYPE((matrice)->type); \
int i, offset = dims; \
\
CV_ASSERT( type == CV_32FC1 || type == CV_64FC1 ); \
offset *= ((type == CV_32FC1) ? sizeof(float) : sizeof(double));\
\
b = cvMat( 1, dims, CV_32FC1 ); \
cvGetRow( matrice, &a, 0 ); \
for( i = 0; i < nsamples; i++, a.data.ptr += offset ) \
{ \
b.data.fl = (float*)array[i]; \
CV_CALL( cvConvert( &b, &a ) ); \
} \
}
int class_idx;
int Tn;
double value;
/****************************************************************************************\
* Auxiliary functions declarations *
\****************************************************************************************/
/* Generates a set of classes centers in quantity <num_of_clusters> that are generated as
uniform random vectors in parallelepiped, where <data> is concentrated. Vectors in
<data> should have horizontal orientation. If <centers> != NULL, the function doesn't
allocate any memory and stores generated centers in <centers>, returns <centers>.
If <centers> == NULL, the function allocates memory and creates the matrice. Centers
are supposed to be oriented horizontally. */
CvMat* icvGenerateRandomClusterCenters( int seed,
const CvMat* data,
int num_of_clusters,
CvMat* centers CV_DEFAULT(0));
/* Fills the <labels> using <probs> by choosing the maximal probability. Outliers are
fixed by <oulier_tresh> and have cluster label (-1). Function also controls that there
weren't "empty" clusters by filling empty clusters with the maximal probability vector.
If probs_sums != NULL, filles it with the sums of probabilities for each sample (it is
useful for normalizing probabilities' matrice of FCM) */
void icvFindClusterLabels( const CvMat* probs, float outlier_thresh, float r,
const CvMat* labels );
typedef struct CvSparseVecElem32f
{
int idx;
float val;
}
CvSparseVecElem32f;
/* Prepare training data and related parameters */
#define CV_TRAIN_STATMODEL_DEFRAGMENT_TRAIN_DATA 1
#define CV_TRAIN_STATMODEL_SAMPLES_AS_ROWS 2
#define CV_TRAIN_STATMODEL_SAMPLES_AS_COLUMNS 4
#define CV_TRAIN_STATMODEL_CATEGORICAL_RESPONSE 8
#define CV_TRAIN_STATMODEL_ORDERED_RESPONSE 16
#define CV_TRAIN_STATMODEL_RESPONSES_ON_OUTPUT 32
#define CV_TRAIN_STATMODEL_ALWAYS_COPY_TRAIN_DATA 64
#define CV_TRAIN_STATMODEL_SPARSE_AS_SPARSE 128
int
cvPrepareTrainData( const char* /*funcname*/,
const CvMat* train_data, int tflag,
const CvMat* responses, int response_type,
const CvMat* var_idx,
const CvMat* sample_idx,
bool always_copy_data,
const float*** out_train_samples,
int* _sample_count,
int* _var_count,
int* _var_all,
CvMat** out_responses,
CvMat** out_response_map,
CvMat** out_var_idx,
CvMat** out_sample_idx=0 );
void
cvSortSamplesByClasses( const float** samples, const CvMat* classes,
int* class_ranges, const uchar** mask CV_DEFAULT(0) );
void
cvCombineResponseMaps (CvMat* _responses,
const CvMat* old_response_map,
CvMat* new_response_map,
CvMat** out_response_map);
void
cvPreparePredictData( const CvArr* sample, int dims_all, const CvMat* comp_idx,
int class_count, const CvMat* prob, float** row_sample,
int as_sparse CV_DEFAULT(0) );
/* copies clustering [or batch "predict"] results
(labels and/or centers and/or probs) back to the output arrays */
void
cvWritebackLabels( const CvMat* labels, CvMat* dst_labels,
const CvMat* centers, CvMat* dst_centers,
const CvMat* probs, CvMat* dst_probs,
const CvMat* sample_idx, int samples_all,
const CvMat* comp_idx, int dims_all );
#define cvWritebackResponses cvWritebackLabels
#define XML_FIELD_NAME "_name"
CvFileNode* icvFileNodeGetChild(CvFileNode* father, const char* name);
CvFileNode* icvFileNodeGetChildArrayElem(CvFileNode* father, const char* name,int index);
CvFileNode* icvFileNodeGetNext(CvFileNode* n, const char* name);
void cvCheckTrainData( const CvMat* train_data, int tflag,
const CvMat* missing_mask,
int* var_all, int* sample_all );
CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false );
CvMat* cvPreprocessVarType( const CvMat* type_mask, const CvMat* var_idx,
int var_all, int* response_type );
CvMat* cvPreprocessOrderedResponses( const CvMat* responses,
const CvMat* sample_idx, int sample_all );
CvMat* cvPreprocessCategoricalResponses( const CvMat* responses,
const CvMat* sample_idx, int sample_all,
CvMat** out_response_map, CvMat** class_counts=0 );
const float** cvGetTrainSamples( const CvMat* train_data, int tflag,
const CvMat* var_idx, const CvMat* sample_idx,
int* _var_count, int* _sample_count,
bool always_copy_data=false );
int parent;
int left;
int right;
int defaultDir;
namespace cv
{
struct DTreeBestSplitFinder
int split;
int sample_count;
int depth;
double maxlr;
// global pruning data
int complexity;
double alpha;
double node_risk, tree_risk, tree_error;
};
struct WSplit
{
DTreeBestSplitFinder(){ splitSize = 0, tree = 0; node = 0; }
DTreeBestSplitFinder( CvDTree* _tree, CvDTreeNode* _node);
DTreeBestSplitFinder( const DTreeBestSplitFinder& finder, Split );
virtual ~DTreeBestSplitFinder() {}
virtual void operator()(const BlockedRange& range);
void join( DTreeBestSplitFinder& rhs );
Ptr<CvDTreeSplit> bestSplit;
Ptr<CvDTreeSplit> split;
int splitSize;
CvDTree* tree;
CvDTreeNode* node;
WSplit()
{
varIdx = inversed = next = 0;
quality = c = 0.f;
subsetOfs = -1;
}
int varIdx;
int inversed;
float quality;
int next;
float c;
int subsetOfs;
};
struct ForestTreeBestSplitFinder : DTreeBestSplitFinder
struct WorkData
{
ForestTreeBestSplitFinder() : DTreeBestSplitFinder() {}
ForestTreeBestSplitFinder( CvForestTree* _tree, CvDTreeNode* _node );
ForestTreeBestSplitFinder( const ForestTreeBestSplitFinder& finder, Split );
virtual void operator()(const BlockedRange& range);
WorkData(const Ptr<TrainData>& _data);
Ptr<TrainData> data;
vector<WNode> wnodes;
vector<WSplit> wsplits;
vector<int> wsubsets;
vector<int> cv_Tn;
vector<double> cv_node_risk;
vector<double> cv_node_error;
vector<int> cv_labels;
vector<double> sample_weights;
vector<int> cat_responses;
vector<double> ord_responses;
vector<int> sidx;
int maxSubsetSize;
};
DTreesImpl();
virtual ~DTreesImpl();
virtual void clear();
String getDefaultModelName() const { return "opencv_ml_dtree"; }
bool isTrained() const { return !roots.empty(); }
bool isClassifier() const { return _isClassifier; }
int getVarCount() const { return varType.empty() ? 0 : (int)(varType.size() - 1); }
int getCatCount(int vi) const { return catOfs[vi][1] - catOfs[vi][0]; }
int getSubsetSize(int vi) const { return (getCatCount(vi) + 31)/32; }
virtual void setDParams(const Params& _params);
virtual Params getDParams() const;
virtual void startTraining( const Ptr<TrainData>& trainData, int flags );
virtual void endTraining();
virtual void initCompVarIdx();
virtual bool train( const Ptr<TrainData>& trainData, int flags );
virtual int addTree( const vector<int>& sidx );
virtual int addNodeAndTrySplit( int parent, const vector<int>& sidx );
virtual const vector<int>& getActiveVars();
virtual int findBestSplit( const vector<int>& _sidx );
virtual void calcValue( int nidx, const vector<int>& _sidx );
virtual WSplit findSplitOrdClass( int vi, const vector<int>& _sidx, double initQuality );
// simple k-means, slightly modified to take into account the "weight" (L1-norm) of each vector.
virtual void clusterCategories( const double* vectors, int n, int m, double* csums, int k, int* labels );
virtual WSplit findSplitCatClass( int vi, const vector<int>& _sidx, double initQuality, int* subset );
virtual WSplit findSplitOrdReg( int vi, const vector<int>& _sidx, double initQuality );
virtual WSplit findSplitCatReg( int vi, const vector<int>& _sidx, double initQuality, int* subset );
virtual int calcDir( int splitidx, const vector<int>& _sidx, vector<int>& _sleft, vector<int>& _sright );
virtual int pruneCV( int root );
virtual double updateTreeRNC( int root, double T, int fold );
virtual bool cutTree( int root, double T, int fold, double min_alpha );
virtual float predictTrees( const Range& range, const Mat& sample, int flags ) const;
virtual float predict( InputArray inputs, OutputArray outputs, int flags ) const;
virtual void writeTrainingParams( FileStorage& fs ) const;
virtual void writeParams( FileStorage& fs ) const;
virtual void writeSplit( FileStorage& fs, int splitidx ) const;
virtual void writeNode( FileStorage& fs, int nidx, int depth ) const;
virtual void writeTree( FileStorage& fs, int root ) const;
virtual void write( FileStorage& fs ) const;
virtual void readParams( const FileNode& fn );
virtual int readSplit( const FileNode& fn );
virtual int readNode( const FileNode& fn );
virtual int readTree( const FileNode& fn );
virtual void read( const FileNode& fn );
virtual const std::vector<int>& getRoots() const { return roots; }
virtual const std::vector<Node>& getNodes() const { return nodes; }
virtual const std::vector<Split>& getSplits() const { return splits; }
virtual const std::vector<int>& getSubsets() const { return subsets; }
Params params0, params;
vector<int> varIdx;
vector<int> compVarIdx;
vector<uchar> varType;
vector<Vec2i> catOfs;
vector<int> catMap;
vector<int> roots;
vector<Node> nodes;
vector<Split> splits;
vector<int> subsets;
vector<int> classLabels;
vector<float> missingSubst;
bool _isClassifier;
Ptr<WorkData> w;
};
}
#endif /* __ML_H__ */
}}
#endif /* __OPENCV_ML_PRECOMP_HPP__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -40,131 +40,74 @@
#include "precomp.hpp"
typedef struct CvDI
namespace cv { namespace ml {
struct PairDI
{
double d;
int i;
} CvDI;
};
static int CV_CDECL
icvCmpDI( const void* a, const void* b, void* )
struct CmpPairDI
{
const CvDI* e1 = (const CvDI*) a;
const CvDI* e2 = (const CvDI*) b;
return (e1->d < e2->d) ? -1 : (e1->d > e2->d);
}
bool operator ()(const PairDI& e1, const PairDI& e2) const
{
return (e1.d < e2.d) || (e1.d == e2.d && e1.i < e2.i);
}
};
CV_IMPL void
cvCreateTestSet( int type, CvMat** samples,
int num_samples,
int num_features,
CvMat** responses,
int num_classes, ... )
void createConcentricSpheresTestSet( int num_samples, int num_features, int num_classes,
OutputArray _samples, OutputArray _responses)
{
CvMat* mean = NULL;
CvMat* cov = NULL;
CvMemStorage* storage = NULL;
CV_FUNCNAME( "cvCreateTestSet" );
__BEGIN__;
if( samples )
*samples = NULL;
if( responses )
*responses = NULL;
if( type != CV_TS_CONCENTRIC_SPHERES )
CV_ERROR( CV_StsBadArg, "Invalid type parameter" );
if( !samples )
CV_ERROR( CV_StsNullPtr, "samples parameter must be not NULL" );
if( !responses )
CV_ERROR( CV_StsNullPtr, "responses parameter must be not NULL" );
if( num_samples < 1 )
CV_ERROR( CV_StsBadArg, "num_samples parameter must be positive" );
CV_Error( CV_StsBadArg, "num_samples parameter must be positive" );
if( num_features < 1 )
CV_ERROR( CV_StsBadArg, "num_features parameter must be positive" );
CV_Error( CV_StsBadArg, "num_features parameter must be positive" );
if( num_classes < 1 )
CV_ERROR( CV_StsBadArg, "num_classes parameter must be positive" );
CV_Error( CV_StsBadArg, "num_classes parameter must be positive" );
if( type == CV_TS_CONCENTRIC_SPHERES )
{
CvSeqWriter writer;
CvSeqReader reader;
CvMat sample;
CvDI elem;
CvSeq* seq = NULL;
int i, cur_class;
CV_CALL( *samples = cvCreateMat( num_samples, num_features, CV_32FC1 ) );
CV_CALL( *responses = cvCreateMat( 1, num_samples, CV_32SC1 ) );
CV_CALL( mean = cvCreateMat( 1, num_features, CV_32FC1 ) );
CV_CALL( cvSetZero( mean ) );
CV_CALL( cov = cvCreateMat( num_features, num_features, CV_32FC1 ) );
CV_CALL( cvSetIdentity( cov ) );
/* fill the feature values matrix with random numbers drawn from standard
normal distribution */
CV_CALL( cvRandMVNormal( mean, cov, *samples ) );
/* calculate distances from the origin to the samples and put them
into the sequence along with indices */
CV_CALL( storage = cvCreateMemStorage() );
CV_CALL( cvStartWriteSeq( 0, sizeof( CvSeq ), sizeof( CvDI ), storage, &writer ));
for( i = 0; i < (*samples)->rows; ++i )
{
CV_CALL( cvGetRow( *samples, &sample, i ));
elem.i = i;
CV_CALL( elem.d = cvNorm( &sample, NULL, CV_L2 ));
CV_WRITE_SEQ_ELEM( elem, writer );
}
CV_CALL( seq = cvEndWriteSeq( &writer ) );
_samples.create( num_samples, num_features, CV_32F );
_responses.create( 1, num_samples, CV_32S );
/* sort the sequence in a distance ascending order */
CV_CALL( cvSeqSort( seq, icvCmpDI, NULL ) );
Mat responses = _responses.getMat();
/* assign class labels */
num_classes = MIN( num_samples, num_classes );
CV_CALL( cvStartReadSeq( seq, &reader ) );
CV_READ_SEQ_ELEM( elem, reader );
for( i = 0, cur_class = 0; i < num_samples; ++cur_class )
{
int last_idx;
double max_dst;
Mat mean = Mat::zeros(1, num_features, CV_32F);
Mat cov = Mat::eye(num_features, num_features, CV_32F);
last_idx = num_samples * (cur_class + 1) / num_classes - 1;
CV_CALL( max_dst = (*((CvDI*) cvGetSeqElem( seq, last_idx ))).d );
max_dst = MAX( max_dst, elem.d );
// fill the feature values matrix with random numbers drawn from standard normal distribution
randMVNormal( mean, cov, num_samples, _samples );
Mat samples = _samples.getMat();
for( ; elem.d <= max_dst && i < num_samples; ++i )
{
CV_MAT_ELEM( **responses, int, 0, elem.i ) = cur_class;
if( i < num_samples - 1 )
// calculate distances from the origin to the samples and put them
// into the sequence along with indices
std::vector<PairDI> dis(samples.rows);
for( i = 0; i < samples.rows; i++ )
{
CV_READ_SEQ_ELEM( elem, reader );
}
}
}
PairDI& elem = dis[i];
elem.i = i;
elem.d = norm(samples.row(i), NORM_L2);
}
__END__;
std::sort(dis.begin(), dis.end(), CmpPairDI());
if( cvGetErrStatus() < 0 )
// assign class labels
num_classes = std::min( num_samples, num_classes );
for( i = 0, cur_class = 0; i < num_samples; ++cur_class )
{
if( samples )
cvReleaseMat( samples );
if( responses )
cvReleaseMat( responses );
int last_idx = num_samples * (cur_class + 1) / num_classes - 1;
double max_dst = dis[last_idx].d;
max_dst = std::max( max_dst, dis[i].d );
for( ; i < num_samples && dis[i].d <= max_dst; ++i )
responses.at<int>(i) = cur_class;
}
cvReleaseMat( &mean );
cvReleaseMat( &cov );
cvReleaseMemStorage( &storage );
}
}}
/* End of file. */

File diff suppressed because it is too large Load Diff

@ -43,6 +43,9 @@
using namespace std;
using namespace cv;
using cv::ml::TrainData;
using cv::ml::EM;
using cv::ml::KNearest;
static
void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 )
@ -309,9 +312,9 @@ void CV_KNearestTest::run( int /*start_from*/ )
generateData( testData, testLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
int code = cvtest::TS::OK;
KNearest knearest;
knearest.train( trainData, trainLabels );
knearest.find_nearest( testData, 4, &bestLabels );
Ptr<KNearest> knearest = KNearest::create(true);
knearest->train(TrainData::create(trainData, cv::ml::ROW_SAMPLE, trainLabels), 0);;
knearest->findNearest( testData, 4, bestLabels);
float err;
if( !calcErr( bestLabels, testLabels, sizes, err, true ) )
{
@ -373,13 +376,16 @@ int CV_EMTest::runCase( int caseIndex, const EM_Params& params,
cv::Mat labels;
float err;
cv::EM em(params.nclusters, params.covMatType, params.termCrit);
Ptr<EM> em;
EM::Params emp(params.nclusters, params.covMatType, params.termCrit);
if( params.startStep == EM::START_AUTO_STEP )
em.train( trainData, noArray(), labels );
em = EM::train( trainData, noArray(), labels, noArray(), emp );
else if( params.startStep == EM::START_E_STEP )
em.trainE( trainData, *params.means, *params.covs, *params.weights, noArray(), labels );
em = EM::train_startWithE( trainData, *params.means, *params.covs,
*params.weights, noArray(), labels, noArray(), emp );
else if( params.startStep == EM::START_M_STEP )
em.trainM( trainData, *params.probs, noArray(), labels );
em = EM::train_startWithM( trainData, *params.probs,
noArray(), labels, noArray(), emp );
// check train error
if( !calcErr( labels, trainLabels, sizes, err , false, false ) )
@ -399,7 +405,7 @@ int CV_EMTest::runCase( int caseIndex, const EM_Params& params,
{
Mat sample = testData.row(i);
Mat probs;
labels.at<int>(i) = static_cast<int>(em.predict( sample, probs )[1]);
labels.at<int>(i) = static_cast<int>(em->predict2( sample, probs )[1]);
}
if( !calcErr( labels, testLabels, sizes, err, false, false ) )
{
@ -446,56 +452,56 @@ void CV_EMTest::run( int /*start_from*/ )
int code = cvtest::TS::OK;
int caseIndex = 0;
{
params.startStep = cv::EM::START_AUTO_STEP;
params.covMatType = cv::EM::COV_MAT_GENERIC;
params.startStep = EM::START_AUTO_STEP;
params.covMatType = EM::COV_MAT_GENERIC;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_AUTO_STEP;
params.covMatType = cv::EM::COV_MAT_DIAGONAL;
params.startStep = EM::START_AUTO_STEP;
params.covMatType = EM::COV_MAT_DIAGONAL;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_AUTO_STEP;
params.covMatType = cv::EM::COV_MAT_SPHERICAL;
params.startStep = EM::START_AUTO_STEP;
params.covMatType = EM::COV_MAT_SPHERICAL;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_M_STEP;
params.covMatType = cv::EM::COV_MAT_GENERIC;
params.startStep = EM::START_M_STEP;
params.covMatType = EM::COV_MAT_GENERIC;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_M_STEP;
params.covMatType = cv::EM::COV_MAT_DIAGONAL;
params.startStep = EM::START_M_STEP;
params.covMatType = EM::COV_MAT_DIAGONAL;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_M_STEP;
params.covMatType = cv::EM::COV_MAT_SPHERICAL;
params.startStep = EM::START_M_STEP;
params.covMatType = EM::COV_MAT_SPHERICAL;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_E_STEP;
params.covMatType = cv::EM::COV_MAT_GENERIC;
params.startStep = EM::START_E_STEP;
params.covMatType = EM::COV_MAT_GENERIC;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_E_STEP;
params.covMatType = cv::EM::COV_MAT_DIAGONAL;
params.startStep = EM::START_E_STEP;
params.covMatType = EM::COV_MAT_DIAGONAL;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
{
params.startStep = cv::EM::START_E_STEP;
params.covMatType = cv::EM::COV_MAT_SPHERICAL;
params.startStep = EM::START_E_STEP;
params.covMatType = EM::COV_MAT_SPHERICAL;
int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
code = currCode == cvtest::TS::OK ? code : currCode;
}
@ -511,7 +517,6 @@ protected:
{
int code = cvtest::TS::OK;
const int nclusters = 2;
cv::EM em(nclusters);
Mat samples = Mat(3,1,CV_64FC1);
samples.at<double>(0,0) = 1;
@ -520,11 +525,11 @@ protected:
Mat labels;
em.train(samples, labels);
Ptr<EM> em = EM::train(samples, noArray(), labels, noArray(), EM::Params(nclusters));
Mat firstResult(samples.rows, 1, CV_32SC1);
for( int i = 0; i < samples.rows; i++)
firstResult.at<int>(i) = static_cast<int>(em.predict(samples.row(i))[1]);
firstResult.at<int>(i) = static_cast<int>(em->predict2(samples.row(i), noArray())[1]);
// Write out
string filename = cv::tempfile(".xml");
@ -533,7 +538,7 @@ protected:
try
{
fs << "em" << "{";
em.write(fs);
em->write(fs);
fs << "}";
}
catch(...)
@ -543,29 +548,24 @@ protected:
}
}
em.clear();
em.release();
// Read in
{
FileStorage fs = FileStorage(filename, FileStorage::READ);
CV_Assert(fs.isOpened());
FileNode fn = fs["em"];
try
{
em.read(fn);
em = StatModel::load<EM>(filename);
}
catch(...)
{
ts->printf( cvtest::TS::LOG, "Crash in read method.\n" );
ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION );
}
}
remove( filename.c_str() );
int errCaseCount = 0;
for( int i = 0; i < samples.rows; i++)
errCaseCount = std::abs(em.predict(samples.row(i))[1] - firstResult.at<int>(i)) < FLT_EPSILON ? 0 : 1;
errCaseCount = std::abs(em->predict2(samples.row(i), noArray())[1] - firstResult.at<int>(i)) < FLT_EPSILON ? 0 : 1;
if( errCaseCount > 0 )
{
@ -588,21 +588,18 @@ protected:
// 1. estimates distributions of "spam" / "not spam"
// 2. predict classID using Bayes classifier for estimated distributions.
CvMLData data;
string dataFilename = string(ts->get_data_path()) + "spambase.data";
Ptr<TrainData> data = TrainData::loadFromCSV(dataFilename, 0);
if(data.read_csv(dataFilename.c_str()) != 0)
if( data.empty() )
{
ts->printf(cvtest::TS::LOG, "File with spambase dataset cann't be read.\n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
}
Mat values = cv::cvarrToMat(data.get_values());
CV_Assert(values.cols == 58);
int responseIndex = 57;
Mat samples = values.colRange(0, responseIndex);
Mat responses = values.col(responseIndex);
Mat samples = data->getSamples();
CV_Assert(samples.cols == 57);
Mat responses = data->getResponses();
vector<int> trainSamplesMask(samples.rows, 0);
int trainSamplesCount = (int)(0.5f * samples.rows);
@ -616,7 +613,6 @@ protected:
std::swap(trainSamplesMask[i1], trainSamplesMask[i2]);
}
EM model0(3), model1(3);
Mat samples0, samples1;
for(int i = 0; i < samples.rows; i++)
{
@ -630,8 +626,8 @@ protected:
samples1.push_back(sample);
}
}
model0.train(samples0);
model1.train(samples1);
Ptr<EM> model0 = EM::train(samples0, noArray(), noArray(), noArray(), EM::Params(3));
Ptr<EM> model1 = EM::train(samples1, noArray(), noArray(), noArray(), EM::Params(3));
Mat trainConfusionMat(2, 2, CV_32SC1, Scalar(0)),
testConfusionMat(2, 2, CV_32SC1, Scalar(0));
@ -639,8 +635,8 @@ protected:
for(int i = 0; i < samples.rows; i++)
{
Mat sample = samples.row(i);
double sampleLogLikelihoods0 = model0.predict(sample)[0];
double sampleLogLikelihoods1 = model1.predict(sample)[0];
double sampleLogLikelihoods0 = model0->predict2(sample, noArray())[0];
double sampleLogLikelihoods1 = model1->predict2(sample, noArray())[0];
int classID = sampleLogLikelihoods0 >= lambda * sampleLogLikelihoods1 ? 0 : 1;

@ -1,6 +1,8 @@
#include "test_precomp.hpp"
#if 0
#include <string>
#include <fstream>
#include <iostream>
@ -284,3 +286,5 @@ void CV_GBTreesTest::run(int)
/////////////////////////////////////////////////////////////////////////////
TEST(ML_GBTrees, regression) { CV_GBTreesTest test; test.safe_run(); }
#endif

@ -65,7 +65,7 @@ int CV_AMLTest::run_test_case( int testCaseIdx )
for (int k = 0; k < icount; k++)
{
#endif
data.mix_train_and_test_idx();
data->shuffleTrainTest();
code = train( testCaseIdx );
#ifdef GET_STAT
float case_result = get_error();
@ -101,9 +101,10 @@ int CV_AMLTest::validate_test_results( int testCaseIdx )
{
resultNode["mean"] >> mean;
resultNode["sigma"] >> sigma;
float curErr = get_error( testCaseIdx, CV_TEST_ERROR );
model->save(format("/Users/vp/tmp/dtree/testcase_%02d.cur.yml", testCaseIdx));
float curErr = get_test_error( testCaseIdx );
const int coeff = 4;
ts->printf( cvtest::TS::LOG, "Test case = %d; test error = %f; mean error = %f (diff=%f), %d*sigma = %f",
ts->printf( cvtest::TS::LOG, "Test case = %d; test error = %f; mean error = %f (diff=%f), %d*sigma = %f\n",
testCaseIdx, curErr, mean, abs( curErr - mean), coeff, coeff*sigma );
if ( abs( curErr - mean) > coeff*sigma )
{
@ -125,6 +126,6 @@ int CV_AMLTest::validate_test_results( int testCaseIdx )
TEST(ML_DTree, regression) { CV_AMLTest test( CV_DTREE ); test.safe_run(); }
TEST(ML_Boost, regression) { CV_AMLTest test( CV_BOOST ); test.safe_run(); }
TEST(ML_RTrees, regression) { CV_AMLTest test( CV_RTREES ); test.safe_run(); }
TEST(ML_ERTrees, regression) { CV_AMLTest test( CV_ERTREES ); test.safe_run(); }
TEST(DISABLED_ML_ERTrees, regression) { CV_AMLTest test( CV_ERTREES ); test.safe_run(); }
/* End of file. */

@ -44,257 +44,49 @@
using namespace cv;
using namespace std;
// auxiliary functions
// 1. nbayes
void nbayes_check_data( CvMLData* _data )
{
if( _data->get_missing() )
CV_Error( CV_StsBadArg, "missing values are not supported" );
const CvMat* var_types = _data->get_var_types();
bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
Mat _var_types = cvarrToMat(var_types);
if( ( fabs( cvtest::norm( _var_types, Mat::zeros(_var_types.dims, _var_types.size, _var_types.type()), CV_L1 ) -
(var_types->rows + var_types->cols - 2)*CV_VAR_ORDERED - CV_VAR_CATEGORICAL ) > FLT_EPSILON ) ||
!is_classifier )
CV_Error( CV_StsBadArg, "incorrect types of predictors or responses" );
}
bool nbayes_train( CvNormalBayesClassifier* nbayes, CvMLData* _data )
{
nbayes_check_data( _data );
const CvMat* values = _data->get_values();
const CvMat* responses = _data->get_responses();
const CvMat* train_sidx = _data->get_train_sample_idx();
const CvMat* var_idx = _data->get_var_idx();
return nbayes->train( values, responses, var_idx, train_sidx );
}
float nbayes_calc_error( CvNormalBayesClassifier* nbayes, CvMLData* _data, int type, vector<float> *resp )
{
float err = 0;
nbayes_check_data( _data );
const CvMat* values = _data->get_values();
const CvMat* response = _data->get_responses();
const CvMat* sample_idx = (type == CV_TEST_ERROR) ? _data->get_test_sample_idx() : _data->get_train_sample_idx();
int* sidx = sample_idx ? sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?
1 : response->step / CV_ELEM_SIZE(response->type);
int sample_count = sample_idx ? sample_idx->cols : 0;
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? values->rows : sample_count;
float* pred_resp = 0;
if( resp && (sample_count > 0) )
{
resp->resize( sample_count );
pred_resp = &((*resp)[0]);
}
for( int i = 0; i < sample_count; i++ )
{
CvMat sample;
int si = sidx ? sidx[i] : i;
cvGetRow( values, &sample, si );
float r = (float)nbayes->predict( &sample, 0 );
if( pred_resp )
pred_resp[i] = r;
int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
err += d;
}
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
return err;
}
// 2. knearest
void knearest_check_data_and_get_predictors( CvMLData* _data, CvMat* _predictors )
{
const CvMat* values = _data->get_values();
const CvMat* var_idx = _data->get_var_idx();
if( var_idx->cols + var_idx->rows != values->cols )
CV_Error( CV_StsBadArg, "var_idx is not supported" );
if( _data->get_missing() )
CV_Error( CV_StsBadArg, "missing values are not supported" );
int resp_idx = _data->get_response_idx();
if( resp_idx == 0)
cvGetCols( values, _predictors, 1, values->cols );
else if( resp_idx == values->cols - 1 )
cvGetCols( values, _predictors, 0, values->cols - 1 );
else
CV_Error( CV_StsBadArg, "responses must be in the first or last column; other cases are not supported" );
}
bool knearest_train( CvKNearest* knearest, CvMLData* _data )
{
const CvMat* responses = _data->get_responses();
const CvMat* train_sidx = _data->get_train_sample_idx();
bool is_regression = _data->get_var_type( _data->get_response_idx() ) == CV_VAR_ORDERED;
CvMat predictors;
knearest_check_data_and_get_predictors( _data, &predictors );
return knearest->train( &predictors, responses, train_sidx, is_regression );
}
float knearest_calc_error( CvKNearest* knearest, CvMLData* _data, int k, int type, vector<float> *resp )
{
float err = 0;
const CvMat* response = _data->get_responses();
const CvMat* sample_idx = (type == CV_TEST_ERROR) ? _data->get_test_sample_idx() : _data->get_train_sample_idx();
int* sidx = sample_idx ? sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?
1 : response->step / CV_ELEM_SIZE(response->type);
bool is_regression = _data->get_var_type( _data->get_response_idx() ) == CV_VAR_ORDERED;
CvMat predictors;
knearest_check_data_and_get_predictors( _data, &predictors );
int sample_count = sample_idx ? sample_idx->cols : 0;
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? predictors.rows : sample_count;
float* pred_resp = 0;
if( resp && (sample_count > 0) )
{
resp->resize( sample_count );
pred_resp = &((*resp)[0]);
}
if ( !is_regression )
{
for( int i = 0; i < sample_count; i++ )
{
CvMat sample;
int si = sidx ? sidx[i] : i;
cvGetRow( &predictors, &sample, si );
float r = knearest->find_nearest( &sample, k );
if( pred_resp )
pred_resp[i] = r;
int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
err += d;
}
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
}
else
{
for( int i = 0; i < sample_count; i++ )
{
CvMat sample;
int si = sidx ? sidx[i] : i;
cvGetRow( &predictors, &sample, si );
float r = knearest->find_nearest( &sample, k );
if( pred_resp )
pred_resp[i] = r;
float d = r - response->data.fl[si*r_step];
err += d*d;
}
err = sample_count ? err / (float)sample_count : -FLT_MAX;
}
return err;
}
// 3. svm
int str_to_svm_type(String& str)
{
if( !str.compare("C_SVC") )
return CvSVM::C_SVC;
return SVM::C_SVC;
if( !str.compare("NU_SVC") )
return CvSVM::NU_SVC;
return SVM::NU_SVC;
if( !str.compare("ONE_CLASS") )
return CvSVM::ONE_CLASS;
return SVM::ONE_CLASS;
if( !str.compare("EPS_SVR") )
return CvSVM::EPS_SVR;
return SVM::EPS_SVR;
if( !str.compare("NU_SVR") )
return CvSVM::NU_SVR;
return SVM::NU_SVR;
CV_Error( CV_StsBadArg, "incorrect svm type string" );
return -1;
}
int str_to_svm_kernel_type( String& str )
{
if( !str.compare("LINEAR") )
return CvSVM::LINEAR;
return SVM::LINEAR;
if( !str.compare("POLY") )
return CvSVM::POLY;
return SVM::POLY;
if( !str.compare("RBF") )
return CvSVM::RBF;
return SVM::RBF;
if( !str.compare("SIGMOID") )
return CvSVM::SIGMOID;
return SVM::SIGMOID;
CV_Error( CV_StsBadArg, "incorrect svm type string" );
return -1;
}
void svm_check_data( CvMLData* _data )
{
if( _data->get_missing() )
CV_Error( CV_StsBadArg, "missing values are not supported" );
const CvMat* var_types = _data->get_var_types();
for( int i = 0; i < var_types->cols-1; i++ )
if (var_types->data.ptr[i] == CV_VAR_CATEGORICAL)
{
char msg[50];
sprintf( msg, "incorrect type of %d-predictor", i );
CV_Error( CV_StsBadArg, msg );
}
}
bool svm_train( CvSVM* svm, CvMLData* _data, CvSVMParams _params )
{
svm_check_data(_data);
const CvMat* _train_data = _data->get_values();
const CvMat* _responses = _data->get_responses();
const CvMat* _var_idx = _data->get_var_idx();
const CvMat* _sample_idx = _data->get_train_sample_idx();
return svm->train( _train_data, _responses, _var_idx, _sample_idx, _params );
}
bool svm_train_auto( CvSVM* svm, CvMLData* _data, CvSVMParams _params,
int k_fold, CvParamGrid C_grid, CvParamGrid gamma_grid,
CvParamGrid p_grid, CvParamGrid nu_grid, CvParamGrid coef_grid,
CvParamGrid degree_grid )
{
svm_check_data(_data);
const CvMat* _train_data = _data->get_values();
const CvMat* _responses = _data->get_responses();
const CvMat* _var_idx = _data->get_var_idx();
const CvMat* _sample_idx = _data->get_train_sample_idx();
return svm->train_auto( _train_data, _responses, _var_idx,
_sample_idx, _params, k_fold, C_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid );
}
float svm_calc_error( CvSVM* svm, CvMLData* _data, int type, vector<float> *resp )
Ptr<SVM> svm_train_auto( Ptr<TrainData> _data, SVM::Params _params,
int k_fold, ParamGrid C_grid, ParamGrid gamma_grid,
ParamGrid p_grid, ParamGrid nu_grid, ParamGrid coef_grid,
ParamGrid degree_grid )
{
svm_check_data(_data);
float err = 0;
const CvMat* values = _data->get_values();
const CvMat* response = _data->get_responses();
const CvMat* sample_idx = (type == CV_TEST_ERROR) ? _data->get_test_sample_idx() : _data->get_train_sample_idx();
const CvMat* var_types = _data->get_var_types();
int* sidx = sample_idx ? sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?
1 : response->step / CV_ELEM_SIZE(response->type);
bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
int sample_count = sample_idx ? sample_idx->cols : 0;
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? values->rows : sample_count;
float* pred_resp = 0;
if( resp && (sample_count > 0) )
{
resp->resize( sample_count );
pred_resp = &((*resp)[0]);
}
if ( is_classifier )
{
for( int i = 0; i < sample_count; i++ )
{
CvMat sample;
int si = sidx ? sidx[i] : i;
cvGetRow( values, &sample, si );
float r = svm->predict( &sample );
if( pred_resp )
pred_resp[i] = r;
int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
err += d;
}
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
}
else
{
for( int i = 0; i < sample_count; i++ )
{
CvMat sample;
int si = sidx ? sidx[i] : i;
cvGetRow( values, &sample, si );
float r = svm->predict( &sample );
if( pred_resp )
pred_resp[i] = r;
float d = r - response->data.fl[si*r_step];
err += d*d;
}
err = sample_count ? err / (float)sample_count : -FLT_MAX;
}
return err;
Mat _train_data = _data->getSamples();
Mat _responses = _data->getResponses();
Mat _var_idx = _data->getVarIdx();
Mat _sample_idx = _data->getTrainSampleIdx();
Ptr<SVM> svm = SVM::create(_params);
if( svm->trainAuto( _data, k_fold, C_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid ) )
return svm;
return Ptr<SVM>();
}
// 4. em
@ -302,79 +94,66 @@ float svm_calc_error( CvSVM* svm, CvMLData* _data, int type, vector<float> *resp
int str_to_ann_train_method( String& str )
{
if( !str.compare("BACKPROP") )
return CvANN_MLP_TrainParams::BACKPROP;
return ANN_MLP::Params::BACKPROP;
if( !str.compare("RPROP") )
return CvANN_MLP_TrainParams::RPROP;
return ANN_MLP::Params::RPROP;
CV_Error( CV_StsBadArg, "incorrect ann train method string" );
return -1;
}
void ann_check_data_and_get_predictors( CvMLData* _data, CvMat* _inputs )
void ann_check_data( Ptr<TrainData> _data )
{
const CvMat* values = _data->get_values();
const CvMat* var_idx = _data->get_var_idx();
if( var_idx->cols + var_idx->rows != values->cols )
Mat values = _data->getSamples();
Mat var_idx = _data->getVarIdx();
int nvars = (int)var_idx.total();
if( nvars != 0 && nvars != values.cols )
CV_Error( CV_StsBadArg, "var_idx is not supported" );
if( _data->get_missing() )
if( !_data->getMissing().empty() )
CV_Error( CV_StsBadArg, "missing values are not supported" );
int resp_idx = _data->get_response_idx();
if( resp_idx == 0)
cvGetCols( values, _inputs, 1, values->cols );
else if( resp_idx == values->cols - 1 )
cvGetCols( values, _inputs, 0, values->cols - 1 );
else
CV_Error( CV_StsBadArg, "outputs must be in the first or last column; other cases are not supported" );
}
void ann_get_new_responses( CvMLData* _data, Mat& new_responses, map<int, int>& cls_map )
// unroll the categorical responses to binary vectors
Mat ann_get_new_responses( Ptr<TrainData> _data, map<int, int>& cls_map )
{
const CvMat* train_sidx = _data->get_train_sample_idx();
int* train_sidx_ptr = train_sidx->data.i;
const CvMat* responses = _data->get_responses();
float* responses_ptr = responses->data.fl;
int r_step = CV_IS_MAT_CONT(responses->type) ?
1 : responses->step / CV_ELEM_SIZE(responses->type);
Mat train_sidx = _data->getTrainSampleIdx();
int* train_sidx_ptr = train_sidx.ptr<int>();
Mat responses = _data->getResponses();
int cls_count = 0;
// construct cls_map
cls_map.clear();
for( int si = 0; si < train_sidx->cols; si++ )
int nresponses = (int)responses.total();
int si, n = !train_sidx.empty() ? (int)train_sidx.total() : nresponses;
for( si = 0; si < n; si++ )
{
int sidx = train_sidx_ptr[si];
int r = cvRound(responses_ptr[sidx*r_step]);
CV_DbgAssert( fabs(responses_ptr[sidx*r_step]-r) < FLT_EPSILON );
int cls_map_size = (int)cls_map.size();
cls_map[r];
if ( (int)cls_map.size() > cls_map_size )
int sidx = train_sidx_ptr ? train_sidx_ptr[si] : si;
int r = cvRound(responses.at<float>(sidx));
CV_DbgAssert( fabs(responses.at<float>(sidx) - r) < FLT_EPSILON );
map<int,int>::iterator it = cls_map.find(r);
if( it == cls_map.end() )
cls_map[r] = cls_count++;
}
new_responses.create( responses->rows, cls_count, CV_32F );
new_responses.setTo( 0 );
for( int si = 0; si < train_sidx->cols; si++ )
Mat new_responses = Mat::zeros( nresponses, cls_count, CV_32F );
for( si = 0; si < n; si++ )
{
int sidx = train_sidx_ptr[si];
int r = cvRound(responses_ptr[sidx*r_step]);
int sidx = train_sidx_ptr ? train_sidx_ptr[si] : si;
int r = cvRound(responses.at<float>(sidx));
int cidx = cls_map[r];
new_responses.ptr<float>(sidx)[cidx] = 1;
new_responses.at<float>(sidx, cidx) = 1.f;
}
return new_responses;
}
int ann_train( CvANN_MLP* ann, CvMLData* _data, Mat& new_responses, CvANN_MLP_TrainParams _params, int flags = 0 )
{
const CvMat* train_sidx = _data->get_train_sample_idx();
CvMat predictors;
ann_check_data_and_get_predictors( _data, &predictors );
CvMat _new_responses = CvMat( new_responses );
return ann->train( &predictors, &_new_responses, 0, train_sidx, _params, flags );
}
float ann_calc_error( CvANN_MLP* ann, CvMLData* _data, map<int, int>& cls_map, int type , vector<float> *resp_labels )
float ann_calc_error( Ptr<StatModel> ann, Ptr<TrainData> _data, map<int, int>& cls_map, int type, vector<float> *resp_labels )
{
float err = 0;
const CvMat* responses = _data->get_responses();
const CvMat* sample_idx = (type == CV_TEST_ERROR) ? _data->get_test_sample_idx() : _data->get_train_sample_idx();
int* sidx = sample_idx ? sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(responses->type) ?
1 : responses->step / CV_ELEM_SIZE(responses->type);
CvMat predictors;
ann_check_data_and_get_predictors( _data, &predictors );
int sample_count = sample_idx ? sample_idx->cols : 0;
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? predictors.rows : sample_count;
Mat samples = _data->getSamples();
Mat responses = _data->getResponses();
Mat sample_idx = (type == CV_TEST_ERROR) ? _data->getTestSampleIdx() : _data->getTrainSampleIdx();
int* sidx = !sample_idx.empty() ? sample_idx.ptr<int>() : 0;
ann_check_data( _data );
int sample_count = (int)sample_idx.total();
sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? samples.rows : sample_count;
float* pred_resp = 0;
vector<float> innresp;
if( sample_count > 0 )
@ -392,17 +171,16 @@ float ann_calc_error( CvANN_MLP* ann, CvMLData* _data, map<int, int>& cls_map, i
}
int cls_count = (int)cls_map.size();
Mat output( 1, cls_count, CV_32FC1 );
CvMat _output = CvMat(output);
for( int i = 0; i < sample_count; i++ )
{
CvMat sample;
int si = sidx ? sidx[i] : i;
cvGetRow( &predictors, &sample, si );
ann->predict( &sample, &_output );
CvPoint best_cls;
cvMinMaxLoc( &_output, 0, 0, 0, &best_cls, 0 );
int r = cvRound(responses->data.fl[si*r_step]);
CV_DbgAssert( fabs(responses->data.fl[si*r_step]-r) < FLT_EPSILON );
Mat sample = samples.row(si);
ann->predict( sample, output );
Point best_cls;
minMaxLoc(output, 0, 0, 0, &best_cls, 0);
int r = cvRound(responses.at<float>(si));
CV_DbgAssert( fabs(responses.at<float>(si) - r) < FLT_EPSILON );
r = cls_map[r];
int d = best_cls.x == r ? 0 : 1;
err += d;
@ -417,13 +195,13 @@ float ann_calc_error( CvANN_MLP* ann, CvMLData* _data, map<int, int>& cls_map, i
int str_to_boost_type( String& str )
{
if ( !str.compare("DISCRETE") )
return CvBoost::DISCRETE;
return Boost::DISCRETE;
if ( !str.compare("REAL") )
return CvBoost::REAL;
return Boost::REAL;
if ( !str.compare("LOGIT") )
return CvBoost::LOGIT;
return Boost::LOGIT;
if ( !str.compare("GENTLE") )
return CvBoost::GENTLE;
return Boost::GENTLE;
CV_Error( CV_StsBadArg, "incorrect boost type string" );
return -1;
}
@ -446,76 +224,37 @@ CV_MLBaseTest::CV_MLBaseTest(const char* _modelName)
RNG& rng = theRNG();
initSeed = rng.state;
rng.state = seeds[rng(seedCount)];
modelName = _modelName;
nbayes = 0;
knearest = 0;
svm = 0;
ann = 0;
dtree = 0;
boost = 0;
rtrees = 0;
ertrees = 0;
if( !modelName.compare(CV_NBAYES) )
nbayes = new CvNormalBayesClassifier;
else if( !modelName.compare(CV_KNEAREST) )
knearest = new CvKNearest;
else if( !modelName.compare(CV_SVM) )
svm = new CvSVM;
else if( !modelName.compare(CV_ANN) )
ann = new CvANN_MLP;
else if( !modelName.compare(CV_DTREE) )
dtree = new CvDTree;
else if( !modelName.compare(CV_BOOST) )
boost = new CvBoost;
else if( !modelName.compare(CV_RTREES) )
rtrees = new CvRTrees;
else if( !modelName.compare(CV_ERTREES) )
ertrees = new CvERTrees;
}
CV_MLBaseTest::~CV_MLBaseTest()
{
if( validationFS.isOpened() )
validationFS.release();
if( nbayes )
delete nbayes;
if( knearest )
delete knearest;
if( svm )
delete svm;
if( ann )
delete ann;
if( dtree )
delete dtree;
if( boost )
delete boost;
if( rtrees )
delete rtrees;
if( ertrees )
delete ertrees;
theRNG().state = initSeed;
}
int CV_MLBaseTest::read_params( CvFileStorage* _fs )
int CV_MLBaseTest::read_params( CvFileStorage* __fs )
{
if( !_fs )
FileStorage _fs(__fs, false);
if( !_fs.isOpened() )
test_case_count = -1;
else
{
CvFileNode* fn = cvGetRootFileNode( _fs, 0 );
fn = (CvFileNode*)cvGetSeqElem( fn->data.seq, 0 );
fn = cvGetFileNodeByName( _fs, fn, "run_params" );
CvSeq* dataSetNamesSeq = cvGetFileNodeByName( _fs, fn, modelName.c_str() )->data.seq;
test_case_count = dataSetNamesSeq ? dataSetNamesSeq->total : -1;
FileNode fn = _fs.getFirstTopLevelNode()["run_params"][modelName];
test_case_count = (int)fn.size();
if( test_case_count <= 0 )
test_case_count = -1;
if( test_case_count > 0 )
{
dataSetNames.resize( test_case_count );
vector<string>::iterator it = dataSetNames.begin();
for( int i = 0; i < test_case_count; i++, it++ )
*it = ((CvFileNode*)cvGetSeqElem( dataSetNamesSeq, i ))->data.str.ptr;
FileNodeIterator it = fn.begin();
for( int i = 0; i < test_case_count; i++, ++it )
{
dataSetNames[i] = (string)*it;
}
}
}
return cvtest::TS::OK;;
@ -547,8 +286,6 @@ void CV_MLBaseTest::run( int )
int CV_MLBaseTest::prepare_test_case( int test_case_idx )
{
int trainSampleCount, respIdx;
String varTypes;
clear();
string dataPath = ts->get_data_path();
@ -560,30 +297,27 @@ int CV_MLBaseTest::prepare_test_case( int test_case_idx )
string dataName = dataSetNames[test_case_idx],
filename = dataPath + dataName + ".data";
if ( data.read_csv( filename.c_str() ) != 0)
{
char msg[100];
sprintf( msg, "file %s can not be read", filename.c_str() );
ts->printf( cvtest::TS::LOG, msg );
return cvtest::TS::FAIL_INVALID_TEST_DATA;
}
FileNode dataParamsNode = validationFS.getFirstTopLevelNode()["validation"][modelName][dataName]["data_params"];
CV_DbgAssert( !dataParamsNode.empty() );
CV_DbgAssert( !dataParamsNode["LS"].empty() );
dataParamsNode["LS"] >> trainSampleCount;
CvTrainTestSplit spl( trainSampleCount );
data.set_train_test_split( &spl );
int trainSampleCount = (int)dataParamsNode["LS"];
CV_DbgAssert( !dataParamsNode["resp_idx"].empty() );
dataParamsNode["resp_idx"] >> respIdx;
data.set_response_idx( respIdx );
int respIdx = (int)dataParamsNode["resp_idx"];
CV_DbgAssert( !dataParamsNode["types"].empty() );
dataParamsNode["types"] >> varTypes;
data.set_var_types( varTypes.c_str() );
String varTypes = (String)dataParamsNode["types"];
data = TrainData::loadFromCSV(filename, 0, respIdx, respIdx+1, varTypes);
if( data.empty() )
{
ts->printf( cvtest::TS::LOG, "file %s can not be read\n", filename.c_str() );
return cvtest::TS::FAIL_INVALID_TEST_DATA;
}
data->setTrainTestSplit(trainSampleCount);
return cvtest::TS::OK;
}
@ -598,114 +332,97 @@ int CV_MLBaseTest::train( int testCaseIdx )
FileNode modelParamsNode =
validationFS.getFirstTopLevelNode()["validation"][modelName][dataSetNames[testCaseIdx]]["model_params"];
if( !modelName.compare(CV_NBAYES) )
is_trained = nbayes_train( nbayes, &data );
else if( !modelName.compare(CV_KNEAREST) )
if( modelName == CV_NBAYES )
model = NormalBayesClassifier::create();
else if( modelName == CV_KNEAREST )
{
assert( 0 );
//is_trained = knearest->train( &data );
model = KNearest::create();
}
else if( !modelName.compare(CV_SVM) )
else if( modelName == CV_SVM )
{
String svm_type_str, kernel_type_str;
modelParamsNode["svm_type"] >> svm_type_str;
modelParamsNode["kernel_type"] >> kernel_type_str;
CvSVMParams params;
params.svm_type = str_to_svm_type( svm_type_str );
params.kernel_type = str_to_svm_kernel_type( kernel_type_str );
SVM::Params params;
params.svmType = str_to_svm_type( svm_type_str );
params.kernelType = str_to_svm_kernel_type( kernel_type_str );
modelParamsNode["degree"] >> params.degree;
modelParamsNode["gamma"] >> params.gamma;
modelParamsNode["coef0"] >> params.coef0;
modelParamsNode["C"] >> params.C;
modelParamsNode["nu"] >> params.nu;
modelParamsNode["p"] >> params.p;
is_trained = svm_train( svm, &data, params );
model = SVM::create(params);
}
else if( !modelName.compare(CV_EM) )
else if( modelName == CV_EM )
{
assert( 0 );
}
else if( !modelName.compare(CV_ANN) )
else if( modelName == CV_ANN )
{
String train_method_str;
double param1, param2;
modelParamsNode["train_method"] >> train_method_str;
modelParamsNode["param1"] >> param1;
modelParamsNode["param2"] >> param2;
Mat new_responses;
ann_get_new_responses( &data, new_responses, cls_map );
int layer_sz[] = { data.get_values()->cols - 1, 100, 100, (int)cls_map.size() };
CvMat layer_sizes =
cvMat( 1, (int)(sizeof(layer_sz)/sizeof(layer_sz[0])), CV_32S, layer_sz );
ann->create( &layer_sizes );
is_trained = ann_train( ann, &data, new_responses, CvANN_MLP_TrainParams(cvTermCriteria(CV_TERMCRIT_ITER,300,0.01),
str_to_ann_train_method(train_method_str), param1, param2) ) >= 0;
Mat new_responses = ann_get_new_responses( data, cls_map );
// binarize the responses
data = TrainData::create(data->getSamples(), data->getLayout(), new_responses,
data->getVarIdx(), data->getTrainSampleIdx());
int layer_sz[] = { data->getNAllVars(), 100, 100, (int)cls_map.size() };
Mat layer_sizes( 1, (int)(sizeof(layer_sz)/sizeof(layer_sz[0])), CV_32S, layer_sz );
model = ANN_MLP::create(layer_sizes, ANN_MLP::Params(TermCriteria(TermCriteria::COUNT,300,0.01),
str_to_ann_train_method(train_method_str), param1, param2));
}
else if( !modelName.compare(CV_DTREE) )
else if( modelName == CV_DTREE )
{
int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS;
float REG_ACCURACY = 0;
bool USE_SURROGATE, IS_PRUNED;
bool USE_SURROGATE = false, IS_PRUNED;
modelParamsNode["max_depth"] >> MAX_DEPTH;
modelParamsNode["min_sample_count"] >> MIN_SAMPLE_COUNT;
modelParamsNode["use_surrogate"] >> USE_SURROGATE;
//modelParamsNode["use_surrogate"] >> USE_SURROGATE;
modelParamsNode["max_categories"] >> MAX_CATEGORIES;
modelParamsNode["cv_folds"] >> CV_FOLDS;
modelParamsNode["is_pruned"] >> IS_PRUNED;
is_trained = dtree->train( &data,
CvDTreeParams(MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY, USE_SURROGATE,
MAX_CATEGORIES, CV_FOLDS, false, IS_PRUNED, 0 )) != 0;
model = DTrees::create(DTrees::Params(MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY, USE_SURROGATE,
MAX_CATEGORIES, CV_FOLDS, false, IS_PRUNED, Mat() ));
}
else if( !modelName.compare(CV_BOOST) )
else if( modelName == CV_BOOST )
{
int BOOST_TYPE, WEAK_COUNT, MAX_DEPTH;
float WEIGHT_TRIM_RATE;
bool USE_SURROGATE;
bool USE_SURROGATE = false;
String typeStr;
modelParamsNode["type"] >> typeStr;
BOOST_TYPE = str_to_boost_type( typeStr );
modelParamsNode["weak_count"] >> WEAK_COUNT;
modelParamsNode["weight_trim_rate"] >> WEIGHT_TRIM_RATE;
modelParamsNode["max_depth"] >> MAX_DEPTH;
modelParamsNode["use_surrogate"] >> USE_SURROGATE;
is_trained = boost->train( &data,
CvBoostParams(BOOST_TYPE, WEAK_COUNT, WEIGHT_TRIM_RATE, MAX_DEPTH, USE_SURROGATE, 0) ) != 0;
//modelParamsNode["use_surrogate"] >> USE_SURROGATE;
model = Boost::create( Boost::Params(BOOST_TYPE, WEAK_COUNT, WEIGHT_TRIM_RATE, MAX_DEPTH, USE_SURROGATE, Mat()) );
}
else if( !modelName.compare(CV_RTREES) )
else if( modelName == CV_RTREES )
{
int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS, NACTIVE_VARS, MAX_TREES_NUM;
float REG_ACCURACY = 0, OOB_EPS = 0.0;
bool USE_SURROGATE, IS_PRUNED;
bool USE_SURROGATE = false, IS_PRUNED;
modelParamsNode["max_depth"] >> MAX_DEPTH;
modelParamsNode["min_sample_count"] >> MIN_SAMPLE_COUNT;
modelParamsNode["use_surrogate"] >> USE_SURROGATE;
//modelParamsNode["use_surrogate"] >> USE_SURROGATE;
modelParamsNode["max_categories"] >> MAX_CATEGORIES;
modelParamsNode["cv_folds"] >> CV_FOLDS;
modelParamsNode["is_pruned"] >> IS_PRUNED;
modelParamsNode["nactive_vars"] >> NACTIVE_VARS;
modelParamsNode["max_trees_num"] >> MAX_TREES_NUM;
is_trained = rtrees->train( &data, CvRTParams( MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY,
USE_SURROGATE, MAX_CATEGORIES, 0, true, // (calc_var_importance == true) <=> RF processes variable importance
NACTIVE_VARS, MAX_TREES_NUM, OOB_EPS, CV_TERMCRIT_ITER)) != 0;
}
else if( !modelName.compare(CV_ERTREES) )
{
int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS, NACTIVE_VARS, MAX_TREES_NUM;
float REG_ACCURACY = 0, OOB_EPS = 0.0;
bool USE_SURROGATE, IS_PRUNED;
modelParamsNode["max_depth"] >> MAX_DEPTH;
modelParamsNode["min_sample_count"] >> MIN_SAMPLE_COUNT;
modelParamsNode["use_surrogate"] >> USE_SURROGATE;
modelParamsNode["max_categories"] >> MAX_CATEGORIES;
modelParamsNode["cv_folds"] >> CV_FOLDS;
modelParamsNode["is_pruned"] >> IS_PRUNED;
modelParamsNode["nactive_vars"] >> NACTIVE_VARS;
modelParamsNode["max_trees_num"] >> MAX_TREES_NUM;
is_trained = ertrees->train( &data, CvRTParams( MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY,
USE_SURROGATE, MAX_CATEGORIES, 0, false, // (calc_var_importance == true) <=> RF processes variable importance
NACTIVE_VARS, MAX_TREES_NUM, OOB_EPS, CV_TERMCRIT_ITER)) != 0;
model = RTrees::create(RTrees::Params( MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY,
USE_SURROGATE, MAX_CATEGORIES, Mat(), true, // (calc_var_importance == true) <=> RF processes variable importance
NACTIVE_VARS, TermCriteria(TermCriteria::COUNT, MAX_TREES_NUM, OOB_EPS)));
}
if( !model.empty() )
is_trained = model->train(data, 0);
if( !is_trained )
{
ts->printf( cvtest::TS::LOG, "in test case %d model training was failed", testCaseIdx );
@ -714,78 +431,46 @@ int CV_MLBaseTest::train( int testCaseIdx )
return cvtest::TS::OK;
}
float CV_MLBaseTest::get_error( int /*testCaseIdx*/, int type, vector<float> *resp )
float CV_MLBaseTest::get_test_error( int /*testCaseIdx*/, vector<float> *resp )
{
int type = CV_TEST_ERROR;
float err = 0;
if( !modelName.compare(CV_NBAYES) )
err = nbayes_calc_error( nbayes, &data, type, resp );
else if( !modelName.compare(CV_KNEAREST) )
{
assert( 0 );
/*testCaseIdx = 0;
int k = 2;
validationFS.getFirstTopLevelNode()["validation"][modelName][dataSetNames[testCaseIdx]]["model_params"]["k"] >> k;
err = knearest->calc_error( &data, k, type, resp );*/
}
else if( !modelName.compare(CV_SVM) )
err = svm_calc_error( svm, &data, type, resp );
else if( !modelName.compare(CV_EM) )
Mat _resp;
if( modelName == CV_EM )
assert( 0 );
else if( !modelName.compare(CV_ANN) )
err = ann_calc_error( ann, &data, cls_map, type, resp );
else if( !modelName.compare(CV_DTREE) )
err = dtree->calc_error( &data, type, resp );
else if( !modelName.compare(CV_BOOST) )
err = boost->calc_error( &data, type, resp );
else if( !modelName.compare(CV_RTREES) )
err = rtrees->calc_error( &data, type, resp );
else if( !modelName.compare(CV_ERTREES) )
err = ertrees->calc_error( &data, type, resp );
else if( modelName == CV_ANN )
err = ann_calc_error( model, data, cls_map, type, resp );
else if( modelName == CV_DTREE || modelName == CV_BOOST || modelName == CV_RTREES ||
modelName == CV_SVM || modelName == CV_NBAYES || modelName == CV_KNEAREST )
err = model->calcError( data, true, _resp );
if( !_resp.empty() && resp )
_resp.convertTo(*resp, CV_32F);
return err;
}
void CV_MLBaseTest::save( const char* filename )
{
if( !modelName.compare(CV_NBAYES) )
nbayes->save( filename );
else if( !modelName.compare(CV_KNEAREST) )
knearest->save( filename );
else if( !modelName.compare(CV_SVM) )
svm->save( filename );
else if( !modelName.compare(CV_ANN) )
ann->save( filename );
else if( !modelName.compare(CV_DTREE) )
dtree->save( filename );
else if( !modelName.compare(CV_BOOST) )
boost->save( filename );
else if( !modelName.compare(CV_RTREES) )
rtrees->save( filename );
else if( !modelName.compare(CV_ERTREES) )
ertrees->save( filename );
model->save( filename );
}
void CV_MLBaseTest::load( const char* filename )
{
if( !modelName.compare(CV_NBAYES) )
nbayes->load( filename );
else if( !modelName.compare(CV_KNEAREST) )
knearest->load( filename );
else if( !modelName.compare(CV_SVM) )
{
delete svm;
svm = new CvSVM;
svm->load( filename );
}
else if( !modelName.compare(CV_ANN) )
ann->load( filename );
else if( !modelName.compare(CV_DTREE) )
dtree->load( filename );
else if( !modelName.compare(CV_BOOST) )
boost->load( filename );
else if( !modelName.compare(CV_RTREES) )
rtrees->load( filename );
else if( !modelName.compare(CV_ERTREES) )
ertrees->load( filename );
if( modelName == CV_NBAYES )
model = StatModel::load<NormalBayesClassifier>( filename );
else if( modelName == CV_KNEAREST )
model = StatModel::load<KNearest>( filename );
else if( modelName == CV_SVM )
model = StatModel::load<SVM>( filename );
else if( modelName == CV_ANN )
model = StatModel::load<ANN_MLP>( filename );
else if( modelName == CV_DTREE )
model = StatModel::load<DTrees>( filename );
else if( modelName == CV_BOOST )
model = StatModel::load<Boost>( filename );
else if( modelName == CV_RTREES )
model = StatModel::load<RTrees>( filename );
else
CV_Error( CV_StsNotImplemented, "invalid stat model name");
}
/* End of file. */

@ -25,6 +25,20 @@
#define CV_RTREES "rtrees"
#define CV_ERTREES "ertrees"
enum { CV_TRAIN_ERROR=0, CV_TEST_ERROR=1 };
using cv::Ptr;
using cv::ml::StatModel;
using cv::ml::TrainData;
using cv::ml::NormalBayesClassifier;
using cv::ml::SVM;
using cv::ml::KNearest;
using cv::ml::ParamGrid;
using cv::ml::ANN_MLP;
using cv::ml::DTrees;
using cv::ml::Boost;
using cv::ml::RTrees;
class CV_MLBaseTest : public cvtest::BaseTest
{
public:
@ -39,24 +53,16 @@ protected:
virtual int validate_test_results( int testCaseIdx ) = 0;
int train( int testCaseIdx );
float get_error( int testCaseIdx, int type, std::vector<float> *resp = 0 );
float get_test_error( int testCaseIdx, std::vector<float> *resp = 0 );
void save( const char* filename );
void load( const char* filename );
CvMLData data;
Ptr<TrainData> data;
std::string modelName, validationFN;
std::vector<std::string> dataSetNames;
cv::FileStorage validationFS;
// MLL models
CvNormalBayesClassifier* nbayes;
CvKNearest* knearest;
CvSVM* svm;
CvANN_MLP* ann;
CvDTree* dtree;
CvBoost* boost;
CvRTrees* rtrees;
CvERTrees* ertrees;
Ptr<StatModel> model;
std::map<int, int> cls_map;
@ -67,6 +73,7 @@ class CV_AMLTest : public CV_MLBaseTest
{
public:
CV_AMLTest( const char* _modelName );
virtual ~CV_AMLTest() {}
protected:
virtual int run_test_case( int testCaseIdx );
virtual int validate_test_results( int testCaseIdx );
@ -76,6 +83,7 @@ class CV_SLMLTest : public CV_MLBaseTest
{
public:
CV_SLMLTest( const char* _modelName );
virtual ~CV_SLMLTest() {}
protected:
virtual int run_test_case( int testCaseIdx );
virtual int validate_test_results( int testCaseIdx );

@ -59,15 +59,15 @@ int CV_SLMLTest::run_test_case( int testCaseIdx )
if( code == cvtest::TS::OK )
{
data.mix_train_and_test_idx();
data->setTrainTestSplit(data->getNTrainSamples(), true);
code = train( testCaseIdx );
if( code == cvtest::TS::OK )
{
get_error( testCaseIdx, CV_TEST_ERROR, &test_resps1 );
get_test_error( testCaseIdx, &test_resps1 );
fname1 = tempfile(".yml.gz");
save( fname1.c_str() );
load( fname1.c_str() );
get_error( testCaseIdx, CV_TEST_ERROR, &test_resps2 );
get_test_error( testCaseIdx, &test_resps2 );
fname2 = tempfile(".yml.gz");
save( fname2.c_str() );
}
@ -130,6 +130,8 @@ int CV_SLMLTest::validate_test_results( int testCaseIdx )
remove( fname2.c_str() );
}
if( code >= 0 )
{
// 2. compare responses
CV_Assert( test_resps1.size() == test_resps2.size() );
vector<float>::const_iterator it1 = test_resps1.begin(), it2 = test_resps2.begin();
@ -139,6 +141,8 @@ int CV_SLMLTest::validate_test_results( int testCaseIdx )
{
ts->printf( cvtest::TS::LOG, "in test case %d responses predicted before saving and after loading is different", testCaseIdx );
code = cvtest::TS::FAIL_INVALID_OUTPUT;
break;
}
}
}
return code;
@ -152,40 +156,41 @@ TEST(ML_ANN, save_load) { CV_SLMLTest test( CV_ANN ); test.safe_run(); }
TEST(ML_DTree, save_load) { CV_SLMLTest test( CV_DTREE ); test.safe_run(); }
TEST(ML_Boost, save_load) { CV_SLMLTest test( CV_BOOST ); test.safe_run(); }
TEST(ML_RTrees, save_load) { CV_SLMLTest test( CV_RTREES ); test.safe_run(); }
TEST(ML_ERTrees, save_load) { CV_SLMLTest test( CV_ERTREES ); test.safe_run(); }
TEST(DISABLED_ML_ERTrees, save_load) { CV_SLMLTest test( CV_ERTREES ); test.safe_run(); }
TEST(ML_SVM, throw_exception_when_save_untrained_model)
/*TEST(ML_SVM, throw_exception_when_save_untrained_model)
{
SVM svm;
Ptr<cv::ml::SVM> svm;
string filename = tempfile("svm.xml");
ASSERT_THROW(svm.save(filename.c_str()), Exception);
remove(filename.c_str());
}
}*/
TEST(DISABLED_ML_SVM, linear_save_load)
{
CvSVM svm1, svm2, svm3;
svm1.load("SVM45_X_38-1.xml");
svm2.load("SVM45_X_38-2.xml");
Ptr<cv::ml::SVM> svm1, svm2, svm3;
svm1 = StatModel::load<SVM>("SVM45_X_38-1.xml");
svm2 = StatModel::load<SVM>("SVM45_X_38-2.xml");
string tname = tempfile("a.xml");
svm2.save(tname.c_str());
svm3.load(tname.c_str());
svm2->save(tname);
svm3 = StatModel::load<SVM>(tname);
ASSERT_EQ(svm1.get_var_count(), svm2.get_var_count());
ASSERT_EQ(svm1.get_var_count(), svm3.get_var_count());
ASSERT_EQ(svm1->getVarCount(), svm2->getVarCount());
ASSERT_EQ(svm1->getVarCount(), svm3->getVarCount());
int m = 10000, n = svm1.get_var_count();
int m = 10000, n = svm1->getVarCount();
Mat samples(m, n, CV_32F), r1, r2, r3;
randu(samples, 0., 1.);
svm1.predict(samples, r1);
svm2.predict(samples, r2);
svm3.predict(samples, r3);
svm1->predict(samples, r1);
svm2->predict(samples, r2);
svm3->predict(samples, r3);
double eps = 1e-4;
EXPECT_LE(cvtest::norm(r1, r2, NORM_INF), eps);
EXPECT_LE(cvtest::norm(r1, r3, NORM_INF), eps);
EXPECT_LE(norm(r1, r2, NORM_INF), eps);
EXPECT_LE(norm(r1, r3, NORM_INF), eps);
remove(tname.c_str());
}

Loading…
Cancel
Save