From 5ff1fababc053063212f44e0b0bb49ee3794a393 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 25 Nov 2019 20:03:16 +0000 Subject: [PATCH] Merge pull request #15959 from mshabunin:refactor-ml-tests ml: refactored tests * use parametrized tests where appropriate * use stable theRNG in most tests * use modern style with EXPECT_/ASSERT_ checks --- modules/ml/test/test_ann.cpp | 200 ++++++ modules/ml/test/test_bayes.cpp | 56 ++ modules/ml/test/test_em.cpp | 186 +++++ modules/ml/test/test_emknearestkmeans.cpp | 727 -------------------- modules/ml/test/test_gbttest.cpp | 286 -------- modules/ml/test/test_kmeans.cpp | 53 ++ modules/ml/test/test_knearest.cpp | 77 +++ modules/ml/test/test_lr.cpp | 190 +----- modules/ml/test/test_mltests.cpp | 507 +++++++++----- modules/ml/test/test_mltests2.cpp | 794 ---------------------- modules/ml/test/test_precomp.hpp | 65 +- modules/ml/test/test_rtrees.cpp | 54 ++ modules/ml/test/test_save_load.cpp | 350 +++------- modules/ml/test/test_svmsgd.cpp | 296 ++------ modules/ml/test/test_svmtrainauto.cpp | 183 ++--- modules/ml/test/test_utils.cpp | 189 +++++ 16 files changed, 1387 insertions(+), 2826 deletions(-) create mode 100644 modules/ml/test/test_ann.cpp create mode 100644 modules/ml/test/test_bayes.cpp create mode 100644 modules/ml/test/test_em.cpp delete mode 100644 modules/ml/test/test_emknearestkmeans.cpp delete mode 100644 modules/ml/test/test_gbttest.cpp create mode 100644 modules/ml/test/test_kmeans.cpp create mode 100644 modules/ml/test/test_knearest.cpp delete mode 100644 modules/ml/test/test_mltests2.cpp create mode 100644 modules/ml/test/test_rtrees.cpp create mode 100644 modules/ml/test/test_utils.cpp diff --git a/modules/ml/test/test_ann.cpp b/modules/ml/test/test_ann.cpp new file mode 100644 index 0000000000..1ab4105de7 --- /dev/null +++ b/modules/ml/test/test_ann.cpp @@ -0,0 +1,200 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +// #define GENERATE_TESTDATA + +namespace opencv_test { namespace { + +struct Activation +{ + int id; + const char * name; +}; +void PrintTo(const Activation &a, std::ostream *os) { *os << a.name; } + +Activation activation_list[] = +{ + { ml::ANN_MLP::IDENTITY, "identity" }, + { ml::ANN_MLP::SIGMOID_SYM, "sigmoid_sym" }, + { ml::ANN_MLP::GAUSSIAN, "gaussian" }, + { ml::ANN_MLP::RELU, "relu" }, + { ml::ANN_MLP::LEAKYRELU, "leakyrelu" }, +}; + +typedef testing::TestWithParam< Activation > ML_ANN_Params; + +TEST_P(ML_ANN_Params, ActivationFunction) +{ + const Activation &activation = GetParam(); + const string dataname = "waveform"; + const string data_path = findDataFile(dataname + ".data"); + const string model_name = dataname + "_" + activation.name + ".yml"; + + Ptr tdata = TrainData::loadFromCSV(data_path, 0); + ASSERT_FALSE(tdata.empty()); + + // hack? 
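The "hack" below pins cv::theRNG() to a fixed seed so that setTrainTestSplit() draws the same train/test subset on every run (the "stable theRNG" item from the commit message), then restores the previous state so later tests see an untouched generator. A minimal sketch of the same idea as an RAII guard — the FixedRngState name is hypothetical and not part of this patch:

    struct FixedRngState
    {
        uint64 saved;
        explicit FixedRngState(uint64 seed) : saved(cv::theRNG().state) { cv::theRNG().state = seed; }
        ~FixedRngState() { cv::theRNG().state = saved; }   // restore on scope exit
    };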
+    const uint64 old_state = theRNG().state;
+    theRNG().state = 1027401484159173092;
+    tdata->setTrainTestSplit(500);
+    theRNG().state = old_state;
+
+    Mat_<int> layerSizes(1, 4);
+    layerSizes(0, 0) = tdata->getNVars();
+    layerSizes(0, 1) = 100;
+    layerSizes(0, 2) = 100;
+    layerSizes(0, 3) = tdata->getResponses().cols;
+
+    Mat testSamples = tdata->getTestSamples();
+    Mat rx, ry;
+
+    {
+        Ptr<ml::ANN_MLP> x = ml::ANN_MLP::create();
+        x->setActivationFunction(activation.id);
+        x->setLayerSizes(layerSizes);
+        x->setTrainMethod(ml::ANN_MLP::RPROP, 0.01, 0.1);
+        x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 300, 0.01));
+        x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE);
+        ASSERT_TRUE(x->isTrained());
+        x->predict(testSamples, rx);
+#ifdef GENERATE_TESTDATA
+        x->save(cvtest::TS::ptr()->get_data_path() + model_name);
+#endif
+    }
+
+    {
+        const string model_path = findDataFile(model_name);
+        Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(model_path);
+        ASSERT_TRUE(y);
+        y->predict(testSamples, ry);
+        EXPECT_MAT_NEAR(rx, ry, FLT_EPSILON);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(/**/, ML_ANN_Params, testing::ValuesIn(activation_list));
+
+//==================================================================================================
+
+CV_ENUM(ANN_MLP_METHOD, ANN_MLP::RPROP, ANN_MLP::ANNEAL)
+
+typedef tuple<int, string, int> ML_ANN_METHOD_Params;
+typedef TestWithParam<ML_ANN_METHOD_Params> ML_ANN_METHOD;
+
+TEST_P(ML_ANN_METHOD, Test)
+{
+    int methodType = get<0>(GetParam());
+    string methodName = get<1>(GetParam());
+    int N = get<2>(GetParam());
+
+    String folder = string(cvtest::TS::ptr()->get_data_path());
+    String original_path = findDataFile("waveform.data");
+    string dataname = "waveform_" + methodName;
+    string weight_name = dataname + "_init_weight.yml.gz";
+    string model_name = dataname + ".yml.gz";
+    string response_name = dataname + "_response.yml.gz";
+
+    Ptr<TrainData> tdata2 = TrainData::loadFromCSV(original_path, 0);
+    ASSERT_FALSE(tdata2.empty());
+
+    Mat samples = tdata2->getSamples()(Range(0, N), Range::all());
+    Mat responses(N, 3, CV_32FC1, Scalar(0));
+    for (int i = 0; i < N; i++)
+        responses.at<float>(i, static_cast<int>(tdata2->getResponses().at<float>(i, 0))) = 1;
+
+    Ptr<TrainData> tdata = TrainData::create(samples, ml::ROW_SAMPLE, responses);
+    ASSERT_FALSE(tdata.empty());
+
+    // hack?
+ const uint64 old_state = theRNG().state; + theRNG().state = 0; + tdata->setTrainTestSplitRatio(0.8); + theRNG().state = old_state; + + Mat testSamples = tdata->getTestSamples(); + + // train 1st stage + + Ptr xx = ml::ANN_MLP_ANNEAL::create(); + Mat_ layerSizes(1, 4); + layerSizes(0, 0) = tdata->getNVars(); + layerSizes(0, 1) = 30; + layerSizes(0, 2) = 30; + layerSizes(0, 3) = tdata->getResponses().cols; + xx->setLayerSizes(layerSizes); + xx->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM); + xx->setTrainMethod(ml::ANN_MLP::RPROP); + xx->setTermCriteria(TermCriteria(TermCriteria::COUNT, 1, 0.01)); + xx->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE); +#ifdef GENERATE_TESTDATA + { + FileStorage fs; + fs.open(cvtest::TS::ptr()->get_data_path() + weight_name, FileStorage::WRITE + FileStorage::BASE64); + xx->write(fs); + } +#endif + + // train 2nd stage + Mat r_gold; + Ptr x = ml::ANN_MLP_ANNEAL::create(); + { + const string weight_file = findDataFile(weight_name); + FileStorage fs; + fs.open(weight_file, FileStorage::READ); + x->read(fs.root()); + } + x->setTrainMethod(methodType); + if (methodType == ml::ANN_MLP::ANNEAL) + { + x->setAnnealEnergyRNG(RNG(CV_BIG_INT(0xffffffff))); + x->setAnnealInitialT(12); + x->setAnnealFinalT(0.15); + x->setAnnealCoolingRatio(0.96); + x->setAnnealItePerStep(11); + } + x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.01)); + x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE + ml::ANN_MLP::UPDATE_WEIGHTS); + ASSERT_TRUE(x->isTrained()); +#ifdef GENERATE_TESTDATA + x->save(cvtest::TS::ptr()->get_data_path() + model_name); + x->predict(testSamples, r_gold); + { + FileStorage fs_response(cvtest::TS::ptr()->get_data_path() + response_name, FileStorage::WRITE + FileStorage::BASE64); + fs_response << "response" << r_gold; + } +#endif + { + const string response_file = findDataFile(response_name); + FileStorage fs_response(response_file, FileStorage::READ); + fs_response["response"] >> r_gold; + } + ASSERT_FALSE(r_gold.empty()); + + // verify + const string model_file = findDataFile(model_name); + Ptr y = Algorithm::load(model_file); + ASSERT_TRUE(y); + Mat rx, ry; + for (int j = 0; j < 4; j++) + { + rx = x->getWeights(j); + ry = y->getWeights(j); + EXPECT_MAT_NEAR(rx, ry, FLT_EPSILON) << "Weights are not equal for layer: " << j; + } + x->predict(testSamples, rx); + y->predict(testSamples, ry); + EXPECT_MAT_NEAR(ry, rx, FLT_EPSILON) << "Predict are not equal to result of the saved model"; + EXPECT_MAT_NEAR(r_gold, rx, FLT_EPSILON) << "Predict are not equal to 'gold' response"; +} + +INSTANTIATE_TEST_CASE_P(/*none*/, ML_ANN_METHOD, + testing::Values( + ML_ANN_METHOD_Params(ml::ANN_MLP::RPROP, "rprop", 5000), + ML_ANN_METHOD_Params(ml::ANN_MLP::ANNEAL, "anneal", 1000) + // ML_ANN_METHOD_Params(ml::ANN_MLP::BACKPROP, "backprop", 500) -----> NO BACKPROP TEST + ) +); + +}} // namespace diff --git a/modules/ml/test/test_bayes.cpp b/modules/ml/test/test_bayes.cpp new file mode 100644 index 0000000000..07ff8b2a36 --- /dev/null +++ b/modules/ml/test/test_bayes.cpp @@ -0,0 +1,56 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
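In ML_NBAYES.regression_5911 below, equality of the single-sample and bulk prediction results is checked with expressions such as EXPECT_EQ(255 * R2.total(), sum(R1 == R2)[0]): the == operator on two Mat objects produces an 8-bit mask that is 255 where elements match, so the sum reaches 255 * total() only when every element is identical. A rough equivalent of the same check, shown only as a sketch:

    // the infinity norm of the difference is zero only if all elements are equal
    EXPECT_EQ(0.0, cv::norm(R1, R2, NORM_INF));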
+ +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(ML_NBAYES, regression_5911) +{ + int N=12; + Ptr nb = cv::ml::NormalBayesClassifier::create(); + + // data: + float X_data[] = { + 1,2,3,4, 1,2,3,4, 1,2,3,4, 1,2,3,4, + 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, + 4,3,2,1, 4,3,2,1, 4,3,2,1, 4,3,2,1 + }; + Mat_ X(N, 4, X_data); + + // labels: + int Y_data[] = { 0,0,0,0, 1,1,1,1, 2,2,2,2 }; + Mat_ Y(N, 1, Y_data); + + nb->train(X, ml::ROW_SAMPLE, Y); + + // single prediction: + Mat R1,P1; + for (int i=0; ipredictProb(X.row(i), r, p); + R1.push_back(r); + P1.push_back(p); + } + + // bulk prediction (continuous memory): + Mat R2,P2; + nb->predictProb(X, R2, P2); + + EXPECT_EQ(255 * R2.total(), sum(R1 == R2)[0]); + EXPECT_EQ(255 * P2.total(), sum(P1 == P2)[0]); + + // bulk prediction, with non-continuous memory storage + Mat R3_(N, 1+1, CV_32S), + P3_(N, 3+1, CV_32F); + nb->predictProb(X, R3_.col(0), P3_.colRange(0,3)); + Mat R3 = R3_.col(0).clone(), + P3 = P3_.colRange(0,3).clone(); + + EXPECT_EQ(255 * R3.total(), sum(R1 == R3)[0]); + EXPECT_EQ(255 * P3.total(), sum(P1 == P3)[0]); +} + +}} // namespace diff --git a/modules/ml/test/test_em.cpp b/modules/ml/test/test_em.cpp new file mode 100644 index 0000000000..373385d406 --- /dev/null +++ b/modules/ml/test/test_em.cpp @@ -0,0 +1,186 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +CV_ENUM(EM_START_STEP, EM::START_AUTO_STEP, EM::START_M_STEP, EM::START_E_STEP) +CV_ENUM(EM_COV_MAT, EM::COV_MAT_GENERIC, EM::COV_MAT_DIAGONAL, EM::COV_MAT_SPHERICAL) + +typedef testing::TestWithParam< tuple > ML_EM_Params; + +TEST_P(ML_EM_Params, accuracy) +{ + const int nclusters = 3; + const int sizesArr[] = { 500, 700, 800 }; + const vector sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) ); + const int pointsCount = sizesArr[0] + sizesArr[1] + sizesArr[2]; + Mat means; + vector covs; + defaultDistribs( means, covs, CV_64FC1 ); + Mat trainData(pointsCount, 2, CV_64FC1 ); + Mat trainLabels; + generateData( trainData, trainLabels, sizes, means, covs, CV_64FC1, CV_32SC1 ); + Mat testData( pointsCount, 2, CV_64FC1 ); + Mat testLabels; + generateData( testData, testLabels, sizes, means, covs, CV_64FC1, CV_32SC1 ); + Mat probs(trainData.rows, nclusters, CV_64FC1, cv::Scalar(1)); + Mat weights(1, nclusters, CV_64FC1, cv::Scalar(1)); + TermCriteria termCrit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 100, FLT_EPSILON); + int startStep = get<0>(GetParam()); + int covMatType = get<1>(GetParam()); + cv::Mat labels; + + Ptr em = EM::create(); + em->setClustersNumber(nclusters); + em->setCovarianceMatrixType(covMatType); + em->setTermCriteria(termCrit); + if( startStep == EM::START_AUTO_STEP ) + em->trainEM( trainData, noArray(), labels, noArray() ); + else if( startStep == EM::START_E_STEP ) + em->trainE( trainData, means, covs, weights, noArray(), labels, noArray() ); + else if( startStep == EM::START_M_STEP ) + em->trainM( trainData, probs, noArray(), labels, noArray() ); + + { + SCOPED_TRACE("Train"); + float err = 1000; + EXPECT_TRUE(calcErr( labels, trainLabels, sizes, err , false, false )); + EXPECT_LE(err, 0.008f); + } + + { + SCOPED_TRACE("Test"); + float err = 1000; + labels.create( testData.rows, 1, CV_32SC1 ); + for( int i = 0; i < testData.rows; i++ ) + { + Mat sample = testData.row(i); + Mat 
out_probs; + labels.at(i) = static_cast(em->predict2( sample, out_probs )[1]); + } + EXPECT_TRUE(calcErr( labels, testLabels, sizes, err, false, false )); + EXPECT_LE(err, 0.008f); + } +} + +INSTANTIATE_TEST_CASE_P(/**/, ML_EM_Params, + testing::Combine( + testing::Values(EM::START_AUTO_STEP, EM::START_M_STEP, EM::START_E_STEP), + testing::Values(EM::COV_MAT_GENERIC, EM::COV_MAT_DIAGONAL, EM::COV_MAT_SPHERICAL) + )); + +//================================================================================================== + +TEST(ML_EM, save_load) +{ + const int nclusters = 2; + Mat_ samples(3, 1); + samples << 1., 2., 3.; + + std::vector firstResult; + string filename = cv::tempfile(".xml"); + { + Mat labels; + Ptr em = EM::create(); + em->setClustersNumber(nclusters); + em->trainEM(samples, noArray(), labels, noArray()); + for( int i = 0; i < samples.rows; i++) + { + Vec2d res = em->predict2(samples.row(i), noArray()); + firstResult.push_back(res[1]); + } + { + FileStorage fs = FileStorage(filename, FileStorage::WRITE); + ASSERT_NO_THROW(fs << "em" << "{"); + ASSERT_NO_THROW(em->write(fs)); + ASSERT_NO_THROW(fs << "}"); + } + } + { + Ptr em; + ASSERT_NO_THROW(em = Algorithm::load(filename)); + for( int i = 0; i < samples.rows; i++) + { + SCOPED_TRACE(i); + Vec2d res = em->predict2(samples.row(i), noArray()); + EXPECT_DOUBLE_EQ(firstResult[i], res[1]); + } + } + remove(filename.c_str()); +} + +//================================================================================================== + +TEST(ML_EM, classification) +{ + // This test classifies spam by the following way: + // 1. estimates distributions of "spam" / "not spam" + // 2. predict classID using Bayes classifier for estimated distributions. + string dataFilename = findDataFile("spambase.data"); + Ptr data = TrainData::loadFromCSV(dataFilename, 0); + ASSERT_FALSE(data.empty()); + + Mat samples = data->getSamples(); + ASSERT_EQ(samples.cols, 57); + Mat responses = data->getResponses(); + + vector trainSamplesMask(samples.rows, 0); + const int trainSamplesCount = (int)(0.5f * samples.rows); + const int testSamplesCount = samples.rows - trainSamplesCount; + for(int i = 0; i < trainSamplesCount; i++) + trainSamplesMask[i] = 1; + RNG &rng = cv::theRNG(); + for(size_t i = 0; i < trainSamplesMask.size(); i++) + { + int i1 = rng(static_cast(trainSamplesMask.size())); + int i2 = rng(static_cast(trainSamplesMask.size())); + std::swap(trainSamplesMask[i1], trainSamplesMask[i2]); + } + + Mat samples0, samples1; + for(int i = 0; i < samples.rows; i++) + { + if(trainSamplesMask[i]) + { + Mat sample = samples.row(i); + int resp = (int)responses.at(i); + if(resp == 0) + samples0.push_back(sample); + else + samples1.push_back(sample); + } + } + + Ptr model0 = EM::create(); + model0->setClustersNumber(3); + model0->trainEM(samples0, noArray(), noArray(), noArray()); + + Ptr model1 = EM::create(); + model1->setClustersNumber(3); + model1->trainEM(samples1, noArray(), noArray(), noArray()); + + // confusion matrices + Mat_ trainCM(2, 2, 0); + Mat_ testCM(2, 2, 0); + const double lambda = 1.; + for(int i = 0; i < samples.rows; i++) + { + Mat sample = samples.row(i); + double sampleLogLikelihoods0 = model0->predict2(sample, noArray())[0]; + double sampleLogLikelihoods1 = model1->predict2(sample, noArray())[0]; + int classID = (sampleLogLikelihoods0 >= lambda * sampleLogLikelihoods1) ? 
0 : 1; + int resp = (int)responses.at(i); + EXPECT_TRUE(resp == 0 || resp == 1); + if(trainSamplesMask[i]) + trainCM(resp, classID)++; + else + testCM(resp, classID)++; + } + EXPECT_LE((double)(trainCM(1,0) + trainCM(0,1)) / trainSamplesCount, 0.23); + EXPECT_LE((double)(testCM(1,0) + testCM(0,1)) / testSamplesCount, 0.26); +} + +}} // namespace diff --git a/modules/ml/test/test_emknearestkmeans.cpp b/modules/ml/test/test_emknearestkmeans.cpp deleted file mode 100644 index 744eef8a9b..0000000000 --- a/modules/ml/test/test_emknearestkmeans.cpp +++ /dev/null @@ -1,727 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "test_precomp.hpp" - -namespace opencv_test { namespace { - -using cv::ml::TrainData; -using cv::ml::EM; -using cv::ml::KNearest; - -void defaultDistribs( Mat& means, vector& covs, int type=CV_32FC1 ) -{ - CV_TRACE_FUNCTION(); - float mp0[] = {0.0f, 0.0f}, cp0[] = {0.67f, 0.0f, 0.0f, 0.67f}; - float mp1[] = {5.0f, 0.0f}, cp1[] = {1.0f, 0.0f, 0.0f, 1.0f}; - float mp2[] = {1.0f, 5.0f}, cp2[] = {1.0f, 0.0f, 0.0f, 1.0f}; - means.create(3, 2, type); - Mat m0( 1, 2, CV_32FC1, mp0 ), c0( 2, 2, CV_32FC1, cp0 ); - Mat m1( 1, 2, CV_32FC1, mp1 ), c1( 2, 2, CV_32FC1, cp1 ); - Mat m2( 1, 2, CV_32FC1, mp2 ), c2( 2, 2, CV_32FC1, cp2 ); - means.resize(3), covs.resize(3); - - Mat mr0 = means.row(0); - m0.convertTo(mr0, type); - c0.convertTo(covs[0], type); - - Mat mr1 = means.row(1); - m1.convertTo(mr1, type); - c1.convertTo(covs[1], type); - - Mat mr2 = means.row(2); - m2.convertTo(mr2, type); - c2.convertTo(covs[2], type); -} - -// generate points sets by normal distributions -void generateData( Mat& data, Mat& labels, const vector& sizes, const Mat& _means, const vector& covs, int dataType, int labelType ) -{ - CV_TRACE_FUNCTION(); - vector::const_iterator sit = sizes.begin(); - int total = 0; - for( ; sit != sizes.end(); ++sit ) - total += *sit; - CV_Assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() ); - CV_Assert( !data.empty() && data.rows == total ); - CV_Assert( data.type() == dataType ); - - labels.create( data.rows, 1, labelType ); - - randn( data, Scalar::all(-1.0), Scalar::all(1.0) ); - vector means(sizes.size()); - for(int i = 0; i < _means.rows; i++) - means[i] = _means.row(i); - vector::const_iterator mit = means.begin(), cit = covs.begin(); - int bi, ei = 0; - sit = sizes.begin(); - for( int p = 0, l = 0; sit != sizes.end(); ++sit, ++mit, ++cit, l++ ) - { - bi = ei; - ei = bi + *sit; - assert( mit->rows == 1 && mit->cols == data.cols ); - assert( cit->rows == data.cols && cit->cols == data.cols ); - for( int i = bi; i < ei; i++, p++ ) - { - Mat r = data.row(i); - r = r * (*cit) + *mit; - if( labelType == CV_32FC1 ) - labels.at(p, 0) = (float)l; - else if( labelType == CV_32SC1 ) - labels.at(p, 0) = l; - else - { - CV_DbgAssert(0); - } - } - } -} - -int maxIdx( const vector& count ) -{ - int idx = -1; - int maxVal = -1; - vector::const_iterator it = count.begin(); - for( int i = 0; it != count.end(); ++it, i++ ) - { - if( *it > maxVal) - { - maxVal = *it; - idx = i; - } - } - assert( idx >= 0); - return idx; -} - -bool getLabelsMap( const Mat& labels, const vector& sizes, vector& labelsMap, bool checkClusterUniq=true ) -{ - size_t total = 0, nclusters = sizes.size(); - for(size_t i = 0; i < sizes.size(); i++) - total += sizes[i]; - - assert( !labels.empty() ); - assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1)); - assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 ); - - bool isFlt = labels.type() == CV_32FC1; - - labelsMap.resize(nclusters); - - vector buzy(nclusters, false); - int startIndex = 0; - for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ ) - { - vector count( nclusters, 0 ); - for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++) - { - int lbl = isFlt ? 
(int)labels.at(i) : labels.at(i); - CV_Assert(lbl < (int)nclusters); - count[lbl]++; - CV_Assert(count[lbl] < (int)total); - } - startIndex += sizes[clusterIndex]; - - int cls = maxIdx( count ); - CV_Assert( !checkClusterUniq || !buzy[cls] ); - - labelsMap[clusterIndex] = cls; - - buzy[cls] = true; - } - - if(checkClusterUniq) - { - for(size_t i = 0; i < buzy.size(); i++) - if(!buzy[i]) - return false; - } - - return true; -} - -bool calcErr( const Mat& labels, const Mat& origLabels, const vector& sizes, float& err, bool labelsEquivalent = true, bool checkClusterUniq=true ) -{ - err = 0; - CV_Assert( !labels.empty() && !origLabels.empty() ); - CV_Assert( labels.rows == 1 || labels.cols == 1 ); - CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 ); - CV_Assert( labels.total() == origLabels.total() ); - CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 ); - CV_Assert( origLabels.type() == labels.type() ); - - vector labelsMap; - bool isFlt = labels.type() == CV_32FC1; - if( !labelsEquivalent ) - { - if( !getLabelsMap( labels, sizes, labelsMap, checkClusterUniq ) ) - return false; - - for( int i = 0; i < labels.rows; i++ ) - if( isFlt ) - err += labels.at(i) != labelsMap[(int)origLabels.at(i)] ? 1.f : 0.f; - else - err += labels.at(i) != labelsMap[origLabels.at(i)] ? 1.f : 0.f; - } - else - { - for( int i = 0; i < labels.rows; i++ ) - if( isFlt ) - err += labels.at(i) != origLabels.at(i) ? 1.f : 0.f; - else - err += labels.at(i) != origLabels.at(i) ? 1.f : 0.f; - } - err /= (float)labels.rows; - return true; -} - -//-------------------------------------------------------------------------------------------- -class CV_KMeansTest : public cvtest::BaseTest { -public: - CV_KMeansTest() {} -protected: - virtual void run( int start_from ); -}; - -void CV_KMeansTest::run( int /*start_from*/ ) -{ - CV_TRACE_FUNCTION(); - const int iters = 100; - int sizesArr[] = { 5000, 7000, 8000 }; - int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2]; - - Mat data( pointsCount, 2, CV_32FC1 ), labels; - vector sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) ); - Mat means; - vector covs; - defaultDistribs( means, covs ); - generateData( data, labels, sizes, means, covs, CV_32FC1, CV_32SC1 ); - - int code = cvtest::TS::OK; - float err; - Mat bestLabels; - // 1. flag==KMEANS_PP_CENTERS - kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_PP_CENTERS, noArray() ); - if( !calcErr( bestLabels, labels, sizes, err , false ) ) - { - ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_PP_CENTERS.\n" ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.01f ) - { - ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_PP_CENTERS.\n", err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - // 2. flag==KMEANS_RANDOM_CENTERS - kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_RANDOM_CENTERS, noArray() ); - if( !calcErr( bestLabels, labels, sizes, err, false ) ) - { - ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_RANDOM_CENTERS.\n" ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.01f ) - { - ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_RANDOM_CENTERS.\n", err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - // 3. 
flag==KMEANS_USE_INITIAL_LABELS - labels.copyTo( bestLabels ); - RNG rng; - for( int i = 0; i < 0.5f * pointsCount; i++ ) - bestLabels.at( rng.next() % pointsCount, 0 ) = rng.next() % 3; - kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_USE_INITIAL_LABELS, noArray() ); - if( !calcErr( bestLabels, labels, sizes, err, false ) ) - { - ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_USE_INITIAL_LABELS.\n" ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.01f ) - { - ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_USE_INITIAL_LABELS.\n", err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - ts->set_failed_test_info( code ); -} - -//-------------------------------------------------------------------------------------------- -class CV_KNearestTest : public cvtest::BaseTest { -public: - CV_KNearestTest() {} -protected: - virtual void run( int start_from ); -}; - -void CV_KNearestTest::run( int /*start_from*/ ) -{ - int sizesArr[] = { 500, 700, 800 }; - int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2]; - - // train data - Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels; - vector sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) ); - Mat means; - vector covs; - defaultDistribs( means, covs ); - generateData( trainData, trainLabels, sizes, means, covs, CV_32FC1, CV_32FC1 ); - - // test data - Mat testData( pointsCount, 2, CV_32FC1 ), testLabels, bestLabels; - generateData( testData, testLabels, sizes, means, covs, CV_32FC1, CV_32FC1 ); - - int code = cvtest::TS::OK; - - // KNearest default implementation - Ptr knearest = KNearest::create(); - knearest->train(trainData, ml::ROW_SAMPLE, trainLabels); - knearest->findNearest(testData, 4, bestLabels); - float err; - if( !calcErr( bestLabels, testLabels, sizes, err, true ) ) - { - ts->printf( cvtest::TS::LOG, "Bad output labels.\n" ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.01f ) - { - ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on test data.\n", err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - // KNearest KDTree implementation - Ptr knearestKdt = KNearest::create(); - knearestKdt->setAlgorithmType(KNearest::KDTREE); - knearestKdt->train(trainData, ml::ROW_SAMPLE, trainLabels); - knearestKdt->findNearest(testData, 4, bestLabels); - if( !calcErr( bestLabels, testLabels, sizes, err, true ) ) - { - ts->printf( cvtest::TS::LOG, "Bad output labels.\n" ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.01f ) - { - ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on test data.\n", err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - ts->set_failed_test_info( code ); -} - -class EM_Params -{ -public: - EM_Params(int _nclusters=10, int _covMatType=EM::COV_MAT_DIAGONAL, int _startStep=EM::START_AUTO_STEP, - const cv::TermCriteria& _termCrit=cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 100, FLT_EPSILON), - const cv::Mat* _probs=0, const cv::Mat* _weights=0, - const cv::Mat* _means=0, const std::vector* _covs=0) - : nclusters(_nclusters), covMatType(_covMatType), startStep(_startStep), - probs(_probs), weights(_weights), means(_means), covs(_covs), termCrit(_termCrit) - {} - - int nclusters; - int covMatType; - int startStep; - - // all 4 following matrices should have type CV_32FC1 - const cv::Mat* probs; - const cv::Mat* weights; - const cv::Mat* means; - const std::vector* covs; - - cv::TermCriteria termCrit; -}; - 
-//-------------------------------------------------------------------------------------------- -class CV_EMTest : public cvtest::BaseTest -{ -public: - CV_EMTest() {} -protected: - virtual void run( int start_from ); - int runCase( int caseIndex, const EM_Params& params, - const cv::Mat& trainData, const cv::Mat& trainLabels, - const cv::Mat& testData, const cv::Mat& testLabels, - const vector& sizes); -}; - -int CV_EMTest::runCase( int caseIndex, const EM_Params& params, - const cv::Mat& trainData, const cv::Mat& trainLabels, - const cv::Mat& testData, const cv::Mat& testLabels, - const vector& sizes ) -{ - int code = cvtest::TS::OK; - - cv::Mat labels; - float err; - - Ptr em = EM::create(); - em->setClustersNumber(params.nclusters); - em->setCovarianceMatrixType(params.covMatType); - em->setTermCriteria(params.termCrit); - if( params.startStep == EM::START_AUTO_STEP ) - em->trainEM( trainData, noArray(), labels, noArray() ); - else if( params.startStep == EM::START_E_STEP ) - em->trainE( trainData, *params.means, *params.covs, - *params.weights, noArray(), labels, noArray() ); - else if( params.startStep == EM::START_M_STEP ) - em->trainM( trainData, *params.probs, - noArray(), labels, noArray() ); - - // check train error - if( !calcErr( labels, trainLabels, sizes, err , false, false ) ) - { - ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.008f ) - { - ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on train data.\n", caseIndex, err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - // check test error - labels.create( testData.rows, 1, CV_32SC1 ); - for( int i = 0; i < testData.rows; i++ ) - { - Mat sample = testData.row(i); - Mat probs; - labels.at(i) = static_cast(em->predict2( sample, probs )[1]); - } - if( !calcErr( labels, testLabels, sizes, err, false, false ) ) - { - ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if( err > 0.008f ) - { - ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on test data.\n", caseIndex, err ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - return code; -} - -void CV_EMTest::run( int /*start_from*/ ) -{ - int sizesArr[] = { 500, 700, 800 }; - int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2]; - - // Points distribution - Mat means; - vector covs; - defaultDistribs( means, covs, CV_64FC1 ); - - // train data - Mat trainData( pointsCount, 2, CV_64FC1 ), trainLabels; - vector sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) ); - generateData( trainData, trainLabels, sizes, means, covs, CV_64FC1, CV_32SC1 ); - - // test data - Mat testData( pointsCount, 2, CV_64FC1 ), testLabels; - generateData( testData, testLabels, sizes, means, covs, CV_64FC1, CV_32SC1 ); - - EM_Params params; - params.nclusters = 3; - Mat probs(trainData.rows, params.nclusters, CV_64FC1, cv::Scalar(1)); - params.probs = &probs; - Mat weights(1, params.nclusters, CV_64FC1, cv::Scalar(1)); - params.weights = &weights; - params.means = &means; - params.covs = &covs; - - int code = cvtest::TS::OK; - int caseIndex = 0; - { - params.startStep = EM::START_AUTO_STEP; - params.covMatType = EM::COV_MAT_GENERIC; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? 
code : currCode; - } - { - params.startStep = EM::START_AUTO_STEP; - params.covMatType = EM::COV_MAT_DIAGONAL; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_AUTO_STEP; - params.covMatType = EM::COV_MAT_SPHERICAL; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_M_STEP; - params.covMatType = EM::COV_MAT_GENERIC; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_M_STEP; - params.covMatType = EM::COV_MAT_DIAGONAL; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_M_STEP; - params.covMatType = EM::COV_MAT_SPHERICAL; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_E_STEP; - params.covMatType = EM::COV_MAT_GENERIC; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_E_STEP; - params.covMatType = EM::COV_MAT_DIAGONAL; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - { - params.startStep = EM::START_E_STEP; - params.covMatType = EM::COV_MAT_SPHERICAL; - int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes); - code = currCode == cvtest::TS::OK ? code : currCode; - } - - ts->set_failed_test_info( code ); -} - -class CV_EMTest_SaveLoad : public cvtest::BaseTest { -public: - CV_EMTest_SaveLoad() {} -protected: - virtual void run( int /*start_from*/ ) - { - int code = cvtest::TS::OK; - const int nclusters = 2; - - Mat samples = Mat(3,1,CV_64FC1); - samples.at(0,0) = 1; - samples.at(1,0) = 2; - samples.at(2,0) = 3; - - Mat labels; - - Ptr em = EM::create(); - em->setClustersNumber(nclusters); - em->trainEM(samples, noArray(), labels, noArray()); - - Mat firstResult(samples.rows, 1, CV_32SC1); - for( int i = 0; i < samples.rows; i++) - firstResult.at(i) = static_cast(em->predict2(samples.row(i), noArray())[1]); - - // Write out - string filename = cv::tempfile(".xml"); - { - FileStorage fs = FileStorage(filename, FileStorage::WRITE); - try - { - fs << "em" << "{"; - em->write(fs); - fs << "}"; - } - catch(...) - { - ts->printf( cvtest::TS::LOG, "Crash in write method.\n" ); - ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION ); - } - } - - em.release(); - - // Read in - try - { - em = Algorithm::load(filename); - } - catch(...) - { - ts->printf( cvtest::TS::LOG, "Crash in read method.\n" ); - ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION ); - } - - remove( filename.c_str() ); - - int errCaseCount = 0; - for( int i = 0; i < samples.rows; i++) - errCaseCount = std::abs(em->predict2(samples.row(i), noArray())[1] - firstResult.at(i)) < FLT_EPSILON ? 
0 : 1; - - if( errCaseCount > 0 ) - { - ts->printf( cvtest::TS::LOG, "Different prediction results before writing and after reading (errCaseCount=%d).\n", errCaseCount ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - ts->set_failed_test_info( code ); - } -}; - -class CV_EMTest_Classification : public cvtest::BaseTest -{ -public: - CV_EMTest_Classification() {} -protected: - virtual void run(int) - { - // This test classifies spam by the following way: - // 1. estimates distributions of "spam" / "not spam" - // 2. predict classID using Bayes classifier for estimated distributions. - - string dataFilename = string(ts->get_data_path()) + "spambase.data"; - Ptr data = TrainData::loadFromCSV(dataFilename, 0); - - if( data.empty() ) - { - ts->printf(cvtest::TS::LOG, "File with spambase dataset can't be read.\n"); - ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA); - return; - } - - Mat samples = data->getSamples(); - CV_Assert(samples.cols == 57); - Mat responses = data->getResponses(); - - vector trainSamplesMask(samples.rows, 0); - int trainSamplesCount = (int)(0.5f * samples.rows); - for(int i = 0; i < trainSamplesCount; i++) - trainSamplesMask[i] = 1; - RNG rng(0); - for(size_t i = 0; i < trainSamplesMask.size(); i++) - { - int i1 = rng(static_cast(trainSamplesMask.size())); - int i2 = rng(static_cast(trainSamplesMask.size())); - std::swap(trainSamplesMask[i1], trainSamplesMask[i2]); - } - - Mat samples0, samples1; - for(int i = 0; i < samples.rows; i++) - { - if(trainSamplesMask[i]) - { - Mat sample = samples.row(i); - int resp = (int)responses.at(i); - if(resp == 0) - samples0.push_back(sample); - else - samples1.push_back(sample); - } - } - Ptr model0 = EM::create(); - model0->setClustersNumber(3); - model0->trainEM(samples0, noArray(), noArray(), noArray()); - - Ptr model1 = EM::create(); - model1->setClustersNumber(3); - model1->trainEM(samples1, noArray(), noArray(), noArray()); - - Mat trainConfusionMat(2, 2, CV_32SC1, Scalar(0)), - testConfusionMat(2, 2, CV_32SC1, Scalar(0)); - const double lambda = 1.; - for(int i = 0; i < samples.rows; i++) - { - Mat sample = samples.row(i); - double sampleLogLikelihoods0 = model0->predict2(sample, noArray())[0]; - double sampleLogLikelihoods1 = model1->predict2(sample, noArray())[0]; - - int classID = sampleLogLikelihoods0 >= lambda * sampleLogLikelihoods1 ? 
0 : 1; - - if(trainSamplesMask[i]) - trainConfusionMat.at((int)responses.at(i), classID)++; - else - testConfusionMat.at((int)responses.at(i), classID)++; - } -// std::cout << trainConfusionMat << std::endl; -// std::cout << testConfusionMat << std::endl; - - double trainError = (double)(trainConfusionMat.at(1,0) + trainConfusionMat.at(0,1)) / trainSamplesCount; - double testError = (double)(testConfusionMat.at(1,0) + testConfusionMat.at(0,1)) / (samples.rows - trainSamplesCount); - const double maxTrainError = 0.23; - const double maxTestError = 0.26; - - int code = cvtest::TS::OK; - if(trainError > maxTrainError) - { - ts->printf(cvtest::TS::LOG, "Too large train classification error (calc = %f, valid=%f).\n", trainError, maxTrainError); - code = cvtest::TS::FAIL_INVALID_TEST_DATA; - } - if(testError > maxTestError) - { - ts->printf(cvtest::TS::LOG, "Too large test classification error (calc = %f, valid=%f).\n", testError, maxTestError); - code = cvtest::TS::FAIL_INVALID_TEST_DATA; - } - - ts->set_failed_test_info(code); - } -}; - -TEST(ML_KMeans, accuracy) { CV_KMeansTest test; test.safe_run(); } -TEST(ML_KNearest, accuracy) { CV_KNearestTest test; test.safe_run(); } -TEST(ML_EM, accuracy) { CV_EMTest test; test.safe_run(); } -TEST(ML_EM, save_load) { CV_EMTest_SaveLoad test; test.safe_run(); } -TEST(ML_EM, classification) { CV_EMTest_Classification test; test.safe_run(); } - -TEST(ML_KNearest, regression_12347) -{ - Mat xTrainData = (Mat_(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1); - Mat yTrainLabels = (Mat_(5,1) << 1, 1, 2, 2, 2); - Ptr knn = KNearest::create(); - knn->train(xTrainData, ml::ROW_SAMPLE, yTrainLabels); - - Mat xTestData = (Mat_(2,2) << 1.1, 1.1, 2, 2.2); - Mat zBestLabels, neighbours, dist; - // check output shapes: - int K = 16, Kexp = std::min(K, xTrainData.rows); - knn->findNearest(xTestData, K, zBestLabels, neighbours, dist); - EXPECT_EQ(xTestData.rows, zBestLabels.rows); - EXPECT_EQ(neighbours.cols, Kexp); - EXPECT_EQ(dist.cols, Kexp); - // see if the result is still correct: - K = 2; - knn->findNearest(xTestData, K, zBestLabels, neighbours, dist); - EXPECT_EQ(1, zBestLabels.at(0,0)); - EXPECT_EQ(2, zBestLabels.at(1,0)); -} - -}} // namespace diff --git a/modules/ml/test/test_gbttest.cpp b/modules/ml/test/test_gbttest.cpp deleted file mode 100644 index 98e1bc1386..0000000000 --- a/modules/ml/test/test_gbttest.cpp +++ /dev/null @@ -1,286 +0,0 @@ - -#include "test_precomp.hpp" - -#if 0 - -using namespace std; - - -class CV_GBTreesTest : public cvtest::BaseTest -{ -public: - CV_GBTreesTest(); - ~CV_GBTreesTest(); - -protected: - void run(int); - - int TestTrainPredict(int test_num); - int TestSaveLoad(); - - int checkPredictError(int test_num); - int checkLoadSave(); - - string model_file_name1; - string model_file_name2; - - string* datasets; - string data_path; - - CvMLData* data; - CvGBTrees* gtb; - - vector test_resps1; - vector test_resps2; - - int64 initSeed; -}; - - -int _get_len(const CvMat* mat) -{ - return (mat->cols > mat->rows) ? 
mat->cols : mat->rows; -} - - -CV_GBTreesTest::CV_GBTreesTest() -{ - int64 seeds[] = { CV_BIG_INT(0x00009fff4f9c8d52), - CV_BIG_INT(0x0000a17166072c7c), - CV_BIG_INT(0x0201b32115cd1f9a), - CV_BIG_INT(0x0513cb37abcd1234), - CV_BIG_INT(0x0001a2b3c4d5f678) - }; - - int seedCount = sizeof(seeds)/sizeof(seeds[0]); - cv::RNG& rng = cv::theRNG(); - initSeed = rng.state; - rng.state = seeds[rng(seedCount)]; - - datasets = 0; - data = 0; - gtb = 0; -} - -CV_GBTreesTest::~CV_GBTreesTest() -{ - if (data) - delete data; - delete[] datasets; - cv::theRNG().state = initSeed; -} - - -int CV_GBTreesTest::TestTrainPredict(int test_num) -{ - int code = cvtest::TS::OK; - - int weak_count = 200; - float shrinkage = 0.1f; - float subsample_portion = 0.5f; - int max_depth = 5; - bool use_surrogates = false; - int loss_function_type = 0; - switch (test_num) - { - case (1) : loss_function_type = CvGBTrees::SQUARED_LOSS; break; - case (2) : loss_function_type = CvGBTrees::ABSOLUTE_LOSS; break; - case (3) : loss_function_type = CvGBTrees::HUBER_LOSS; break; - case (0) : loss_function_type = CvGBTrees::DEVIANCE_LOSS; break; - default : - { - ts->printf( cvtest::TS::LOG, "Bad test_num value in CV_GBTreesTest::TestTrainPredict(..) function." ); - return cvtest::TS::FAIL_BAD_ARG_CHECK; - } - } - - int dataset_num = test_num == 0 ? 0 : 1; - if (!data) - { - data = new CvMLData(); - data->set_delimiter(','); - - if (data->read_csv(datasets[dataset_num].c_str())) - { - ts->printf( cvtest::TS::LOG, "File reading error." ); - return cvtest::TS::FAIL_INVALID_TEST_DATA; - } - - if (test_num == 0) - { - data->set_response_idx(57); - data->set_var_types("ord[0-56],cat[57]"); - } - else - { - data->set_response_idx(13); - data->set_var_types("ord[0-2,4-13],cat[3]"); - subsample_portion = 0.7f; - } - - int train_sample_count = cvFloor(_get_len(data->get_responses())*0.5f); - CvTrainTestSplit spl( train_sample_count ); - data->set_train_test_split( &spl ); - } - - data->mix_train_and_test_idx(); - - - if (gtb) delete gtb; - gtb = new CvGBTrees(); - bool tmp_code = true; - tmp_code = gtb->train(data, CvGBTreesParams(loss_function_type, weak_count, - shrinkage, subsample_portion, - max_depth, use_surrogates)); - - if (!tmp_code) - { - ts->printf( cvtest::TS::LOG, "Model training was failed."); - return cvtest::TS::FAIL_INVALID_OUTPUT; - } - - code = checkPredictError(test_num); - - return code; - -} - - -int CV_GBTreesTest::checkPredictError(int test_num) -{ - if (!gtb) - return cvtest::TS::FAIL_GENERIC; - - //float mean[] = {5.430247f, 13.5654f, 12.6569f, 13.1661f}; - //float sigma[] = {0.4162694f, 3.21161f, 3.43297f, 3.00624f}; - float mean[] = {5.80226f, 12.68689f, 13.49095f, 13.19628f}; - float sigma[] = {0.4764534f, 3.166919f, 3.022405f, 2.868722f}; - - float current_error = gtb->calc_error(data, CV_TEST_ERROR); - - if ( abs( current_error - mean[test_num]) > 6*sigma[test_num] ) - { - ts->printf( cvtest::TS::LOG, "Test error is out of range:\n" - "abs(%f/*curEr*/ - %f/*mean*/ > %f/*6*sigma*/", - current_error, mean[test_num], 6*sigma[test_num] ); - return cvtest::TS::FAIL_BAD_ACCURACY; - } - - return cvtest::TS::OK; - -} - - -int CV_GBTreesTest::TestSaveLoad() -{ - if (!gtb) - return cvtest::TS::FAIL_GENERIC; - - model_file_name1 = cv::tempfile(); - model_file_name2 = cv::tempfile(); - - gtb->save(model_file_name1.c_str()); - gtb->calc_error(data, CV_TEST_ERROR, &test_resps1); - gtb->load(model_file_name1.c_str()); - gtb->calc_error(data, CV_TEST_ERROR, &test_resps2); - gtb->save(model_file_name2.c_str()); - - return 
checkLoadSave(); - -} - - - -int CV_GBTreesTest::checkLoadSave() -{ - int code = cvtest::TS::OK; - - // 1. compare files - ifstream f1( model_file_name1.c_str() ), f2( model_file_name2.c_str() ); - string s1, s2; - int lineIdx = 0; - CV_Assert( f1.is_open() && f2.is_open() ); - for( ; !f1.eof() && !f2.eof(); lineIdx++ ) - { - getline( f1, s1 ); - getline( f2, s2 ); - if( s1.compare(s2) ) - { - ts->printf( cvtest::TS::LOG, "first and second saved files differ in %n-line; first %n line: %s; second %n-line: %s", - lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - } - if( !f1.eof() || !f2.eof() ) - { - ts->printf( cvtest::TS::LOG, "First and second saved files differ in %n-line; first %n line: %s; second %n-line: %s", - lineIdx, lineIdx, s1.c_str(), lineIdx, s2.c_str() ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - f1.close(); - f2.close(); - // delete temporary files - remove( model_file_name1.c_str() ); - remove( model_file_name2.c_str() ); - - // 2. compare responses - CV_Assert( test_resps1.size() == test_resps2.size() ); - vector::const_iterator it1 = test_resps1.begin(), it2 = test_resps2.begin(); - for( ; it1 != test_resps1.end(); ++it1, ++it2 ) - { - if( fabs(*it1 - *it2) > FLT_EPSILON ) - { - ts->printf( cvtest::TS::LOG, "Responses predicted before saving and after loading are different" ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - } - return code; -} - - - -void CV_GBTreesTest::run(int) -{ - - string dataPath = string(ts->get_data_path()); - datasets = new string[2]; - datasets[0] = dataPath + string("spambase.data"); /*string("dataset_classification.csv");*/ - datasets[1] = dataPath + string("housing_.data"); /*string("dataset_regression.csv");*/ - - int code = cvtest::TS::OK; - - for (int i = 0; i < 4; i++) - { - - int temp_code = TestTrainPredict(i); - if (temp_code != cvtest::TS::OK) - { - code = temp_code; - break; - } - - else if (i==0) - { - temp_code = TestSaveLoad(); - if (temp_code != cvtest::TS::OK) - code = temp_code; - delete data; - data = 0; - } - - delete gtb; - gtb = 0; - } - delete data; - data = 0; - - ts->set_failed_test_info( code ); -} - -///////////////////////////////////////////////////////////////////////////// -//////////////////// test registration ///////////////////////////////////// -///////////////////////////////////////////////////////////////////////////// - -TEST(ML_GBTrees, regression) { CV_GBTreesTest test; test.safe_run(); } - -#endif diff --git a/modules/ml/test/test_kmeans.cpp b/modules/ml/test/test_kmeans.cpp new file mode 100644 index 0000000000..153ed642d3 --- /dev/null +++ b/modules/ml/test/test_kmeans.cpp @@ -0,0 +1,53 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
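The helpers defaultDistribs(), generateData() and calcErr() used by the k-means, k-nearest and EM tests below were defined in the deleted test_emknearestkmeans.cpp; judging by the diffstat they now live in the new test_utils.cpp (its content is not shown in this hunk). Their signatures, as declared in the removed file, are:

    void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 );
    void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means,
                       const vector<Mat>& covs, int dataType, int labelType );
    bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes,
                  float& err, bool labelsEquivalent = true, bool checkClusterUniq = true );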
+ +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(ML_KMeans, accuracy) +{ + const int iters = 100; + int sizesArr[] = { 5000, 7000, 8000 }; + int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2]; + + Mat data( pointsCount, 2, CV_32FC1 ), labels; + vector sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) ); + Mat means; + vector covs; + defaultDistribs( means, covs ); + generateData( data, labels, sizes, means, covs, CV_32FC1, CV_32SC1 ); + TermCriteria termCriteria( TermCriteria::COUNT, iters, 0.0); + + { + SCOPED_TRACE("KMEANS_PP_CENTERS"); + float err = 1000; + Mat bestLabels; + kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_PP_CENTERS, noArray() ); + EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err , false )); + EXPECT_LE(err, 0.01f); + } + { + SCOPED_TRACE("KMEANS_RANDOM_CENTERS"); + float err = 1000; + Mat bestLabels; + kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_RANDOM_CENTERS, noArray() ); + EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err, false )); + EXPECT_LE(err, 0.01f); + } + { + SCOPED_TRACE("KMEANS_USE_INITIAL_LABELS"); + float err = 1000; + Mat bestLabels; + labels.copyTo( bestLabels ); + RNG &rng = cv::theRNG(); + for( int i = 0; i < 0.5f * pointsCount; i++ ) + bestLabels.at( rng.next() % pointsCount, 0 ) = rng.next() % 3; + kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_USE_INITIAL_LABELS, noArray() ); + EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err, false )); + EXPECT_LE(err, 0.01f); + } +} + +}} // namespace diff --git a/modules/ml/test/test_knearest.cpp b/modules/ml/test/test_knearest.cpp new file mode 100644 index 0000000000..49e6b0d12a --- /dev/null +++ b/modules/ml/test/test_knearest.cpp @@ -0,0 +1,77 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +using cv::ml::TrainData; +using cv::ml::EM; +using cv::ml::KNearest; + +TEST(ML_KNearest, accuracy) +{ + int sizesArr[] = { 500, 700, 800 }; + int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2]; + + Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels; + vector sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) ); + Mat means; + vector covs; + defaultDistribs( means, covs ); + generateData( trainData, trainLabels, sizes, means, covs, CV_32FC1, CV_32FC1 ); + + Mat testData( pointsCount, 2, CV_32FC1 ); + Mat testLabels; + generateData( testData, testLabels, sizes, means, covs, CV_32FC1, CV_32FC1 ); + + { + SCOPED_TRACE("Default"); + Mat bestLabels; + float err = 1000; + Ptr knn = KNearest::create(); + knn->train(trainData, ml::ROW_SAMPLE, trainLabels); + knn->findNearest(testData, 4, bestLabels); + EXPECT_TRUE(calcErr( bestLabels, testLabels, sizes, err, true )); + EXPECT_LE(err, 0.01f); + } + { + // TODO: broken +#if 0 + SCOPED_TRACE("KDTree"); + Mat bestLabels; + float err = 1000; + Ptr knn = KNearest::create(); + knn->setAlgorithmType(KNearest::KDTREE); + knn->train(trainData, ml::ROW_SAMPLE, trainLabels); + knn->findNearest(testData, 4, bestLabels); + EXPECT_TRUE(calcErr( bestLabels, testLabels, sizes, err, true )); + EXPECT_LE(err, 0.01f); +#endif + } +} + +TEST(ML_KNearest, regression_12347) +{ + Mat xTrainData = (Mat_(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1); + Mat yTrainLabels = (Mat_(5,1) << 1, 1, 2, 2, 2); + Ptr knn = KNearest::create(); + knn->train(xTrainData, ml::ROW_SAMPLE, yTrainLabels); + + Mat xTestData = (Mat_(2,2) << 1.1, 1.1, 2, 2.2); + Mat zBestLabels, neighbours, dist; + // check output shapes: + int K = 16, Kexp = std::min(K, xTrainData.rows); + knn->findNearest(xTestData, K, zBestLabels, neighbours, dist); + EXPECT_EQ(xTestData.rows, zBestLabels.rows); + EXPECT_EQ(neighbours.cols, Kexp); + EXPECT_EQ(dist.cols, Kexp); + // see if the result is still correct: + K = 2; + knn->findNearest(xTestData, K, zBestLabels, neighbours, dist); + EXPECT_EQ(1, zBestLabels.at(0,0)); + EXPECT_EQ(2, zBestLabels.at(1,0)); +} + +}} // namespace diff --git a/modules/ml/test/test_lr.cpp b/modules/ml/test/test_lr.cpp index d57825152c..d68266cc6f 100644 --- a/modules/ml/test/test_lr.cpp +++ b/modules/ml/test/test_lr.cpp @@ -1,9 +1,6 @@ -/////////////////////////////////////////////////////////////////////////////////////// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. - -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. // This is a implementation of the Logistic Regression algorithm in C++ in OpenCV. @@ -11,92 +8,16 @@ // Rahul Kavi rahulkavi[at]live[at]com // -// contains a subset of data from the popular Iris Dataset (taken from "http://archive.ics.uci.edu/ml/datasets/Iris") - -// # You are free to use, change, or redistribute the code in any way you wish for -// # non-commercial purposes, but please maintain the name of the original author. -// # This code comes with no warranty of any kind. 
- -// # -// # You are free to use, change, or redistribute the code in any way you wish for -// # non-commercial purposes, but please maintain the name of the original author. -// # This code comes with no warranty of any kind. - -// # Logistic Regression ALGORITHM - - -// License Agreement -// For Open Source Computer Vision Library - -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. - -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: - -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. - -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. - -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. - -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
- #include "test_precomp.hpp" namespace opencv_test { namespace { -bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error) -{ - CV_TRACE_FUNCTION(); - error = 0.0f; - float accuracy = 0.0f; - Mat _p_labels_temp; - Mat _o_labels_temp; - _p_labels.convertTo(_p_labels_temp, CV_32S); - _o_labels.convertTo(_o_labels_temp, CV_32S); - - CV_Assert(_p_labels_temp.total() == _o_labels_temp.total()); - CV_Assert(_p_labels_temp.rows == _o_labels_temp.rows); - - accuracy = (float)countNonZero(_p_labels_temp == _o_labels_temp)/_p_labels_temp.rows; - error = 1 - accuracy; - return true; -} - -//-------------------------------------------------------------------------------------------- - -class CV_LRTest : public cvtest::BaseTest -{ -public: - CV_LRTest() {} -protected: - virtual void run( int start_from ); -}; - -void CV_LRTest::run( int /*start_from*/ ) +TEST(ML_LR, accuracy) { - CV_TRACE_FUNCTION(); - // initialize variables from the popular Iris Dataset - string dataFileName = ts->get_data_path() + "iris.data"; + std::string dataFileName = findDataFile("iris.data"); Ptr tdata = TrainData::loadFromCSV(dataFileName, 0); - ASSERT_FALSE(tdata.empty()) << "Could not find test data file : " << dataFileName; + ASSERT_FALSE(tdata.empty()); - // run LR classifier train classifier Ptr p = LogisticRegression::create(); p->setLearningRate(1.0); p->setIterations(10001); @@ -105,121 +26,54 @@ void CV_LRTest::run( int /*start_from*/ ) p->setMiniBatchSize(10); p->train(tdata); - // predict using the same data Mat responses; p->predict(tdata->getSamples(), responses); - // calculate error - int test_code = cvtest::TS::OK; - float error = 0.0f; - if(!calculateError(responses, tdata->getResponses(), error)) - { - ts->printf(cvtest::TS::LOG, "Bad prediction labels\n" ); - test_code = cvtest::TS::FAIL_INVALID_OUTPUT; - } - else if(error > 0.05f) - { - ts->printf(cvtest::TS::LOG, "Bad accuracy of (%f)\n", error); - test_code = cvtest::TS::FAIL_BAD_ACCURACY; - } - - { - FileStorage s("debug.xml", FileStorage::WRITE); - s << "original" << tdata->getResponses(); - s << "predicted1" << responses; - s << "learnt" << p->get_learnt_thetas(); - s << "error" << error; - s.release(); - } - ts->set_failed_test_info(test_code); + float error = 1000; + EXPECT_TRUE(calculateError(responses, tdata->getResponses(), error)); + EXPECT_LE(error, 0.05f); } -//-------------------------------------------------------------------------------------------- -class CV_LRTest_SaveLoad : public cvtest::BaseTest -{ -public: - CV_LRTest_SaveLoad(){} -protected: - virtual void run(int start_from); -}; - +//================================================================================================== -void CV_LRTest_SaveLoad::run( int /*start_from*/ ) +TEST(ML_LR, save_load) { - CV_TRACE_FUNCTION(); - int code = cvtest::TS::OK; - - // initialize variables from the popular Iris Dataset - string dataFileName = ts->get_data_path() + "iris.data"; + string dataFileName = findDataFile("iris.data"); Ptr tdata = TrainData::loadFromCSV(dataFileName, 0); - ASSERT_FALSE(tdata.empty()) << "Could not find test data file : " << dataFileName; - + ASSERT_FALSE(tdata.empty()); Mat responses1, responses2; Mat learnt_mat1, learnt_mat2; - - // train and save the classifier String filename = tempfile(".xml"); - try { - // run LR classifier train classifier Ptr lr1 = LogisticRegression::create(); lr1->setLearningRate(1.0); lr1->setIterations(10001); lr1->setRegularization(LogisticRegression::REG_L2); lr1->setTrainMethod(LogisticRegression::BATCH); 
lr1->setMiniBatchSize(10); - lr1->train(tdata); - lr1->predict(tdata->getSamples(), responses1); + ASSERT_NO_THROW(lr1->train(tdata)); + ASSERT_NO_THROW(lr1->predict(tdata->getSamples(), responses1)); + ASSERT_NO_THROW(lr1->save(filename)); learnt_mat1 = lr1->get_learnt_thetas(); - lr1->save(filename); } - catch(...) - { - ts->printf(cvtest::TS::LOG, "Crash in write method.\n" ); - ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION); - } - - // and load to another - try { - Ptr lr2 = Algorithm::load(filename); - lr2->predict(tdata->getSamples(), responses2); + Ptr lr2; + ASSERT_NO_THROW(lr2 = Algorithm::load(filename)); + ASSERT_NO_THROW(lr2->predict(tdata->getSamples(), responses2)); learnt_mat2 = lr2->get_learnt_thetas(); } - catch(...) - { - ts->printf(cvtest::TS::LOG, "Crash in write method.\n" ); - ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION); - } - - CV_Assert(responses1.rows == responses2.rows); + // compare difference in prediction outputs and stored inputs + EXPECT_MAT_NEAR(responses1, responses2, 0.f); - // compare difference in learnt matrices before and after loading from disk Mat comp_learnt_mats; comp_learnt_mats = (learnt_mat1 == learnt_mat2); comp_learnt_mats = comp_learnt_mats.reshape(1, comp_learnt_mats.rows*comp_learnt_mats.cols); comp_learnt_mats.convertTo(comp_learnt_mats, CV_32S); comp_learnt_mats = comp_learnt_mats/255; - - // compare difference in prediction outputs and stored inputs // check if there is any difference between computed learnt mat and retrieved mat - - float errorCount = 0.0; - errorCount += 1 - (float)countNonZero(responses1 == responses2)/responses1.rows; - errorCount += 1 - (float)sum(comp_learnt_mats)[0]/comp_learnt_mats.rows; - - if(errorCount>0) - { - ts->printf( cvtest::TS::LOG, "Different prediction results before writing and after reading (errorCount=%d).\n", errorCount ); - code = cvtest::TS::FAIL_BAD_ACCURACY; - } + EXPECT_EQ(comp_learnt_mats.rows, sum(comp_learnt_mats)[0]); remove( filename.c_str() ); - - ts->set_failed_test_info( code ); } -TEST(ML_LR, accuracy) { CV_LRTest test; test.safe_run(); } -TEST(ML_LR, save_load) { CV_LRTest_SaveLoad test; test.safe_run(); } - }} // namespace diff --git a/modules/ml/test/test_mltests.cpp b/modules/ml/test/test_mltests.cpp index db82a44e28..c7353057d3 100644 --- a/modules/ml/test/test_mltests.cpp +++ b/modules/ml/test/test_mltests.cpp @@ -1,224 +1,373 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" -namespace opencv_test { +namespace opencv_test { namespace { -CV_AMLTest::CV_AMLTest( const char* _modelName ) : CV_MLBaseTest( _modelName ) +struct DatasetDesc { - validationFN = "avalidation.xml"; -} + string name; + int resp_idx; + int train_count; + int cat_num; + string type_desc; +public: + Ptr load() + { + string filename = findDataFile(name + ".data"); + Ptr data = TrainData::loadFromCSV(filename, 0, resp_idx, resp_idx + 1, type_desc); + data->setTrainTestSplit(train_count); + data->shuffleTrainTest(); + return data; + } +}; -int CV_AMLTest::run_test_case( int testCaseIdx ) -{ - CV_TRACE_FUNCTION(); - int code = cvtest::TS::OK; - code = prepare_test_case( testCaseIdx ); +// see testdata/ml/protocol.txt (?) 
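+// Fields per entry: dataset name, response column index, number of training
+// samples for the train/test split, category count (used as MaxCategories for
+// tree-based models), and the variable type spec passed to TrainData::loadFromCSV.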
+DatasetDesc datasets[] = { + { "mushroom", 0, 4000, 16, "cat" }, + { "adult", 14, 22561, 16, "ord[0,2,4,10-12],cat[1,3,5-9,13,14]" }, + { "vehicle", 18, 761, 4, "ord[0-17],cat[18]" }, + { "abalone", 8, 3133, 16, "ord[1-8],cat[0]" }, + { "ringnorm", 20, 300, 2, "ord[0-19],cat[20]" }, + { "spambase", 57, 3221, 3, "ord[0-56],cat[57]" }, + { "waveform", 21, 300, 3, "ord[0-20],cat[21]" }, + { "elevators", 18, 5000, 0, "ord" }, + { "letter", 16, 10000, 26, "ord[0-15],cat[16]" }, + { "twonorm", 20, 300, 3, "ord[0-19],cat[20]" }, + { "poletelecomm", 48, 2500, 0, "ord" }, +}; - if (code == cvtest::TS::OK) +static DatasetDesc & getDataset(const string & name) +{ + const int sz = sizeof(datasets)/sizeof(datasets[0]); + for (int i = 0; i < sz; ++i) { - //#define GET_STAT -#ifdef GET_STAT - const char* data_name = ((CvFileNode*)cvGetSeqElem( dataSetNames, testCaseIdx ))->data.str.ptr; - printf("%s, %s ", name, data_name); - const int icount = 100; - float res[icount]; - for (int k = 0; k < icount; k++) - { -#endif - data->shuffleTrainTest(); - code = train( testCaseIdx ); -#ifdef GET_STAT - float case_result = get_error(); - - res[k] = case_result; - } - float mean = 0, sigma = 0; - for (int k = 0; k < icount; k++) - { - mean += res[k]; - } - mean = mean /icount; - for (int k = 0; k < icount; k++) - { - sigma += (res[k] - mean)*(res[k] - mean); - } - sigma = sqrt(sigma/icount); - printf("%f, %f\n", mean, sigma); -#endif + DatasetDesc & desc = datasets[i]; + if (desc.name == name) + return desc; } - return code; + CV_Error(Error::StsInternal, ""); } -int CV_AMLTest::validate_test_results( int testCaseIdx ) +//================================================================================================== + +// interfaces and templates + +template string modelName() { return "Unknown"; }; +template Ptr tuneModel(const DatasetDesc &, Ptr m) { return m; } + +struct IModelFactory { - CV_TRACE_FUNCTION(); - int iters; - float mean, sigma; - // read validation params - FileNode resultNode = - validationFS.getFirstTopLevelNode()["validation"][modelName][dataSetNames[testCaseIdx]]["result"]; - resultNode["iter_count"] >> iters; - if ( iters > 0) - { - resultNode["mean"] >> mean; - resultNode["sigma"] >> sigma; - model->save(format("/Users/vp/tmp/dtree/testcase_%02d.cur.yml", testCaseIdx)); - float curErr = get_test_error( testCaseIdx ); - const int coeff = 4; - ts->printf( cvtest::TS::LOG, "Test case = %d; test error = %f; mean error = %f (diff=%f), %d*sigma = %f\n", - testCaseIdx, curErr, mean, abs( curErr - mean), coeff, coeff*sigma ); - if ( abs( curErr - mean) > coeff*sigma ) - { - ts->printf( cvtest::TS::LOG, "abs(%f - %f) > %f - OUT OF RANGE!\n", curErr, mean, coeff*sigma, coeff ); - return cvtest::TS::FAIL_BAD_ACCURACY; - } - else - ts->printf( cvtest::TS::LOG, ".\n" ); + virtual Ptr createNew(const DatasetDesc &dataset) const = 0; + virtual Ptr loadFromFile(const string &filename) const = 0; + virtual string name() const = 0; + virtual ~IModelFactory() {} +}; +template +struct ModelFactory : public IModelFactory +{ + Ptr createNew(const DatasetDesc &dataset) const CV_OVERRIDE + { + return tuneModel(dataset, T::create()); } - else + Ptr loadFromFile(const string & filename) const CV_OVERRIDE { - ts->printf( cvtest::TS::LOG, "validation info is not suitable" ); - return cvtest::TS::FAIL_INVALID_TEST_DATA; + return T::load(filename); } - return cvtest::TS::OK; -} + string name() const CV_OVERRIDE { return modelName(); } +}; + +// implementation -namespace { +template <> string modelName() { return 
"NormalBayesClassifier"; } +template <> string modelName() { return "DTrees"; } +template <> string modelName() { return "KNearest"; } +template <> string modelName() { return "RTrees"; } +template <> string modelName() { return "SVMSGD"; } -TEST(ML_DTree, regression) { CV_AMLTest test( CV_DTREE ); test.safe_run(); } -TEST(ML_Boost, regression) { CV_AMLTest test( CV_BOOST ); test.safe_run(); } -TEST(ML_RTrees, regression) { CV_AMLTest test( CV_RTREES ); test.safe_run(); } -TEST(DISABLED_ML_ERTrees, regression) { CV_AMLTest test( CV_ERTREES ); test.safe_run(); } +template<> Ptr tuneModel(const DatasetDesc &dataset, Ptr m) +{ + m->setMaxDepth(10); + m->setMinSampleCount(2); + m->setRegressionAccuracy(0); + m->setUseSurrogates(false); + m->setCVFolds(0); + m->setUse1SERule(false); + m->setTruncatePrunedTree(false); + m->setPriors(Mat()); + m->setMaxCategories(dataset.cat_num); + return m; +} -TEST(ML_NBAYES, regression_5911) +template<> Ptr tuneModel(const DatasetDesc &dataset, Ptr m) { - int N=12; - Ptr nb = cv::ml::NormalBayesClassifier::create(); + m->setMaxDepth(20); + m->setMinSampleCount(2); + m->setRegressionAccuracy(0); + m->setUseSurrogates(false); + m->setPriors(Mat()); + m->setCalculateVarImportance(true); + m->setActiveVarCount(0); + m->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.0)); + m->setMaxCategories(dataset.cat_num); + return m; +} - // data: - Mat_ X(N,4); - X << 1,2,3,4, 1,2,3,4, 1,2,3,4, 1,2,3,4, - 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, - 4,3,2,1, 4,3,2,1, 4,3,2,1, 4,3,2,1; +template<> Ptr tuneModel(const DatasetDesc &, Ptr m) +{ + m->setSvmsgdType(SVMSGD::ASGD); + m->setMarginType(SVMSGD::SOFT_MARGIN); + m->setMarginRegularization(0.00001f); + m->setInitialStepSize(0.1f); + m->setStepDecreasingPower(0.75); + m->setTermCriteria(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 10000, 0.00001)); + return m; +} - // labels: - Mat_ Y(N,1); - Y << 0,0,0,0, 1,1,1,1, 2,2,2,2; - nb->train(X, ml::ROW_SAMPLE, Y); +template <> +struct ModelFactory : public IModelFactory +{ + ModelFactory(int boostType_) : boostType(boostType_) {} + Ptr createNew(const DatasetDesc &) const CV_OVERRIDE + { + Ptr m = Boost::create(); + m->setBoostType(boostType); + m->setWeakCount(20); + m->setWeightTrimRate(0.95); + m->setMaxDepth(4); + m->setUseSurrogates(false); + m->setPriors(Mat()); + return m; + } + Ptr loadFromFile(const string &filename) const { return Boost::load(filename); } + string name() const CV_OVERRIDE { return "Boost"; } + int boostType; +}; - // single prediction: - Mat R1,P1; - for (int i=0; i +struct ModelFactory : public IModelFactory +{ + ModelFactory(int svmType_, int kernelType_, double gamma_, double c_, double nu_) + : svmType(svmType_), kernelType(kernelType_), gamma(gamma_), c(c_), nu(nu_) {} + Ptr createNew(const DatasetDesc &) const CV_OVERRIDE { - Mat r,p; - nb->predictProb(X.row(i), r, p); - R1.push_back(r); - P1.push_back(p); + Ptr m = SVM::create(); + m->setType(svmType); + m->setKernel(kernelType); + m->setDegree(0); + m->setGamma(gamma); + m->setCoef0(0); + m->setC(c); + m->setNu(nu); + m->setP(0); + return m; } + Ptr loadFromFile(const string &filename) const { return SVM::load(filename); } + string name() const CV_OVERRIDE { return "SVM"; } + int svmType; + int kernelType; + double gamma; + double c; + double nu; +}; + +//================================================================================================== - // bulk prediction (continuous memory): - Mat R2,P2; - nb->predictProb(X, R2, P2); +struct ML_Params_t +{ + Ptr factory; + string 
dataset; + float mean; + float sigma; +}; + +void PrintTo(const ML_Params_t & param, std::ostream *os) +{ + *os << param.factory->name() << "_" << param.dataset; +} - EXPECT_EQ(sum(R1 == R2)[0], 255 * R2.total()); - EXPECT_EQ(sum(P1 == P2)[0], 255 * P2.total()); +ML_Params_t ML_Params_List[] = { + { makePtr< ModelFactory >(), "mushroom", 0.027401f, 0.036236f }, + { makePtr< ModelFactory >(), "adult", 14.279000f, 0.354323f }, + { makePtr< ModelFactory >(), "vehicle", 29.761162f, 4.823927f }, + { makePtr< ModelFactory >(), "abalone", 7.297540f, 0.510058f }, + { makePtr< ModelFactory >(Boost::REAL), "adult", 13.894001f, 0.337763f }, + { makePtr< ModelFactory >(Boost::DISCRETE), "mushroom", 0.007274f, 0.029400f }, + { makePtr< ModelFactory >(Boost::LOGIT), "ringnorm", 9.993943f, 0.860256f }, + { makePtr< ModelFactory >(Boost::GENTLE), "spambase", 5.404347f, 0.581716f }, + { makePtr< ModelFactory >(), "waveform", 17.100641f, 0.630052f }, + { makePtr< ModelFactory >(), "mushroom", 0.006547f, 0.028248f }, + { makePtr< ModelFactory >(), "adult", 13.5129f, 0.266065f }, + { makePtr< ModelFactory >(), "abalone", 4.745199f, 0.282112f }, + { makePtr< ModelFactory >(), "vehicle", 24.964712f, 4.469287f }, + { makePtr< ModelFactory >(), "letter", 5.334999f, 0.261142f }, + { makePtr< ModelFactory >(), "ringnorm", 6.248733f, 0.904713f }, + { makePtr< ModelFactory >(), "twonorm", 4.506479f, 0.449739f }, + { makePtr< ModelFactory >(), "spambase", 5.243477f, 0.54232f }, +}; - // bulk prediction, with non-continuous memory storage - Mat R3_(N, 1+1, CV_32S), - P3_(N, 3+1, CV_32F); - nb->predictProb(X, R3_.col(0), P3_.colRange(0,3)); - Mat R3 = R3_.col(0).clone(), - P3 = P3_.colRange(0,3).clone(); +typedef testing::TestWithParam ML_Params; - EXPECT_EQ(sum(R1 == R3)[0], 255 * R3.total()); - EXPECT_EQ(sum(P1 == P3)[0], 255 * P3.total()); +TEST_P(ML_Params, accuracy) +{ + const ML_Params_t & param = GetParam(); + DatasetDesc &dataset = getDataset(param.dataset); + Ptr data = dataset.load(); + ASSERT_TRUE(data); + ASSERT_TRUE(data->getNSamples() > 0); + + Ptr m = param.factory->createNew(dataset); + ASSERT_TRUE(m); + ASSERT_TRUE(m->train(data, 0)); + + float err = m->calcError(data, true, noArray()); + EXPECT_NEAR(err, param.mean, 4 * param.sigma); } -TEST(ML_RTrees, getVotes) +INSTANTIATE_TEST_CASE_P(/**/, ML_Params, testing::ValuesIn(ML_Params_List)); + + +//================================================================================================== + +struct ML_SL_Params_t +{ + Ptr factory; + string dataset; +}; + +void PrintTo(const ML_SL_Params_t & param, std::ostream *os) { - int n = 12; - int count, i; - int label_size = 3; - int predicted_class = 0; - int max_votes = -1; - int val; - // RTrees for classification - Ptr rt = cv::ml::RTrees::create(); + *os << param.factory->name() << "_" << param.dataset; +} - //data - Mat data(n, 4, CV_32F); - randu(data, 0, 10); +ML_SL_Params_t ML_SL_Params_List[] = { + { makePtr< ModelFactory >(), "waveform" }, + { makePtr< ModelFactory >(), "waveform" }, + { makePtr< ModelFactory >(), "abalone" }, + { makePtr< ModelFactory >(SVM::C_SVC, SVM::LINEAR, 1, 0.5, 0), "waveform" }, + { makePtr< ModelFactory >(SVM::NU_SVR, SVM::RBF, 0.00225, 62.5, 0.03), "poletelecomm" }, + { makePtr< ModelFactory >(), "mushroom" }, + { makePtr< ModelFactory >(), "abalone" }, + { makePtr< ModelFactory >(Boost::REAL), "adult" }, + { makePtr< ModelFactory >(), "waveform" }, + { makePtr< ModelFactory >(), "abalone" }, + { makePtr< ModelFactory >(), "waveform" }, +}; - //labels - Mat labels = 
(Mat_(n,1) << 0,0,0,0, 1,1,1,1, 2,2,2,2); +typedef testing::TestWithParam ML_SL_Params; - rt->train(data, ml::ROW_SAMPLE, labels); +TEST_P(ML_SL_Params, save_load) +{ + const ML_SL_Params_t & param = GetParam(); - //run function - Mat test(1, 4, CV_32F); - Mat result; - randu(test, 0, 10); - rt->getVotes(test, result, 0); + DatasetDesc &dataset = getDataset(param.dataset); + Ptr data = dataset.load(); + ASSERT_TRUE(data); + ASSERT_TRUE(data->getNSamples() > 0); - //count vote amount and find highest vote - count = 0; - const int* result_row = result.ptr(1); - for( i = 0; i < label_size; i++ ) + Mat responses1, responses2; + string file1 = tempfile(".json.gz"); + string file2 = tempfile(".json.gz"); + { + Ptr m = param.factory->createNew(dataset); + ASSERT_TRUE(m); + ASSERT_TRUE(m->train(data, 0)); + m->calcError(data, true, responses1); + m->save(file1 + "?base64"); + } { - val = result_row[i]; - //predicted_class = max_votes < val? i; - if( max_votes < val ) + Ptr m = param.factory->loadFromFile(file1); + ASSERT_TRUE(m); + m->calcError(data, true, responses2); + m->save(file2 + "?base64"); + } + EXPECT_MAT_NEAR(responses1, responses2, 0.0); + { + ifstream f1(file1.c_str(), std::ios_base::binary); + ifstream f2(file2.c_str(), std::ios_base::binary); + ASSERT_TRUE(f1.is_open() && f2.is_open()); + const size_t BUFSZ = 10000; + vector buf1(BUFSZ, 0); + vector buf2(BUFSZ, 0); + while (true) { - max_votes = val; - predicted_class = i; + f1.read(&buf1[0], BUFSZ); + f2.read(&buf2[0], BUFSZ); + EXPECT_EQ(f1.gcount(), f2.gcount()); + EXPECT_EQ(f1.eof(), f2.eof()); + if (!f1.good() || !f2.good() || f1.gcount() != f2.gcount()) + break; + ASSERT_EQ(buf1, buf2); } - count += val; } + remove(file1.c_str()); + remove(file2.c_str()); +} + +INSTANTIATE_TEST_CASE_P(/**/, ML_SL_Params, testing::ValuesIn(ML_SL_Params_List)); + +//================================================================================================== + +TEST(TrainDataGet, layout_ROW_SAMPLE) // Details: #12236 +{ + cv::Mat test = cv::Mat::ones(150, 30, CV_32FC1) * 2; + test.col(3) += Scalar::all(3); + cv::Mat labels = cv::Mat::ones(150, 3, CV_32SC1) * 5; + labels.col(1) += 1; + cv::Ptr train_data = cv::ml::TrainData::create(test, cv::ml::ROW_SAMPLE, labels); + train_data->setTrainTestSplitRatio(0.9); + + Mat tidx = train_data->getTestSampleIdx(); + EXPECT_EQ((size_t)15, tidx.total()); + + Mat tresp = train_data->getTestResponses(); + EXPECT_EQ(15, tresp.rows); + EXPECT_EQ(labels.cols, tresp.cols); + EXPECT_EQ(5, tresp.at(0, 0)) << tresp; + EXPECT_EQ(6, tresp.at(0, 1)) << tresp; + EXPECT_EQ(6, tresp.at(14, 1)) << tresp; + EXPECT_EQ(5, tresp.at(14, 2)) << tresp; + + Mat tsamples = train_data->getTestSamples(); + EXPECT_EQ(15, tsamples.rows); + EXPECT_EQ(test.cols, tsamples.cols); + EXPECT_EQ(2, tsamples.at(0, 0)) << tsamples; + EXPECT_EQ(5, tsamples.at(0, 3)) << tsamples; + EXPECT_EQ(2, tsamples.at(14, test.cols - 1)) << tsamples; + EXPECT_EQ(5, tsamples.at(14, 3)) << tsamples; +} + +TEST(TrainDataGet, layout_COL_SAMPLE) // Details: #12236 +{ + cv::Mat test = cv::Mat::ones(30, 150, CV_32FC1) * 3; + test.row(3) += Scalar::all(3); + cv::Mat labels = cv::Mat::ones(3, 150, CV_32SC1) * 5; + labels.row(1) += 1; + cv::Ptr train_data = cv::ml::TrainData::create(test, cv::ml::COL_SAMPLE, labels); + train_data->setTrainTestSplitRatio(0.9); + + Mat tidx = train_data->getTestSampleIdx(); + EXPECT_EQ((size_t)15, tidx.total()); + + Mat tresp = train_data->getTestResponses(); // always row-based, transposed + EXPECT_EQ(15, tresp.rows); + 
EXPECT_EQ(labels.rows, tresp.cols); + EXPECT_EQ(5, tresp.at(0, 0)) << tresp; + EXPECT_EQ(6, tresp.at(0, 1)) << tresp; + EXPECT_EQ(6, tresp.at(14, 1)) << tresp; + EXPECT_EQ(5, tresp.at(14, 2)) << tresp; + - EXPECT_EQ(count, (int)rt->getRoots().size()); - EXPECT_EQ(result.at(0, predicted_class), rt->predict(test)); + Mat tsamples = train_data->getTestSamples(); + EXPECT_EQ(15, tsamples.cols); + EXPECT_EQ(test.rows, tsamples.rows); + EXPECT_EQ(3, tsamples.at(0, 0)) << tsamples; + EXPECT_EQ(6, tsamples.at(3, 0)) << tsamples; + EXPECT_EQ(6, tsamples.at(3, 14)) << tsamples; + EXPECT_EQ(3, tsamples.at(test.rows - 1, 14)) << tsamples; } }} // namespace -/* End of file. */ diff --git a/modules/ml/test/test_mltests2.cpp b/modules/ml/test/test_mltests2.cpp deleted file mode 100644 index f9bbf70f95..0000000000 --- a/modules/ml/test/test_mltests2.cpp +++ /dev/null @@ -1,794 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "test_precomp.hpp" - -//#define GENERATE_TESTDATA - -namespace opencv_test { namespace { - -int str_to_svm_type(String& str) -{ - if( !str.compare("C_SVC") ) - return SVM::C_SVC; - if( !str.compare("NU_SVC") ) - return SVM::NU_SVC; - if( !str.compare("ONE_CLASS") ) - return SVM::ONE_CLASS; - if( !str.compare("EPS_SVR") ) - return SVM::EPS_SVR; - if( !str.compare("NU_SVR") ) - return SVM::NU_SVR; - CV_Error( CV_StsBadArg, "incorrect svm type string" ); -} -int str_to_svm_kernel_type( String& str ) -{ - if( !str.compare("LINEAR") ) - return SVM::LINEAR; - if( !str.compare("POLY") ) - return SVM::POLY; - if( !str.compare("RBF") ) - return SVM::RBF; - if( !str.compare("SIGMOID") ) - return SVM::SIGMOID; - CV_Error( CV_StsBadArg, "incorrect svm type string" ); -} - -// 4. em -// 5. ann -int str_to_ann_train_method( String& str ) -{ - if( !str.compare("BACKPROP") ) - return ANN_MLP::BACKPROP; - if (!str.compare("RPROP")) - return ANN_MLP::RPROP; - if (!str.compare("ANNEAL")) - return ANN_MLP::ANNEAL; - CV_Error( CV_StsBadArg, "incorrect ann train method string" ); -} - -#if 0 -int str_to_ann_activation_function(String& str) -{ - if (!str.compare("IDENTITY")) - return ANN_MLP::IDENTITY; - if (!str.compare("SIGMOID_SYM")) - return ANN_MLP::SIGMOID_SYM; - if (!str.compare("GAUSSIAN")) - return ANN_MLP::GAUSSIAN; - if (!str.compare("RELU")) - return ANN_MLP::RELU; - if (!str.compare("LEAKYRELU")) - return ANN_MLP::LEAKYRELU; - CV_Error(CV_StsBadArg, "incorrect ann activation function string"); -} -#endif - -void ann_check_data( Ptr _data ) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!_data.empty()); - Mat values = _data->getSamples(); - Mat var_idx = _data->getVarIdx(); - int nvars = (int)var_idx.total(); - if( nvars != 0 && nvars != values.cols ) - CV_Error( CV_StsBadArg, "var_idx is not supported" ); - if( !_data->getMissing().empty() ) - CV_Error( CV_StsBadArg, "missing values are not supported" ); -} - -// unroll the categorical responses to binary vectors -Mat ann_get_new_responses( Ptr _data, map& cls_map ) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!_data.empty()); - Mat train_sidx = _data->getTrainSampleIdx(); - int* train_sidx_ptr = train_sidx.ptr(); - Mat responses = _data->getResponses(); - int cls_count = 0; - // construct cls_map - cls_map.clear(); - int nresponses = (int)responses.total(); - int si, n = !train_sidx.empty() ? (int)train_sidx.total() : nresponses; - - for( si = 0; si < n; si++ ) - { - int sidx = train_sidx_ptr ? train_sidx_ptr[si] : si; - int r = cvRound(responses.at(sidx)); - CV_DbgAssert( fabs(responses.at(sidx) - r) < FLT_EPSILON ); - map::iterator it = cls_map.find(r); - if( it == cls_map.end() ) - cls_map[r] = cls_count++; - } - Mat new_responses = Mat::zeros( nresponses, cls_count, CV_32F ); - for( si = 0; si < n; si++ ) - { - int sidx = train_sidx_ptr ? train_sidx_ptr[si] : si; - int r = cvRound(responses.at(sidx)); - int cidx = cls_map[r]; - new_responses.at(sidx, cidx) = 1.f; - } - return new_responses; -} - -float ann_calc_error( Ptr ann, Ptr _data, map& cls_map, int type, vector *resp_labels ) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!ann.empty()); - CV_Assert(!_data.empty()); - float err = 0; - Mat samples = _data->getSamples(); - Mat responses = _data->getResponses(); - Mat sample_idx = (type == CV_TEST_ERROR) ? _data->getTestSampleIdx() : _data->getTrainSampleIdx(); - int* sidx = !sample_idx.empty() ? 
sample_idx.ptr() : 0; - ann_check_data( _data ); - int sample_count = (int)sample_idx.total(); - sample_count = (type == CV_TRAIN_ERROR && sample_count == 0) ? samples.rows : sample_count; - float* pred_resp = 0; - vector innresp; - if( sample_count > 0 ) - { - if( resp_labels ) - { - resp_labels->resize( sample_count ); - pred_resp = &((*resp_labels)[0]); - } - else - { - innresp.resize( sample_count ); - pred_resp = &(innresp[0]); - } - } - int cls_count = (int)cls_map.size(); - Mat output( 1, cls_count, CV_32FC1 ); - - for( int i = 0; i < sample_count; i++ ) - { - int si = sidx ? sidx[i] : i; - Mat sample = samples.row(si); - ann->predict( sample, output ); - Point best_cls; - minMaxLoc(output, 0, 0, 0, &best_cls, 0); - int r = cvRound(responses.at(si)); - CV_DbgAssert( fabs(responses.at(si) - r) < FLT_EPSILON ); - r = cls_map[r]; - int d = best_cls.x == r ? 0 : 1; - err += d; - pred_resp[i] = (float)best_cls.x; - } - err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX; - return err; -} - -TEST(ML_ANN, ActivationFunction) -{ - String folder = string(cvtest::TS::ptr()->get_data_path()); - String original_path = folder + "waveform.data"; - String dataname = folder + "waveform"; - - Ptr tdata = TrainData::loadFromCSV(original_path, 0); - - ASSERT_FALSE(tdata.empty()) << "Could not find test data file : " << original_path; - RNG& rng = theRNG(); - rng.state = 1027401484159173092; - tdata->setTrainTestSplit(500); - - vector activationType; - activationType.push_back(ml::ANN_MLP::IDENTITY); - activationType.push_back(ml::ANN_MLP::SIGMOID_SYM); - activationType.push_back(ml::ANN_MLP::GAUSSIAN); - activationType.push_back(ml::ANN_MLP::RELU); - activationType.push_back(ml::ANN_MLP::LEAKYRELU); - vector activationName; - activationName.push_back("_identity"); - activationName.push_back("_sigmoid_sym"); - activationName.push_back("_gaussian"); - activationName.push_back("_relu"); - activationName.push_back("_leakyrelu"); - for (size_t i = 0; i < activationType.size(); i++) - { - Ptr x = ml::ANN_MLP::create(); - Mat_ layerSizes(1, 4); - layerSizes(0, 0) = tdata->getNVars(); - layerSizes(0, 1) = 100; - layerSizes(0, 2) = 100; - layerSizes(0, 3) = tdata->getResponses().cols; - x->setLayerSizes(layerSizes); - x->setActivationFunction(activationType[i]); - x->setTrainMethod(ml::ANN_MLP::RPROP, 0.01, 0.1); - x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 300, 0.01)); - x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE); - ASSERT_TRUE(x->isTrained()) << "Could not train networks with " << activationName[i]; -#ifdef GENERATE_TESTDATA - x->save(dataname + activationName[i] + ".yml"); -#else - Ptr y = Algorithm::load(dataname + activationName[i] + ".yml"); - ASSERT_TRUE(y != NULL) << "Could not load " << dataname + activationName[i] + ".yml"; - Mat testSamples = tdata->getTestSamples(); - Mat rx, ry, dst; - x->predict(testSamples, rx); - y->predict(testSamples, ry); - double n = cvtest::norm(rx, ry, NORM_INF); - EXPECT_LT(n,FLT_EPSILON) << "Predict are not equal for " << dataname + activationName[i] + ".yml and " << activationName[i]; -#endif - } -} - -CV_ENUM(ANN_MLP_METHOD, ANN_MLP::RPROP, ANN_MLP::ANNEAL) - -typedef tuple ML_ANN_METHOD_Params; -typedef TestWithParam ML_ANN_METHOD; - -TEST_P(ML_ANN_METHOD, Test) -{ - int methodType = get<0>(GetParam()); - string methodName = get<1>(GetParam()); - int N = get<2>(GetParam()); - - String folder = string(cvtest::TS::ptr()->get_data_path()); - String original_path = folder + "waveform.data"; - String dataname = folder + "waveform" + '_' + 
methodName; - - Ptr tdata2 = TrainData::loadFromCSV(original_path, 0); - ASSERT_FALSE(tdata2.empty()) << "Could not find test data file : " << original_path; - - Mat samples = tdata2->getSamples()(Range(0, N), Range::all()); - Mat responses(N, 3, CV_32FC1, Scalar(0)); - for (int i = 0; i < N; i++) - responses.at(i, static_cast(tdata2->getResponses().at(i, 0))) = 1; - Ptr tdata = TrainData::create(samples, ml::ROW_SAMPLE, responses); - ASSERT_FALSE(tdata.empty()); - - RNG& rng = theRNG(); - rng.state = 0; - tdata->setTrainTestSplitRatio(0.8); - - Mat testSamples = tdata->getTestSamples(); - -#ifdef GENERATE_TESTDATA - { - Ptr xx = ml::ANN_MLP_ANNEAL::create(); - Mat_ layerSizesXX(1, 4); - layerSizesXX(0, 0) = tdata->getNVars(); - layerSizesXX(0, 1) = 30; - layerSizesXX(0, 2) = 30; - layerSizesXX(0, 3) = tdata->getResponses().cols; - xx->setLayerSizes(layerSizesXX); - xx->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM); - xx->setTrainMethod(ml::ANN_MLP::RPROP); - xx->setTermCriteria(TermCriteria(TermCriteria::COUNT, 1, 0.01)); - xx->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE); - FileStorage fs; - fs.open(dataname + "_init_weight.yml.gz", FileStorage::WRITE + FileStorage::BASE64); - xx->write(fs); - fs.release(); - } -#endif - { - FileStorage fs; - fs.open(dataname + "_init_weight.yml.gz", FileStorage::READ); - Ptr x = ml::ANN_MLP_ANNEAL::create(); - x->read(fs.root()); - x->setTrainMethod(methodType); - if (methodType == ml::ANN_MLP::ANNEAL) - { - x->setAnnealEnergyRNG(RNG(CV_BIG_INT(0xffffffff))); - x->setAnnealInitialT(12); - x->setAnnealFinalT(0.15); - x->setAnnealCoolingRatio(0.96); - x->setAnnealItePerStep(11); - } - x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.01)); - x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE + ml::ANN_MLP::UPDATE_WEIGHTS); - ASSERT_TRUE(x->isTrained()) << "Could not train networks with " << methodName; - string filename = dataname + ".yml.gz"; - Mat r_gold; -#ifdef GENERATE_TESTDATA - x->save(filename); - x->predict(testSamples, r_gold); - { - FileStorage fs_response(dataname + "_response.yml.gz", FileStorage::WRITE + FileStorage::BASE64); - fs_response << "response" << r_gold; - } -#else - { - FileStorage fs_response(dataname + "_response.yml.gz", FileStorage::READ); - fs_response["response"] >> r_gold; - } -#endif - ASSERT_FALSE(r_gold.empty()); - Ptr y = Algorithm::load(filename); - ASSERT_TRUE(y != NULL) << "Could not load " << filename; - Mat rx, ry; - for (int j = 0; j < 4; j++) - { - rx = x->getWeights(j); - ry = y->getWeights(j); - double n = cvtest::norm(rx, ry, NORM_INF); - EXPECT_LT(n, FLT_EPSILON) << "Weights are not equal for layer: " << j; - } - x->predict(testSamples, rx); - y->predict(testSamples, ry); - double n = cvtest::norm(ry, rx, NORM_INF); - EXPECT_LT(n, FLT_EPSILON) << "Predict are not equal to result of the saved model"; - n = cvtest::norm(r_gold, rx, NORM_INF); - EXPECT_LT(n, FLT_EPSILON) << "Predict are not equal to 'gold' response"; - } -} - -INSTANTIATE_TEST_CASE_P(/*none*/, ML_ANN_METHOD, - testing::Values( - make_tuple(ml::ANN_MLP::RPROP, "rprop", 5000), - make_tuple(ml::ANN_MLP::ANNEAL, "anneal", 1000) - //make_pair(ml::ANN_MLP::BACKPROP, "backprop", 5000); -----> NO BACKPROP TEST - ) -); - - -// 6. dtree -// 7. 
boost -int str_to_boost_type( String& str ) -{ - if ( !str.compare("DISCRETE") ) - return Boost::DISCRETE; - if ( !str.compare("REAL") ) - return Boost::REAL; - if ( !str.compare("LOGIT") ) - return Boost::LOGIT; - if ( !str.compare("GENTLE") ) - return Boost::GENTLE; - CV_Error( CV_StsBadArg, "incorrect boost type string" ); -} - -// 8. rtrees -// 9. ertrees - -int str_to_svmsgd_type( String& str ) -{ - if ( !str.compare("SGD") ) - return SVMSGD::SGD; - if ( !str.compare("ASGD") ) - return SVMSGD::ASGD; - CV_Error( CV_StsBadArg, "incorrect svmsgd type string" ); -} - -int str_to_margin_type( String& str ) -{ - if ( !str.compare("SOFT_MARGIN") ) - return SVMSGD::SOFT_MARGIN; - if ( !str.compare("HARD_MARGIN") ) - return SVMSGD::HARD_MARGIN; - CV_Error( CV_StsBadArg, "incorrect svmsgd margin type string" ); -} - -} -// ---------------------------------- MLBaseTest --------------------------------------------------- - -CV_MLBaseTest::CV_MLBaseTest(const char* _modelName) -{ - int64 seeds[] = { CV_BIG_INT(0x00009fff4f9c8d52), - CV_BIG_INT(0x0000a17166072c7c), - CV_BIG_INT(0x0201b32115cd1f9a), - CV_BIG_INT(0x0513cb37abcd1234), - CV_BIG_INT(0x0001a2b3c4d5f678) - }; - - int seedCount = sizeof(seeds)/sizeof(seeds[0]); - RNG& rng = theRNG(); - - initSeed = rng.state; - rng.state = seeds[rng(seedCount)]; - - modelName = _modelName; -} - -CV_MLBaseTest::~CV_MLBaseTest() -{ - if( validationFS.isOpened() ) - validationFS.release(); - theRNG().state = initSeed; -} - -int CV_MLBaseTest::read_params( CvFileStorage* __fs ) -{ - CV_TRACE_FUNCTION(); - FileStorage _fs(__fs, false); - if( !_fs.isOpened() ) - test_case_count = -1; - else - { - FileNode fn = _fs.getFirstTopLevelNode()["run_params"][modelName]; - test_case_count = (int)fn.size(); - if( test_case_count <= 0 ) - test_case_count = -1; - if( test_case_count > 0 ) - { - dataSetNames.resize( test_case_count ); - FileNodeIterator it = fn.begin(); - for( int i = 0; i < test_case_count; i++, ++it ) - { - dataSetNames[i] = (string)*it; - } - } - } - return cvtest::TS::OK;; -} - -void CV_MLBaseTest::run( int ) -{ - CV_TRACE_FUNCTION(); - string filename = ts->get_data_path(); - filename += get_validation_filename(); - validationFS.open( filename, FileStorage::READ ); - read_params( *validationFS ); - - int code = cvtest::TS::OK; - for (int i = 0; i < test_case_count; i++) - { - CV_TRACE_REGION("iteration"); - int temp_code = run_test_case( i ); - if (temp_code == cvtest::TS::OK) - temp_code = validate_test_results( i ); - if (temp_code != cvtest::TS::OK) - code = temp_code; - } - if ( test_case_count <= 0) - { - ts->printf( cvtest::TS::LOG, "validation file is not determined or not correct" ); - code = cvtest::TS::FAIL_INVALID_TEST_DATA; - } - ts->set_failed_test_info( code ); -} - -int CV_MLBaseTest::prepare_test_case( int test_case_idx ) -{ - CV_TRACE_FUNCTION(); - clear(); - - string dataPath = ts->get_data_path(); - if ( dataPath.empty() ) - { - ts->printf( cvtest::TS::LOG, "data path is empty" ); - return cvtest::TS::FAIL_INVALID_TEST_DATA; - } - - string dataName = dataSetNames[test_case_idx], - filename = dataPath + dataName + ".data"; - - FileNode dataParamsNode = validationFS.getFirstTopLevelNode()["validation"][modelName][dataName]["data_params"]; - CV_DbgAssert( !dataParamsNode.empty() ); - - CV_DbgAssert( !dataParamsNode["LS"].empty() ); - int trainSampleCount = (int)dataParamsNode["LS"]; - - CV_DbgAssert( !dataParamsNode["resp_idx"].empty() ); - int respIdx = (int)dataParamsNode["resp_idx"]; - - CV_DbgAssert( !dataParamsNode["types"].empty() 
); - String varTypes = (String)dataParamsNode["types"]; - - data = TrainData::loadFromCSV(filename, 0, respIdx, respIdx+1, varTypes); - if( data.empty() ) - { - ts->printf( cvtest::TS::LOG, "file %s can not be read\n", filename.c_str() ); - return cvtest::TS::FAIL_INVALID_TEST_DATA; - } - - data->setTrainTestSplit(trainSampleCount); - return cvtest::TS::OK; -} - -string& CV_MLBaseTest::get_validation_filename() -{ - return validationFN; -} - -int CV_MLBaseTest::train( int testCaseIdx ) -{ - CV_TRACE_FUNCTION(); - bool is_trained = false; - FileNode modelParamsNode = - validationFS.getFirstTopLevelNode()["validation"][modelName][dataSetNames[testCaseIdx]]["model_params"]; - - if( modelName == CV_NBAYES ) - model = NormalBayesClassifier::create(); - else if( modelName == CV_KNEAREST ) - { - model = KNearest::create(); - } - else if( modelName == CV_SVM ) - { - String svm_type_str, kernel_type_str; - modelParamsNode["svm_type"] >> svm_type_str; - modelParamsNode["kernel_type"] >> kernel_type_str; - Ptr m = SVM::create(); - m->setType(str_to_svm_type( svm_type_str )); - m->setKernel(str_to_svm_kernel_type( kernel_type_str )); - m->setDegree(modelParamsNode["degree"]); - m->setGamma(modelParamsNode["gamma"]); - m->setCoef0(modelParamsNode["coef0"]); - m->setC(modelParamsNode["C"]); - m->setNu(modelParamsNode["nu"]); - m->setP(modelParamsNode["p"]); - model = m; - } - else if( modelName == CV_EM ) - { - assert( 0 ); - } - else if( modelName == CV_ANN ) - { - String train_method_str; - double param1, param2; - modelParamsNode["train_method"] >> train_method_str; - modelParamsNode["param1"] >> param1; - modelParamsNode["param2"] >> param2; - Mat new_responses = ann_get_new_responses( data, cls_map ); - // binarize the responses - data = TrainData::create(data->getSamples(), data->getLayout(), new_responses, - data->getVarIdx(), data->getTrainSampleIdx()); - int layer_sz[] = { data->getNAllVars(), 100, 100, (int)cls_map.size() }; - Mat layer_sizes( 1, (int)(sizeof(layer_sz)/sizeof(layer_sz[0])), CV_32S, layer_sz ); - Ptr m = ANN_MLP::create(); - m->setLayerSizes(layer_sizes); - m->setActivationFunction(ANN_MLP::SIGMOID_SYM, 0, 0); - m->setTermCriteria(TermCriteria(TermCriteria::COUNT,300,0.01)); - m->setTrainMethod(str_to_ann_train_method(train_method_str), param1, param2); - model = m; - - } - else if( modelName == CV_DTREE ) - { - int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS; - float REG_ACCURACY = 0; - bool USE_SURROGATE = false, IS_PRUNED; - modelParamsNode["max_depth"] >> MAX_DEPTH; - modelParamsNode["min_sample_count"] >> MIN_SAMPLE_COUNT; - //modelParamsNode["use_surrogate"] >> USE_SURROGATE; - modelParamsNode["max_categories"] >> MAX_CATEGORIES; - modelParamsNode["cv_folds"] >> CV_FOLDS; - modelParamsNode["is_pruned"] >> IS_PRUNED; - - Ptr m = DTrees::create(); - m->setMaxDepth(MAX_DEPTH); - m->setMinSampleCount(MIN_SAMPLE_COUNT); - m->setRegressionAccuracy(REG_ACCURACY); - m->setUseSurrogates(USE_SURROGATE); - m->setMaxCategories(MAX_CATEGORIES); - m->setCVFolds(CV_FOLDS); - m->setUse1SERule(false); - m->setTruncatePrunedTree(IS_PRUNED); - m->setPriors(Mat()); - model = m; - } - else if( modelName == CV_BOOST ) - { - int BOOST_TYPE, WEAK_COUNT, MAX_DEPTH; - float WEIGHT_TRIM_RATE; - bool USE_SURROGATE = false; - String typeStr; - modelParamsNode["type"] >> typeStr; - BOOST_TYPE = str_to_boost_type( typeStr ); - modelParamsNode["weak_count"] >> WEAK_COUNT; - modelParamsNode["weight_trim_rate"] >> WEIGHT_TRIM_RATE; - modelParamsNode["max_depth"] >> MAX_DEPTH; - 
//modelParamsNode["use_surrogate"] >> USE_SURROGATE; - - Ptr m = Boost::create(); - m->setBoostType(BOOST_TYPE); - m->setWeakCount(WEAK_COUNT); - m->setWeightTrimRate(WEIGHT_TRIM_RATE); - m->setMaxDepth(MAX_DEPTH); - m->setUseSurrogates(USE_SURROGATE); - m->setPriors(Mat()); - model = m; - } - else if( modelName == CV_RTREES ) - { - int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS, NACTIVE_VARS, MAX_TREES_NUM; - float REG_ACCURACY = 0, OOB_EPS = 0.0; - bool USE_SURROGATE = false, IS_PRUNED; - modelParamsNode["max_depth"] >> MAX_DEPTH; - modelParamsNode["min_sample_count"] >> MIN_SAMPLE_COUNT; - //modelParamsNode["use_surrogate"] >> USE_SURROGATE; - modelParamsNode["max_categories"] >> MAX_CATEGORIES; - modelParamsNode["cv_folds"] >> CV_FOLDS; - modelParamsNode["is_pruned"] >> IS_PRUNED; - modelParamsNode["nactive_vars"] >> NACTIVE_VARS; - modelParamsNode["max_trees_num"] >> MAX_TREES_NUM; - - Ptr m = RTrees::create(); - m->setMaxDepth(MAX_DEPTH); - m->setMinSampleCount(MIN_SAMPLE_COUNT); - m->setRegressionAccuracy(REG_ACCURACY); - m->setUseSurrogates(USE_SURROGATE); - m->setMaxCategories(MAX_CATEGORIES); - m->setPriors(Mat()); - m->setCalculateVarImportance(true); - m->setActiveVarCount(NACTIVE_VARS); - m->setTermCriteria(TermCriteria(TermCriteria::COUNT, MAX_TREES_NUM, OOB_EPS)); - model = m; - } - - else if( modelName == CV_SVMSGD ) - { - String svmsgdTypeStr; - modelParamsNode["svmsgdType"] >> svmsgdTypeStr; - - Ptr m = SVMSGD::create(); - int svmsgdType = str_to_svmsgd_type( svmsgdTypeStr ); - m->setSvmsgdType(svmsgdType); - - String marginTypeStr; - modelParamsNode["marginType"] >> marginTypeStr; - int marginType = str_to_margin_type( marginTypeStr ); - m->setMarginType(marginType); - - m->setMarginRegularization(modelParamsNode["marginRegularization"]); - m->setInitialStepSize(modelParamsNode["initialStepSize"]); - m->setStepDecreasingPower(modelParamsNode["stepDecreasingPower"]); - m->setTermCriteria(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 10000, 0.00001)); - model = m; - } - - if( !model.empty() ) - is_trained = model->train(data, 0); - - if( !is_trained ) - { - ts->printf( cvtest::TS::LOG, "in test case %d model training was failed", testCaseIdx ); - return cvtest::TS::FAIL_INVALID_OUTPUT; - } - return cvtest::TS::OK; -} - -float CV_MLBaseTest::get_test_error( int /*testCaseIdx*/, vector *resp ) -{ - CV_TRACE_FUNCTION(); - int type = CV_TEST_ERROR; - float err = 0; - Mat _resp; - if( modelName == CV_EM ) - assert( 0 ); - else if( modelName == CV_ANN ) - err = ann_calc_error( model, data, cls_map, type, resp ); - else if( modelName == CV_DTREE || modelName == CV_BOOST || modelName == CV_RTREES || - modelName == CV_SVM || modelName == CV_NBAYES || modelName == CV_KNEAREST || modelName == CV_SVMSGD ) - err = model->calcError( data, true, _resp ); - if( !_resp.empty() && resp ) - _resp.convertTo(*resp, CV_32F); - return err; -} - -void CV_MLBaseTest::save( const char* filename ) -{ - CV_TRACE_FUNCTION(); - model->save( filename ); -} - -void CV_MLBaseTest::load( const char* filename ) -{ - CV_TRACE_FUNCTION(); - if( modelName == CV_NBAYES ) - model = Algorithm::load( filename ); - else if( modelName == CV_KNEAREST ) - model = Algorithm::load( filename ); - else if( modelName == CV_SVM ) - model = Algorithm::load( filename ); - else if( modelName == CV_ANN ) - model = Algorithm::load( filename ); - else if( modelName == CV_DTREE ) - model = Algorithm::load( filename ); - else if( modelName == CV_BOOST ) - model = Algorithm::load( filename ); - else if( modelName 
== CV_RTREES ) - model = Algorithm::load( filename ); - else if( modelName == CV_SVMSGD ) - model = Algorithm::load( filename ); - else - CV_Error( CV_StsNotImplemented, "invalid stat model name"); -} - - - -TEST(TrainDataGet, layout_ROW_SAMPLE) // Details: #12236 -{ - cv::Mat test = cv::Mat::ones(150, 30, CV_32FC1) * 2; - test.col(3) += Scalar::all(3); - cv::Mat labels = cv::Mat::ones(150, 3, CV_32SC1) * 5; - labels.col(1) += 1; - cv::Ptr train_data = cv::ml::TrainData::create(test, cv::ml::ROW_SAMPLE, labels); - train_data->setTrainTestSplitRatio(0.9); - - Mat tidx = train_data->getTestSampleIdx(); - EXPECT_EQ((size_t)15, tidx.total()); - - Mat tresp = train_data->getTestResponses(); - EXPECT_EQ(15, tresp.rows); - EXPECT_EQ(labels.cols, tresp.cols); - EXPECT_EQ(5, tresp.at(0, 0)) << tresp; - EXPECT_EQ(6, tresp.at(0, 1)) << tresp; - EXPECT_EQ(6, tresp.at(14, 1)) << tresp; - EXPECT_EQ(5, tresp.at(14, 2)) << tresp; - - Mat tsamples = train_data->getTestSamples(); - EXPECT_EQ(15, tsamples.rows); - EXPECT_EQ(test.cols, tsamples.cols); - EXPECT_EQ(2, tsamples.at(0, 0)) << tsamples; - EXPECT_EQ(5, tsamples.at(0, 3)) << tsamples; - EXPECT_EQ(2, tsamples.at(14, test.cols - 1)) << tsamples; - EXPECT_EQ(5, tsamples.at(14, 3)) << tsamples; -} - -TEST(TrainDataGet, layout_COL_SAMPLE) // Details: #12236 -{ - cv::Mat test = cv::Mat::ones(30, 150, CV_32FC1) * 3; - test.row(3) += Scalar::all(3); - cv::Mat labels = cv::Mat::ones(3, 150, CV_32SC1) * 5; - labels.row(1) += 1; - cv::Ptr train_data = cv::ml::TrainData::create(test, cv::ml::COL_SAMPLE, labels); - train_data->setTrainTestSplitRatio(0.9); - - Mat tidx = train_data->getTestSampleIdx(); - EXPECT_EQ((size_t)15, tidx.total()); - - Mat tresp = train_data->getTestResponses(); // always row-based, transposed - EXPECT_EQ(15, tresp.rows); - EXPECT_EQ(labels.rows, tresp.cols); - EXPECT_EQ(5, tresp.at(0, 0)) << tresp; - EXPECT_EQ(6, tresp.at(0, 1)) << tresp; - EXPECT_EQ(6, tresp.at(14, 1)) << tresp; - EXPECT_EQ(5, tresp.at(14, 2)) << tresp; - - - Mat tsamples = train_data->getTestSamples(); - EXPECT_EQ(15, tsamples.cols); - EXPECT_EQ(test.rows, tsamples.rows); - EXPECT_EQ(3, tsamples.at(0, 0)) << tsamples; - EXPECT_EQ(6, tsamples.at(3, 0)) << tsamples; - EXPECT_EQ(6, tsamples.at(3, 14)) << tsamples; - EXPECT_EQ(3, tsamples.at(test.rows - 1, 14)) << tsamples; -} - - - -} // namespace -/* End of file. 
*/ diff --git a/modules/ml/test/test_precomp.hpp b/modules/ml/test/test_precomp.hpp index 142bf6a2bb..e2d36d2c2d 100644 --- a/modules/ml/test/test_precomp.hpp +++ b/modules/ml/test/test_precomp.hpp @@ -2,10 +2,15 @@ #define __OPENCV_TEST_PRECOMP_HPP__ #include "opencv2/ts.hpp" +#include // EXPECT_MAT_NEAR #include "opencv2/ml.hpp" #include "opencv2/core/core_c.h" +#include +using std::ifstream; + namespace opencv_test { + using namespace cv::ml; #define CV_NBAYES "nbayes" @@ -19,8 +24,6 @@ using namespace cv::ml; #define CV_ERTREES "ertrees" #define CV_SVMSGD "svmsgd" -enum { CV_TRAIN_ERROR=0, CV_TEST_ERROR=1 }; - using cv::Ptr; using cv::ml::StatModel; using cv::ml::TrainData; @@ -34,58 +37,14 @@ using cv::ml::Boost; using cv::ml::RTrees; using cv::ml::SVMSGD; -class CV_MLBaseTest : public cvtest::BaseTest -{ -public: - CV_MLBaseTest( const char* _modelName ); - virtual ~CV_MLBaseTest(); -protected: - virtual int read_params( CvFileStorage* fs ); - virtual void run( int startFrom ); - virtual int prepare_test_case( int testCaseIdx ); - virtual std::string& get_validation_filename(); - virtual int run_test_case( int testCaseIdx ) = 0; - virtual int validate_test_results( int testCaseIdx ) = 0; - - int train( int testCaseIdx ); - float get_test_error( int testCaseIdx, std::vector *resp = 0 ); - void save( const char* filename ); - void load( const char* filename ); - - Ptr data; - std::string modelName, validationFN; - std::vector dataSetNames; - cv::FileStorage validationFS; - - Ptr model; - - std::map cls_map; - - int64 initSeed; -}; - -class CV_AMLTest : public CV_MLBaseTest -{ -public: - CV_AMLTest( const char* _modelName ); - virtual ~CV_AMLTest() {} -protected: - virtual int run_test_case( int testCaseIdx ); - virtual int validate_test_results( int testCaseIdx ); -}; - -class CV_SLMLTest : public CV_MLBaseTest -{ -public: - CV_SLMLTest( const char* _modelName ); - virtual ~CV_SLMLTest() {} -protected: - virtual int run_test_case( int testCaseIdx ); - virtual int validate_test_results( int testCaseIdx ); +void defaultDistribs( Mat& means, vector& covs, int type=CV_32FC1 ); +void generateData( Mat& data, Mat& labels, const vector& sizes, const Mat& _means, const vector& covs, int dataType, int labelType ); +int maxIdx( const vector& count ); +bool getLabelsMap( const Mat& labels, const vector& sizes, vector& labelsMap, bool checkClusterUniq=true ); +bool calcErr( const Mat& labels, const Mat& origLabels, const vector& sizes, float& err, bool labelsEquivalent = true, bool checkClusterUniq=true ); - std::vector test_resps1, test_resps2; // predicted responses for test data - std::string fname1, fname2; -}; +// used in LR test +bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error); } // namespace diff --git a/modules/ml/test/test_rtrees.cpp b/modules/ml/test/test_rtrees.cpp new file mode 100644 index 0000000000..ebf0c46557 --- /dev/null +++ b/modules/ml/test/test_rtrees.cpp @@ -0,0 +1,54 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
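+
+// Checks that RTrees::getVotes returns per-class vote counts that sum to the
+// number of trees in the forest and that the top-voted class matches RTrees::predict.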
+ +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(ML_RTrees, getVotes) +{ + int n = 12; + int count, i; + int label_size = 3; + int predicted_class = 0; + int max_votes = -1; + int val; + // RTrees for classification + Ptr rt = cv::ml::RTrees::create(); + + //data + Mat data(n, 4, CV_32F); + randu(data, 0, 10); + + //labels + Mat labels = (Mat_(n,1) << 0,0,0,0, 1,1,1,1, 2,2,2,2); + + rt->train(data, ml::ROW_SAMPLE, labels); + + //run function + Mat test(1, 4, CV_32F); + Mat result; + randu(test, 0, 10); + rt->getVotes(test, result, 0); + + //count vote amount and find highest vote + count = 0; + const int* result_row = result.ptr(1); + for( i = 0; i < label_size; i++ ) + { + val = result_row[i]; + //predicted_class = max_votes < val? i; + if( max_votes < val ) + { + max_votes = val; + predicted_class = i; + } + count += val; + } + + EXPECT_EQ(count, (int)rt->getRoots().size()); + EXPECT_EQ(result.at(0, predicted_class), rt->predict(test)); +} + +}} // namespace diff --git a/modules/ml/test/test_save_load.cpp b/modules/ml/test/test_save_load.cpp index 5be010d657..201e6303f5 100644 --- a/modules/ml/test/test_save_load.cpp +++ b/modules/ml/test/test_save_load.cpp @@ -1,267 +1,100 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
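+
+// Loads models stored in the legacy XML format and smoke-tests predict() on random
+// inputs; categorical variables are filled using the cat_map stored in the model file.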
#include "test_precomp.hpp" -namespace opencv_test { +namespace opencv_test { namespace { -CV_SLMLTest::CV_SLMLTest( const char* _modelName ) : CV_MLBaseTest( _modelName ) -{ - validationFN = "slvalidation.xml"; -} -int CV_SLMLTest::run_test_case( int testCaseIdx ) +void randomFillCategories(const string & filename, Mat & input) { - int code = cvtest::TS::OK; - code = prepare_test_case( testCaseIdx ); - - if( code == cvtest::TS::OK ) - { - data->setTrainTestSplit(data->getNTrainSamples(), true); - code = train( testCaseIdx ); - if( code == cvtest::TS::OK ) - { - get_test_error( testCaseIdx, &test_resps1 ); - fname1 = tempfile(".json.gz"); - save( (fname1 + "?base64").c_str() ); - load( fname1.c_str() ); - get_test_error( testCaseIdx, &test_resps2 ); - fname2 = tempfile(".json.gz"); - save( (fname2 + "?base64").c_str() ); - } - else - ts->printf( cvtest::TS::LOG, "model can not be trained" ); - } - return code; -} - -int CV_SLMLTest::validate_test_results( int testCaseIdx ) -{ - int code = cvtest::TS::OK; - - // 1. compare files - FILE *fs1 = fopen(fname1.c_str(), "rb"), *fs2 = fopen(fname2.c_str(), "rb"); - size_t sz1 = 0, sz2 = 0; - if( !fs1 || !fs2 ) - code = cvtest::TS::FAIL_MISSING_TEST_DATA; - if( code >= 0 ) - { - fseek(fs1, 0, SEEK_END); fseek(fs2, 0, SEEK_END); - sz1 = ftell(fs1); - sz2 = ftell(fs2); - fseek(fs1, 0, SEEK_SET); fseek(fs2, 0, SEEK_SET); - } - - if( sz1 != sz2 ) - code = cvtest::TS::FAIL_INVALID_OUTPUT; - - if( code >= 0 ) + Mat catMap; + Mat catCount; + std::vector varTypes; + + FileStorage fs(filename, FileStorage::READ); + FileNode root = fs.getFirstTopLevelNode(); + root["cat_map"] >> catMap; + root["cat_count"] >> catCount; + root["var_type"] >> varTypes; + + int offset = 0; + int countOffset = 0; + uint var = 0, varCount = (uint)varTypes.size(); + for (; var < varCount; ++var) { - const int BUFSZ = 1024; - uchar buf1[BUFSZ], buf2[BUFSZ]; - for( size_t pos = 0; pos < sz1; ) + if (varTypes[var] == ml::VAR_CATEGORICAL) { - size_t r1 = fread(buf1, 1, BUFSZ, fs1); - size_t r2 = fread(buf2, 1, BUFSZ, fs2); - if( r1 != r2 || memcmp(buf1, buf2, r1) != 0 ) + int size = catCount.at(0, countOffset); + for (int row = 0; row < input.rows; ++row) { - ts->printf( cvtest::TS::LOG, - "in test case %d first (%s) and second (%s) saved files differ in %d-th kb\n", - testCaseIdx, fname1.c_str(), fname2.c_str(), - (int)pos ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - break; + int randomChosenIndex = offset + ((uint)cv::theRNG()) % size; + int value = catMap.at(0, randomChosenIndex); + input.at(row, var) = (float)value; } - pos += r1; + offset += size; + ++countOffset; } } - - if(fs1) - fclose(fs1); - if(fs2) - fclose(fs2); - - // delete temporary files - if( code >= 0 ) - { - remove( fname1.c_str() ); - remove( fname2.c_str() ); - } - - if( code >= 0 ) - { - // 2. 
compare responses - CV_Assert( test_resps1.size() == test_resps2.size() ); - vector::const_iterator it1 = test_resps1.begin(), it2 = test_resps2.begin(); - for( ; it1 != test_resps1.end(); ++it1, ++it2 ) - { - if( fabs(*it1 - *it2) > FLT_EPSILON ) - { - ts->printf( cvtest::TS::LOG, "in test case %d responses predicted before saving and after loading is different", testCaseIdx ); - code = cvtest::TS::FAIL_INVALID_OUTPUT; - break; - } - } - } - return code; } -namespace { +//================================================================================================== -TEST(ML_NaiveBayes, save_load) { CV_SLMLTest test( CV_NBAYES ); test.safe_run(); } -TEST(ML_KNearest, save_load) { CV_SLMLTest test( CV_KNEAREST ); test.safe_run(); } -TEST(ML_SVM, save_load) { CV_SLMLTest test( CV_SVM ); test.safe_run(); } -TEST(ML_ANN, save_load) { CV_SLMLTest test( CV_ANN ); test.safe_run(); } -TEST(ML_DTree, save_load) { CV_SLMLTest test( CV_DTREE ); test.safe_run(); } -TEST(ML_Boost, save_load) { CV_SLMLTest test( CV_BOOST ); test.safe_run(); } -TEST(ML_RTrees, save_load) { CV_SLMLTest test( CV_RTREES ); test.safe_run(); } -TEST(DISABLED_ML_ERTrees, save_load) { CV_SLMLTest test( CV_ERTREES ); test.safe_run(); } -TEST(MV_SVMSGD, save_load){ CV_SLMLTest test( CV_SVMSGD ); test.safe_run(); } +typedef tuple ML_Legacy_Param; +typedef testing::TestWithParam< ML_Legacy_Param > ML_Legacy_Params; -class CV_LegacyTest : public cvtest::BaseTest +TEST_P(ML_Legacy_Params, legacy_load) { -public: - CV_LegacyTest(const std::string &_modelName, const std::string &_suffixes = std::string()) - : cvtest::BaseTest(), modelName(_modelName), suffixes(_suffixes) - { - } - virtual ~CV_LegacyTest() {} -protected: - void run(int) - { - unsigned int idx = 0; - for (;;) - { - if (idx >= suffixes.size()) - break; - int found = (int)suffixes.find(';', idx); - string piece = suffixes.substr(idx, found - idx); - if (piece.empty()) - break; - oneTest(piece); - idx += (unsigned int)piece.size() + 1; - } - } - void oneTest(const string & suffix) - { - using namespace cv::ml; - - int code = cvtest::TS::OK; - string filename = ts->get_data_path() + "legacy/" + modelName + suffix; - bool isTree = modelName == CV_BOOST || modelName == CV_DTREE || modelName == CV_RTREES; - Ptr model; - if (modelName == CV_BOOST) - model = Algorithm::load(filename); - else if (modelName == CV_ANN) - model = Algorithm::load(filename); - else if (modelName == CV_DTREE) - model = Algorithm::load(filename); - else if (modelName == CV_NBAYES) - model = Algorithm::load(filename); - else if (modelName == CV_SVM) - model = Algorithm::load(filename); - else if (modelName == CV_RTREES) - model = Algorithm::load(filename); - else if (modelName == CV_SVMSGD) - model = Algorithm::load(filename); - if (!model) - { - code = cvtest::TS::FAIL_INVALID_TEST_DATA; - } - else - { - Mat input = Mat(isTree ? 10 : 1, model->getVarCount(), CV_32F); - ts->get_rng().fill(input, RNG::UNIFORM, 0, 40); - - if (isTree) - randomFillCategories(filename, input); - - Mat output; - model->predict(input, output, StatModel::RAW_OUTPUT | (isTree ? 
DTrees::PREDICT_SUM : 0)); - // just check if no internal assertions or errors thrown - } - ts->set_failed_test_info(code); - } - void randomFillCategories(const string & filename, Mat & input) - { - Mat catMap; - Mat catCount; - std::vector varTypes; - - FileStorage fs(filename, FileStorage::READ); - FileNode root = fs.getFirstTopLevelNode(); - root["cat_map"] >> catMap; - root["cat_count"] >> catCount; - root["var_type"] >> varTypes; + const string modelName = get<0>(GetParam()); + const string dataName = get<1>(GetParam()); + const string filename = findDataFile("legacy/" + modelName + "_" + dataName + ".xml"); + const bool isTree = modelName == CV_BOOST || modelName == CV_DTREE || modelName == CV_RTREES; + + Ptr model; + if (modelName == CV_BOOST) + model = Algorithm::load(filename); + else if (modelName == CV_ANN) + model = Algorithm::load(filename); + else if (modelName == CV_DTREE) + model = Algorithm::load(filename); + else if (modelName == CV_NBAYES) + model = Algorithm::load(filename); + else if (modelName == CV_SVM) + model = Algorithm::load(filename); + else if (modelName == CV_RTREES) + model = Algorithm::load(filename); + else if (modelName == CV_SVMSGD) + model = Algorithm::load(filename); + ASSERT_TRUE(model); + + Mat input = Mat(isTree ? 10 : 1, model->getVarCount(), CV_32F); + cv::theRNG().fill(input, RNG::UNIFORM, 0, 40); + + if (isTree) + randomFillCategories(filename, input); + + Mat output; + EXPECT_NO_THROW(model->predict(input, output, StatModel::RAW_OUTPUT | (isTree ? DTrees::PREDICT_SUM : 0))); + // just check if no internal assertions or errors thrown +} - int offset = 0; - int countOffset = 0; - uint var = 0, varCount = (uint)varTypes.size(); - for (; var < varCount; ++var) - { - if (varTypes[var] == ml::VAR_CATEGORICAL) - { - int size = catCount.at(0, countOffset); - for (int row = 0; row < input.rows; ++row) - { - int randomChosenIndex = offset + ((uint)ts->get_rng()) % size; - int value = catMap.at(0, randomChosenIndex); - input.at(row, var) = (float)value; - } - offset += size; - ++countOffset; - } - } - } - string modelName; - string suffixes; +ML_Legacy_Param param_list[] = { + ML_Legacy_Param(CV_ANN, "waveform"), + ML_Legacy_Param(CV_BOOST, "adult"), + ML_Legacy_Param(CV_BOOST, "1"), + ML_Legacy_Param(CV_BOOST, "2"), + ML_Legacy_Param(CV_BOOST, "3"), + ML_Legacy_Param(CV_DTREE, "abalone"), + ML_Legacy_Param(CV_DTREE, "mushroom"), + ML_Legacy_Param(CV_NBAYES, "waveform"), + ML_Legacy_Param(CV_SVM, "poletelecomm"), + ML_Legacy_Param(CV_SVM, "waveform"), + ML_Legacy_Param(CV_RTREES, "waveform"), + ML_Legacy_Param(CV_SVMSGD, "waveform"), }; -TEST(ML_ANN, legacy_load) { CV_LegacyTest test(CV_ANN, "_waveform.xml"); test.safe_run(); } -TEST(ML_Boost, legacy_load) { CV_LegacyTest test(CV_BOOST, "_adult.xml;_1.xml;_2.xml;_3.xml"); test.safe_run(); } -TEST(ML_DTree, legacy_load) { CV_LegacyTest test(CV_DTREE, "_abalone.xml;_mushroom.xml"); test.safe_run(); } -TEST(ML_NBayes, legacy_load) { CV_LegacyTest test(CV_NBAYES, "_waveform.xml"); test.safe_run(); } -TEST(ML_SVM, legacy_load) { CV_LegacyTest test(CV_SVM, "_poletelecomm.xml;_waveform.xml"); test.safe_run(); } -TEST(ML_RTrees, legacy_load) { CV_LegacyTest test(CV_RTREES, "_waveform.xml"); test.safe_run(); } -TEST(ML_SVMSGD, legacy_load) { CV_LegacyTest test(CV_SVMSGD, "_waveform.xml"); test.safe_run(); } +INSTANTIATE_TEST_CASE_P(/**/, ML_Legacy_Params, testing::ValuesIn(param_list)); /*TEST(ML_SVM, throw_exception_when_save_untrained_model) { @@ -271,33 +104,4 @@ TEST(ML_SVMSGD, legacy_load) { CV_LegacyTest 
test(CV_SVMSGD, "_waveform.xml"); t remove(filename.c_str()); }*/ -TEST(DISABLED_ML_SVM, linear_save_load) -{ - Ptr svm1, svm2, svm3; - - svm1 = Algorithm::load("SVM45_X_38-1.xml"); - svm2 = Algorithm::load("SVM45_X_38-2.xml"); - string tname = tempfile("a.json"); - svm2->save(tname + "?base64"); - svm3 = Algorithm::load(tname); - - ASSERT_EQ(svm1->getVarCount(), svm2->getVarCount()); - ASSERT_EQ(svm1->getVarCount(), svm3->getVarCount()); - - int m = 10000, n = svm1->getVarCount(); - Mat samples(m, n, CV_32F), r1, r2, r3; - randu(samples, 0., 1.); - - svm1->predict(samples, r1); - svm2->predict(samples, r2); - svm3->predict(samples, r3); - - double eps = 1e-4; - EXPECT_LE(cvtest::norm(r1, r2, NORM_INF), eps); - EXPECT_LE(cvtest::norm(r1, r3, NORM_INF), eps); - - remove(tname.c_str()); -} - }} // namespace -/* End of file. */ diff --git a/modules/ml/test/test_svmsgd.cpp b/modules/ml/test/test_svmsgd.cpp index 6eea637d6e..038fca0d40 100644 --- a/modules/ml/test/test_svmsgd.cpp +++ b/modules/ml/test/test_svmsgd.cpp @@ -1,281 +1,119 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" namespace opencv_test { namespace { -using cv::ml::SVMSGD; -using cv::ml::TrainData; - -class CV_SVMSGDTrainTest : public cvtest::BaseTest +static const int TEST_VALUE_LIMIT = 500; +enum { -public: - enum TrainDataType - { - UNIFORM_SAME_SCALE, - UNIFORM_DIFFERENT_SCALES - }; - - CV_SVMSGDTrainTest(const Mat &_weights, float shift, TrainDataType type, double precision = 0.01); -private: - virtual void run( int start_from ); - static float decisionFunction(const Mat &sample, const Mat &weights, float shift); - void makeData(int samplesCount, const Mat &weights, float shift, RNG &rng, Mat &samples, Mat & responses); - void generateSameBorders(int featureCount); - void generateDifferentBorders(int featureCount); - - TrainDataType type; - double precision; - std::vector > borders; - cv::Ptr data; - cv::Mat testSamples; - cv::Mat testResponses; - static const int TEST_VALUE_LIMIT = 500; + UNIFORM_SAME_SCALE, + UNIFORM_DIFFERENT_SCALES }; -void CV_SVMSGDTrainTest::generateSameBorders(int featureCount) -{ - float lowerLimit = -TEST_VALUE_LIMIT; - float upperLimit = TEST_VALUE_LIMIT; - - for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) - { - borders.push_back(std::pair(lowerLimit, upperLimit)); - } -} - -void CV_SVMSGDTrainTest::generateDifferentBorders(int featureCount) -{ - float lowerLimit = -TEST_VALUE_LIMIT; - float upperLimit = TEST_VALUE_LIMIT; - cv::RNG rng(0); - - for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) - { - int crit = rng.uniform(0, 2); - - if (crit > 0) - { - borders.push_back(std::pair(lowerLimit, upperLimit)); - } - else - { - borders.push_back(std::pair(lowerLimit/1000, upperLimit/1000)); - } - } -} +CV_ENUM(SVMSGD_TYPE, UNIFORM_SAME_SCALE, UNIFORM_DIFFERENT_SCALES) -float CV_SVMSGDTrainTest::decisionFunction(const Mat &sample, const Mat &weights, float shift) -{ - return static_cast(sample.dot(weights)) + shift; -} +typedef std::vector< std::pair > BorderList; -void CV_SVMSGDTrainTest::makeData(int samplesCount, const Mat &weights, float shift, RNG &rng, Mat &samples, Mat & responses) +static void makeData(RNG &rng, int samplesCount, const Mat &weights, float shift, const BorderList & borders, Mat &samples, Mat & responses) { int featureCount = weights.cols; - samples.create(samplesCount, featureCount, CV_32FC1); for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) - { rng.fill(samples.col(featureIndex), RNG::UNIFORM, borders[featureIndex].first, borders[featureIndex].second); - } - responses.create(samplesCount, 1, CV_32FC1); - for (int i = 0 ; i < samplesCount; i++) { - responses.at(i) = decisionFunction(samples.row(i), weights, shift) > 0 ? 1.f : -1.f; + double res = samples.row(i).dot(weights) + shift; + responses.at(i) = res > 0 ? 
1.f : -1.f; } - } -CV_SVMSGDTrainTest::CV_SVMSGDTrainTest(const Mat &weights, float shift, TrainDataType _type, double _precision) +//================================================================================================== + +typedef tuple ML_SVMSGD_Param; +typedef testing::TestWithParam ML_SVMSGD_Params; + +TEST_P(ML_SVMSGD_Params, scale_and_features) { - type = _type; - precision = _precision; + const int type = get<0>(GetParam()); + const int featureCount = get<1>(GetParam()); + const double precision = get<2>(GetParam()); - int featureCount = weights.cols; + RNG &rng = cv::theRNG(); - switch(type) + Mat_ weights(1, featureCount); + rng.fill(weights, RNG::UNIFORM, -1, 1); + const float shift = static_cast(rng.uniform(-featureCount, featureCount)); + + BorderList borders; + float lowerLimit = -TEST_VALUE_LIMIT; + float upperLimit = TEST_VALUE_LIMIT; + if (type == UNIFORM_SAME_SCALE) { - case UNIFORM_SAME_SCALE: - generateSameBorders(featureCount); - break; - case UNIFORM_DIFFERENT_SCALES: - generateDifferentBorders(featureCount); - break; - default: - CV_Error(CV_StsBadArg, "Unknown train data type"); + for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) + borders.push_back(std::pair(lowerLimit, upperLimit)); } - - RNG rng(0); + else if (type == UNIFORM_DIFFERENT_SCALES) + { + for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) + { + int crit = rng.uniform(0, 2); + if (crit > 0) + borders.push_back(std::pair(lowerLimit, upperLimit)); + else + borders.push_back(std::pair(lowerLimit/1000, upperLimit/1000)); + } + } + ASSERT_FALSE(borders.empty()); Mat trainSamples; Mat trainResponses; int trainSamplesCount = 10000; - makeData(trainSamplesCount, weights, shift, rng, trainSamples, trainResponses); - data = TrainData::create(trainSamples, cv::ml::ROW_SAMPLE, trainResponses); + makeData(rng, trainSamplesCount, weights, shift, borders, trainSamples, trainResponses); + ASSERT_EQ(trainResponses.type(), CV_32FC1); + Mat testSamples; + Mat testResponses; int testSamplesCount = 100000; - makeData(testSamplesCount, weights, shift, rng, testSamples, testResponses); -} + makeData(rng, testSamplesCount, weights, shift, borders, testSamples, testResponses); + ASSERT_EQ(testResponses.type(), CV_32FC1); + + Ptr data = TrainData::create(trainSamples, cv::ml::ROW_SAMPLE, trainResponses); + ASSERT_TRUE(data); -void CV_SVMSGDTrainTest::run( int /*start_from*/ ) -{ cv::Ptr svmsgd = SVMSGD::create(); + ASSERT_TRUE(svmsgd); svmsgd->train(data); Mat responses; - svmsgd->predict(testSamples, responses); + ASSERT_EQ(responses.type(), CV_32FC1); + ASSERT_EQ(responses.rows, testSamplesCount); int errCount = 0; - int testSamplesCount = testSamples.rows; - - CV_Assert((responses.type() == CV_32FC1) && (testResponses.type() == CV_32FC1)); for (int i = 0; i < testSamplesCount; i++) - { if (responses.at(i) * testResponses.at(i) < 0) errCount++; - } - float err = (float)errCount / testSamplesCount; - - if ( err > precision ) - { - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - } -} - -void makeWeightsAndShift(int featureCount, Mat &weights, float &shift) -{ - weights.create(1, featureCount, CV_32FC1); - cv::RNG rng(0); - double lowerLimit = -1; - double upperLimit = 1; - - rng.fill(weights, RNG::UNIFORM, lowerLimit, upperLimit); - shift = static_cast(rng.uniform(-featureCount, featureCount)); + EXPECT_LE(err, precision); } +ML_SVMSGD_Param params_list[] = { + ML_SVMSGD_Param(UNIFORM_SAME_SCALE, 2, 0.01), + ML_SVMSGD_Param(UNIFORM_SAME_SCALE, 5, 0.01), + 
ML_SVMSGD_Param(UNIFORM_SAME_SCALE, 100, 0.02), + ML_SVMSGD_Param(UNIFORM_DIFFERENT_SCALES, 2, 0.01), + ML_SVMSGD_Param(UNIFORM_DIFFERENT_SCALES, 5, 0.01), + ML_SVMSGD_Param(UNIFORM_DIFFERENT_SCALES, 100, 0.01), +}; -TEST(ML_SVMSGD, trainSameScale2) -{ - int featureCount = 2; - - Mat weights; - - float shift = 0; - makeWeightsAndShift(featureCount, weights, shift); - - CV_SVMSGDTrainTest test(weights, shift, CV_SVMSGDTrainTest::UNIFORM_SAME_SCALE); - test.safe_run(); -} - -TEST(ML_SVMSGD, trainSameScale5) -{ - int featureCount = 5; - - Mat weights; - - float shift = 0; - makeWeightsAndShift(featureCount, weights, shift); - - CV_SVMSGDTrainTest test(weights, shift, CV_SVMSGDTrainTest::UNIFORM_SAME_SCALE); - test.safe_run(); -} - -TEST(ML_SVMSGD, trainSameScale100) -{ - int featureCount = 100; - - Mat weights; - - float shift = 0; - makeWeightsAndShift(featureCount, weights, shift); - - CV_SVMSGDTrainTest test(weights, shift, CV_SVMSGDTrainTest::UNIFORM_SAME_SCALE, 0.02); - test.safe_run(); -} - -TEST(ML_SVMSGD, trainDifferentScales2) -{ - int featureCount = 2; - - Mat weights; - - float shift = 0; - makeWeightsAndShift(featureCount, weights, shift); - - CV_SVMSGDTrainTest test(weights, shift, CV_SVMSGDTrainTest::UNIFORM_DIFFERENT_SCALES, 0.01); - test.safe_run(); -} - -TEST(ML_SVMSGD, trainDifferentScales5) -{ - int featureCount = 5; - - Mat weights; - - float shift = 0; - makeWeightsAndShift(featureCount, weights, shift); - - CV_SVMSGDTrainTest test(weights, shift, CV_SVMSGDTrainTest::UNIFORM_DIFFERENT_SCALES, 0.01); - test.safe_run(); -} - -TEST(ML_SVMSGD, trainDifferentScales100) -{ - int featureCount = 100; - - Mat weights; - - float shift = 0; - makeWeightsAndShift(featureCount, weights, shift); +INSTANTIATE_TEST_CASE_P(/**/, ML_SVMSGD_Params, testing::ValuesIn(params_list)); - CV_SVMSGDTrainTest test(weights, shift, CV_SVMSGDTrainTest::UNIFORM_DIFFERENT_SCALES, 0.01); - test.safe_run(); -} +//================================================================================================== TEST(ML_SVMSGD, twoPoints) { diff --git a/modules/ml/test/test_svmtrainauto.cpp b/modules/ml/test/test_svmtrainauto.cpp index fcd83d3533..9d78762c4c 100644 --- a/modules/ml/test/test_svmtrainauto.cpp +++ b/modules/ml/test/test_svmtrainauto.cpp @@ -1,43 +1,6 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" @@ -46,21 +9,11 @@ namespace opencv_test { namespace { using cv::ml::SVM; using cv::ml::TrainData; -//-------------------------------------------------------------------------------------------- -class CV_SVMTrainAutoTest : public cvtest::BaseTest { -public: - CV_SVMTrainAutoTest() {} -protected: - virtual void run( int start_from ); -}; - -void CV_SVMTrainAutoTest::run( int /*start_from*/ ) +static Ptr makeRandomData(int datasize) { - int datasize = 100; cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 ); cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S ); - - RNG rng(0); + RNG &rng = cv::theRNG(); for (int i = 0; i < datasize; ++i) { int response = rng.uniform(0, 2); // Random from {0, 1}. @@ -68,36 +21,14 @@ void CV_SVMTrainAutoTest::run( int /*start_from*/ ) samples.at( i, 1 ) = rng.uniform(0.f, 0.5f) + response * 0.5f; responses.at( i, 0 ) = response; } - - cv::Ptr data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses ); - cv::Ptr svm = SVM::create(); - svm->trainAuto( data, 10 ); // 2-fold cross validation. 
- - float test_data0[2] = {0.25f, 0.25f}; - cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 ); - float result0 = svm->predict( test_point0 ); - float test_data1[2] = {0.75f, 0.75f}; - cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 ); - float result1 = svm->predict( test_point1 ); - - if ( fabs( result0 - 0 ) > 0.001 || fabs( result1 - 1 ) > 0.001 ) - { - ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); - } + return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses ); } -TEST(ML_SVM, trainauto) { CV_SVMTrainAutoTest test; test.safe_run(); } - -TEST(ML_SVM, trainauto_sigmoid) +static Ptr makeCircleData(int datasize, float scale_factor, float radius) { - const int datasize = 100; + // Populate samples with data that can be split into two concentric circles cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 ); cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S ); - - const float scale_factor = 0.5; - const float radius = 2.0; - - // Populate samples with data that can be split into two concentric circles for (int i = 0; i < datasize; i+=2) { const float pi = 3.14159f; @@ -115,41 +46,74 @@ TEST(ML_SVM, trainauto_sigmoid) samples.at( i + 1, 1 ) = y * scale_factor; responses.at( i + 1, 0 ) = 1; } + return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses ); +} + +static Ptr makeRandomData2(int datasize) +{ + cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 ); + cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S ); + RNG &rng = cv::theRNG(); + for (int i = 0; i < datasize; ++i) + { + int response = rng.uniform(0, 2); // Random from {0, 1}. + samples.at( i, 0 ) = 0; + samples.at( i, 1 ) = (0.5f - response) * rng.uniform(0.f, 1.2f) + response; + responses.at( i, 0 ) = response; + } + return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses ); +} + +//================================================================================================== - cv::Ptr data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses ); +TEST(ML_SVM, trainauto) +{ + const int datasize = 100; + cv::Ptr data = makeRandomData(datasize); + ASSERT_TRUE(data); cv::Ptr svm = SVM::create(); - svm->setKernel(SVM::SIGMOID); + ASSERT_TRUE(svm); + svm->trainAuto( data, 10 ); // 2-fold cross validation. + + float test_data0[2] = {0.25f, 0.25f}; + cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 ); + float result0 = svm->predict( test_point0 ); + float test_data1[2] = {0.75f, 0.75f}; + cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 ); + float result1 = svm->predict( test_point1 ); + + EXPECT_NEAR(result0, 0, 0.001); + EXPECT_NEAR(result1, 1, 0.001); +} + +TEST(ML_SVM, trainauto_sigmoid) +{ + const int datasize = 100; + const float scale_factor = 0.5; + const float radius = 2.0; + cv::Ptr data = makeCircleData(datasize, scale_factor, radius); + ASSERT_TRUE(data); + cv::Ptr svm = SVM::create(); + ASSERT_TRUE(svm); + svm->setKernel(SVM::SIGMOID); svm->setGamma(10.0); svm->setCoef0(-10.0); svm->trainAuto( data, 10 ); // 2-fold cross validation. 
float test_data0[2] = {radius, radius}; cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 ); - ASSERT_EQ(0, svm->predict( test_point0 )); + EXPECT_FLOAT_EQ(svm->predict( test_point0 ), 0); float test_data1[2] = {scale_factor * radius, scale_factor * radius}; cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 ); - ASSERT_EQ(1, svm->predict( test_point1 )); + EXPECT_FLOAT_EQ(svm->predict( test_point1 ), 1); } - TEST(ML_SVM, trainAuto_regression_5369) { - int datasize = 100; - cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 ); - cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S ); - - RNG rng(0); // fixed! - for (int i = 0; i < datasize; ++i) - { - int response = rng.uniform(0, 2); // Random from {0, 1}. - samples.at( i, 0 ) = 0; - samples.at( i, 1 ) = (0.5f - response) * rng.uniform(0.f, 1.2f) + response; - responses.at( i, 0 ) = response; - } - - cv::Ptr data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses ); + const int datasize = 100; + Ptr data = makeRandomData2(datasize); cv::Ptr svm = SVM::create(); svm->trainAuto( data, 10 ); // 2-fold cross validation. @@ -164,16 +128,8 @@ TEST(ML_SVM, trainAuto_regression_5369) EXPECT_EQ(1., result1); } -class CV_SVMGetSupportVectorsTest : public cvtest::BaseTest { -public: - CV_SVMGetSupportVectorsTest() {} -protected: - virtual void run( int startFrom ); -}; -void CV_SVMGetSupportVectorsTest::run(int /*startFrom*/ ) +TEST(ML_SVM, getSupportVectors) { - int code = cvtest::TS::OK; - // Set up training data int labels[4] = {1, -1, -1, -1}; float trainingData[4][2] = { {501, 10}, {255, 10}, {501, 255}, {10, 501} }; @@ -181,19 +137,18 @@ void CV_SVMGetSupportVectorsTest::run(int /*startFrom*/ ) Mat labelsMat(4, 1, CV_32SC1, labels); Ptr svm = SVM::create(); + ASSERT_TRUE(svm); svm->setType(SVM::C_SVC); svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6)); - // Test retrieval of SVs and compressed SVs on linear SVM svm->setKernel(SVM::LINEAR); svm->train(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat); Mat sv = svm->getSupportVectors(); - CV_Assert(sv.rows == 1); // by default compressed SV returned + EXPECT_EQ(1, sv.rows); // by default compressed SV returned sv = svm->getUncompressedSupportVectors(); - CV_Assert(sv.rows == 3); - + EXPECT_EQ(3, sv.rows); // Test retrieval of SVs and compressed SVs on non-linear SVM svm->setKernel(SVM::POLY); @@ -201,15 +156,9 @@ void CV_SVMGetSupportVectorsTest::run(int /*startFrom*/ ) svm->train(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat); sv = svm->getSupportVectors(); - CV_Assert(sv.rows == 3); + EXPECT_EQ(3, sv.rows); sv = svm->getUncompressedSupportVectors(); - CV_Assert(sv.rows == 0); // inapplicable for non-linear SVMs - - - ts->set_failed_test_info(code); + EXPECT_EQ(0, sv.rows); // inapplicable for non-linear SVMs } - -TEST(ML_SVM, getSupportVectors) { CV_SVMGetSupportVectorsTest test; test.safe_run(); } - }} // namespace diff --git a/modules/ml/test/test_utils.cpp b/modules/ml/test/test_utils.cpp new file mode 100644 index 0000000000..8717d9f301 --- /dev/null +++ b/modules/ml/test/test_utils.cpp @@ -0,0 +1,189 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#include "test_precomp.hpp" + +namespace opencv_test { + +void defaultDistribs( Mat& means, vector& covs, int type) +{ + float mp0[] = {0.0f, 0.0f}, cp0[] = {0.67f, 0.0f, 0.0f, 0.67f}; + float mp1[] = {5.0f, 0.0f}, cp1[] = {1.0f, 0.0f, 0.0f, 1.0f}; + float mp2[] = {1.0f, 5.0f}, cp2[] = {1.0f, 0.0f, 0.0f, 1.0f}; + means.create(3, 2, type); + Mat m0( 1, 2, CV_32FC1, mp0 ), c0( 2, 2, CV_32FC1, cp0 ); + Mat m1( 1, 2, CV_32FC1, mp1 ), c1( 2, 2, CV_32FC1, cp1 ); + Mat m2( 1, 2, CV_32FC1, mp2 ), c2( 2, 2, CV_32FC1, cp2 ); + means.resize(3), covs.resize(3); + + Mat mr0 = means.row(0); + m0.convertTo(mr0, type); + c0.convertTo(covs[0], type); + + Mat mr1 = means.row(1); + m1.convertTo(mr1, type); + c1.convertTo(covs[1], type); + + Mat mr2 = means.row(2); + m2.convertTo(mr2, type); + c2.convertTo(covs[2], type); +} + +// generate points sets by normal distributions +void generateData( Mat& data, Mat& labels, const vector& sizes, const Mat& _means, const vector& covs, int dataType, int labelType ) +{ + vector::const_iterator sit = sizes.begin(); + int total = 0; + for( ; sit != sizes.end(); ++sit ) + total += *sit; + CV_Assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() ); + CV_Assert( !data.empty() && data.rows == total ); + CV_Assert( data.type() == dataType ); + + labels.create( data.rows, 1, labelType ); + + randn( data, Scalar::all(-1.0), Scalar::all(1.0) ); + vector means(sizes.size()); + for(int i = 0; i < _means.rows; i++) + means[i] = _means.row(i); + vector::const_iterator mit = means.begin(), cit = covs.begin(); + int bi, ei = 0; + sit = sizes.begin(); + for( int p = 0, l = 0; sit != sizes.end(); ++sit, ++mit, ++cit, l++ ) + { + bi = ei; + ei = bi + *sit; + CV_Assert( mit->rows == 1 && mit->cols == data.cols ); + CV_Assert( cit->rows == data.cols && cit->cols == data.cols ); + for( int i = bi; i < ei; i++, p++ ) + { + Mat r = data.row(i); + r = r * (*cit) + *mit; + if( labelType == CV_32FC1 ) + labels.at(p, 0) = (float)l; + else if( labelType == CV_32SC1 ) + labels.at(p, 0) = l; + else + { + CV_DbgAssert(0); + } + } + } +} + +int maxIdx( const vector& count ) +{ + int idx = -1; + int maxVal = -1; + vector::const_iterator it = count.begin(); + for( int i = 0; it != count.end(); ++it, i++ ) + { + if( *it > maxVal) + { + maxVal = *it; + idx = i; + } + } + CV_Assert( idx >= 0); + return idx; +} + +bool getLabelsMap( const Mat& labels, const vector& sizes, vector& labelsMap, bool checkClusterUniq) +{ + size_t total = 0, nclusters = sizes.size(); + for(size_t i = 0; i < sizes.size(); i++) + total += sizes[i]; + + CV_Assert( !labels.empty() ); + CV_Assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1)); + CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 ); + + bool isFlt = labels.type() == CV_32FC1; + + labelsMap.resize(nclusters); + + vector buzy(nclusters, false); + int startIndex = 0; + for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ ) + { + vector count( nclusters, 0 ); + for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++) + { + int lbl = isFlt ? 
(int)labels.at(i) : labels.at(i); + CV_Assert(lbl < (int)nclusters); + count[lbl]++; + CV_Assert(count[lbl] < (int)total); + } + startIndex += sizes[clusterIndex]; + + int cls = maxIdx( count ); + CV_Assert( !checkClusterUniq || !buzy[cls] ); + + labelsMap[clusterIndex] = cls; + + buzy[cls] = true; + } + + if(checkClusterUniq) + { + for(size_t i = 0; i < buzy.size(); i++) + if(!buzy[i]) + return false; + } + + return true; +} + +bool calcErr( const Mat& labels, const Mat& origLabels, const vector& sizes, float& err, bool labelsEquivalent, bool checkClusterUniq) +{ + err = 0; + CV_Assert( !labels.empty() && !origLabels.empty() ); + CV_Assert( labels.rows == 1 || labels.cols == 1 ); + CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 ); + CV_Assert( labels.total() == origLabels.total() ); + CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 ); + CV_Assert( origLabels.type() == labels.type() ); + + vector labelsMap; + bool isFlt = labels.type() == CV_32FC1; + if( !labelsEquivalent ) + { + if( !getLabelsMap( labels, sizes, labelsMap, checkClusterUniq ) ) + return false; + + for( int i = 0; i < labels.rows; i++ ) + if( isFlt ) + err += labels.at(i) != labelsMap[(int)origLabels.at(i)] ? 1.f : 0.f; + else + err += labels.at(i) != labelsMap[origLabels.at(i)] ? 1.f : 0.f; + } + else + { + for( int i = 0; i < labels.rows; i++ ) + if( isFlt ) + err += labels.at(i) != origLabels.at(i) ? 1.f : 0.f; + else + err += labels.at(i) != origLabels.at(i) ? 1.f : 0.f; + } + err /= (float)labels.rows; + return true; +} + +bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error) +{ + error = 0.0f; + float accuracy = 0.0f; + Mat _p_labels_temp; + Mat _o_labels_temp; + _p_labels.convertTo(_p_labels_temp, CV_32S); + _o_labels.convertTo(_o_labels_temp, CV_32S); + + CV_Assert(_p_labels_temp.total() == _o_labels_temp.total()); + CV_Assert(_p_labels_temp.rows == _o_labels_temp.rows); + + accuracy = (float)countNonZero(_p_labels_temp == _o_labels_temp)/_p_labels_temp.rows; + error = 1 - accuracy; + return true; +} + +} // namespace