/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
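
// Training of a single soft-cascade octave: sft::Octave wraps CvBoost (GentleBoost over
// depth-2 trees) on integral channel features (ICF). Positives are cropped from the
// annotated samples, negatives are random crops from background images, per-weak
// rejection thresholds are computed with the DBP strategy, and the trained trees together
// with the features they use are serialized to cv::FileStorage.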

#include <sft/octave.hpp>   // declares sft::Octave, sft::ICFFeaturePool, Dataset (header path assumed)
#include <sft/random.hpp>   // declares sft::Random engines and distributions (header path assumed)

#include <glob.h>

#include <queue>

// ============ Octave ============ //

sft::Octave::Octave(cv::Rect bb, int np, int nn, int ls, int shr)
: logScale(ls), boundingBox(bb), npositives(np), nnegatives(nn), shrinkage(shr)
{
    int maxSample = npositives + nnegatives;
    responses.create(maxSample, 1, CV_32FC1);

    CvBoostParams _params;
    {
        // tree params
        _params.max_categories       = 10;
        _params.max_depth            = 2;
        _params.cv_folds             = 0;
        _params.truncate_pruned_tree = false;
        _params.use_surrogates       = false;
        _params.use_1se_rule         = false;
        _params.regression_accuracy  = 1.0e-6;

        // boost params
        _params.boost_type           = CvBoost::GENTLE;
        _params.split_criteria       = CvBoost::SQERR;
        _params.weight_trim_rate     = 0.95;

        // simple defaults
        _params.min_sample_count     = 2;
        _params.weak_count           = 1;
    }

    params = _params;
}

sft::Octave::~Octave(){}

bool sft::Octave::train( const cv::Mat& _trainData, const cv::Mat& _responses, const cv::Mat& varIdx,
       const cv::Mat& sampleIdx, const cv::Mat& varType, const cv::Mat& missingDataMask)
{
    bool update = false;
    return cv::Boost::train(_trainData, CV_COL_SAMPLE, _responses, varIdx, sampleIdx, varType, missingDataMask,
        params, update);
}

void sft::Octave::setRejectThresholds(cv::Mat& thresholds)
{
    dprintf("set thresholds according to DBP strategy\n");

    // labels decided by the classifier
    cv::Mat decisions(responses.cols, responses.rows, responses.type());
    float* dptr = decisions.ptr<float>(0);

    // mask of samples satisfying the condition
    cv::Mat ppmask(responses.cols, responses.rows, CV_8UC1);
    uchar* mptr = ppmask.ptr<uchar>(0);

    int nsamples = npositives + nnegatives;

    cv::Mat stab;

    for (int si = 0; si < nsamples; ++si)
    {
        float decision = dptr[si] = predict(trainData.col(si), stab, false, false);
        mptr[si] = cv::saturate_cast<uchar>((uint)( (responses.ptr<float>(si)[0] == 1.f) && (decision == 1.f)));
    }

    int weaks = weak->total;
    thresholds.create(1, weaks, CV_64FC1);
    double* thptr = thresholds.ptr<double>(0);

    cv::Mat traces(weaks, nsamples, CV_64FC1, cv::Scalar::all(FLT_MAX));

    // for each weak classifier w the rejection threshold is the smallest partial score
    // (sum of the first w+1 weak responses) over the positives accepted by the full classifier
    for (int w = 0; w < weaks; ++w)
    {
        double* rptr = traces.ptr<double>(w);
        for (int si = 0; si < nsamples; ++si)
        {
            cv::Range curr(0, w + 1);
            if (mptr[si])
            {
                float trace = predict(trainData.col(si), curr);
                rptr[si] = trace;
            }
        }
        double mintrace = 0.;
        cv::minMaxLoc(traces.row(w), &mintrace);
        thptr[w] = mintrace;
    }
}

namespace {
using namespace sft;
}

void sft::Octave::processPositives(const Dataset& dataset, const FeaturePool* pool)
{
    int w = boundingBox.width;
    int h = boundingBox.height;

    integrals.create(pool->size(), (w / shrinkage + 1) * (h / shrinkage * 10 + 1), CV_32SC1);

    int total = 0;
    // for (svector::const_iterator it = dataset.pos.begin(); it != dataset.pos.end(); ++it)
    for (int curr = 0; curr < dataset.available( Dataset::POSITIVE); ++curr)
    {
        cv::Mat sample = dataset.get( Dataset::POSITIVE, curr);

        cv::Mat channels = integrals.row(total).reshape(0, h / shrinkage * 10 + 1);
        sample = sample(boundingBox);

        pool->preprocess(sample, channels);
        responses.ptr<float>(total)[0] = 1.f;

        if (++total >= npositives) break;
    }

    dprintf("Processing positives finished:\n\trequested %d positives, collected %d samples.\n", npositives, total);

    npositives = total;
    nnegatives = cvRound(nnegatives * total / (double)npositives);
}
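// Negatives are random crops of the octave's bounding-box size taken from the background
// images; the bootstrapping check (`if (predict(sum))`) is left commented out below, so
// every generated crop is accepted.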
void sft::Octave::generateNegatives(const Dataset& dataset, const FeaturePool* pool)
{
    // ToDo: set seed, use offsets
    sft::Random::engine eng(65633343L);
    sft::Random::engine idxEng(764224349868L);

    // int w = boundingBox.width;
    int h = boundingBox.height;

    int nimages = dataset.available(Dataset::NEGATIVE);
    sft::Random::uniform iRand(0, nimages - 1);

    int total = 0;
    Mat sum;

    // 'i' advances only when a crop is accepted; 'total' counts every viewed crop
    for (int i = npositives; i < nnegatives + npositives; ++total)
    {
        int curr = iRand(idxEng);

        Mat frame = dataset.get(Dataset::NEGATIVE, curr);

        int maxW = frame.cols - 2 * boundingBox.x - boundingBox.width;
        int maxH = frame.rows - 2 * boundingBox.y - boundingBox.height;

        sft::Random::uniform wRand(0, maxW - 1);
        sft::Random::uniform hRand(0, maxH - 1);

        int dx = wRand(eng);
        int dy = hRand(eng);

        frame = frame(cv::Rect(dx, dy, boundingBox.width, boundingBox.height));

        cv::Mat channels = integrals.row(i).reshape(0, h / shrinkage * 10 + 1);
        pool->preprocess(frame, channels);

        dprintf("generated %d %d\n", dx, dy);

        // // if (predict(sum))
        {
            responses.ptr<float>(i)[0] = 0.f;
            ++i;
        }
    }

    dprintf("Processing negatives finished:\n\trequested %d negatives, viewed %d samples.\n", nnegatives, total);
}

template <typename T>
int sgn(T val) { return (T(0) < val) - (val < T(0)); }

void sft::Octave::traverse(const CvBoostTree* tree, cv::FileStorage& fs, int& nfeatures, int* used, const double* th) const
{
    std::queue<const CvDTreeNode*> nodes;

    nodes.push( tree->get_root());
    const CvDTreeNode* tempNode;
    int leafValIdx = 0;
    int internalNodeIdx = 1;
    float* leafs = new float[(int)pow(2.f, get_params().max_depth)];

    fs << "{";
    fs << "treeThreshold" << *th;
    fs << "internalNodes" << "[";

    while (!nodes.empty())
    {
        tempNode = nodes.front();
        CV_Assert( tempNode->left );

        if ( !tempNode->left->left && !tempNode->left->right)
        {
            leafs[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx--;
        }
        else
        {
            nodes.push( tempNode->left );
            fs << internalNodeIdx++;
        }

        CV_Assert( tempNode->right );

        if ( !tempNode->right->left && !tempNode->right->right)
        {
            leafs[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            nodes.push( tempNode->right );
            fs << internalNodeIdx++;
        }

        int fidx = tempNode->split->var_idx;
        fs << nfeatures;
        used[nfeatures++] = fidx;

        fs << tempNode->split->ord.c;

        nodes.pop();
    }

    fs << "]";

    fs << "leafValues" << "[";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafs[ni];
    fs << "]";

    fs << "}";

    delete[] leafs;
}

void sft::Octave::write( cv::FileStorage &fso, const FeaturePool* pool, const Mat& thresholds) const
{
    CV_Assert(!thresholds.empty());

    cv::Mat used( 1, weak->total * (pow(2, params.max_depth) - 1), CV_32SC1);
    int* usedPtr = used.ptr<int>(0);
    int nfeatures = 0;

    fso << "{"
        << "scale" << logScale
        << "weaks" << weak->total
        << "trees" << "[";

    // should be replaced with the H.L. one
    CvSeqReader reader;
    cvStartReadSeq( weak, &reader);

    for(int i = 0; i < weak->total; i++ )
    {
        CvBoostTree* tree;
        CV_READ_SEQ_ELEM( tree, reader );

        traverse(tree, fso, nfeatures, usedPtr, thresholds.ptr<double>(0) + i);
    }

    fso << "]";

    // features
    fso << "features" << "[";
    for (int i = 0; i < nfeatures; ++i)
        pool->write(fso, usedPtr[i]);
    fso << "]"
        << "}";
}

void sft::Octave::initial_weights(double (&p)[2])
{
    double n = data->sample_count;
    p[0] = n / (2. * (double)(nnegatives));
    p[1] = n / (2. * (double)(npositives));
}
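// Top-level training of the octave: fill the integral channels and class labels, use
// every feature and every sample (no sub-sampling), evaluate each feature on each sample
// into an nfeatures x nsamples matrix (samples are columns), and run GentleBoost on it.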
bool sft::Octave::train(const Dataset& dataset, const FeaturePool* pool, int weaks, int treeDepth)
{
    CV_Assert(treeDepth == 2);
    CV_Assert(weaks > 0);

    params.max_depth  = treeDepth;
    params.weak_count = weaks;

    // 1. fill integrals and classes
    processPositives(dataset, pool);
    generateNegatives(dataset, pool);

    // 2. only simple case (all features used)
    int nfeatures = pool->size();
    cv::Mat varIdx(1, nfeatures, CV_32SC1);
    int* ptr = varIdx.ptr<int>(0);

    for (int x = 0; x < nfeatures; ++x)
        ptr[x] = x;

    // 3. only simple case (all samples used)
    int nsamples = npositives + nnegatives;
    cv::Mat sampleIdx(1, nsamples, CV_32SC1);
    ptr = sampleIdx.ptr<int>(0);

    for (int x = 0; x < nsamples; ++x)
        ptr[x] = x;

    // 4. ICF has an orderable response
    cv::Mat varType(1, nfeatures + 1, CV_8UC1);
    uchar* uptr = varType.ptr<uchar>(0);
    for (int x = 0; x < nfeatures; ++x)
        uptr[x] = CV_VAR_ORDERED;
    uptr[nfeatures] = CV_VAR_CATEGORICAL;

    trainData.create(nfeatures, nsamples, CV_32FC1);
    for (int fi = 0; fi < nfeatures; ++fi)
    {
        float* dptr = trainData.ptr<float>(fi);
        for (int si = 0; si < nsamples; ++si)
        {
            dptr[si] = pool->apply(fi, si, integrals);
        }
    }

    cv::Mat missingMask;

    bool ok = train(trainData, responses, varIdx, sampleIdx, varType, missingMask);
    if (!ok)
        std::cout << "ERROR: tree cannot be trained" << std::endl;

    return ok;
}

float sft::Octave::predict( const Mat& _sample, Mat& _votes, bool raw_mode, bool return_sum ) const
{
    CvMat sample = _sample, votes = _votes;
    return CvBoost::predict(&sample, 0, (_votes.empty())? 0 : &votes, CV_WHOLE_SEQ, raw_mode, return_sum);
}

float sft::Octave::predict( const Mat& _sample, const cv::Range range) const
{
    CvMat sample = _sample;
    return CvBoost::predict(&sample, 0, 0, range, false, true);
}

void sft::Octave::write( CvFileStorage* fs, string name) const
{
    CvBoost::write(fs, name.c_str());
}

// ========= FeaturePool ========= //

sft::ICFFeaturePool::ICFFeaturePool(cv::Size m, int n) : FeaturePool(), model(m), nfeatures(n)
{
    CV_Assert(m != cv::Size() && n > 0);
    fill(nfeatures);
}

void sft::ICFFeaturePool::preprocess(const Mat& frame, Mat& integrals) const
{
    preprocessor.apply(frame, integrals);
}

float sft::ICFFeaturePool::apply(int fi, int si, const Mat& integrals) const
{
    return pool[fi](integrals.row(si), model);
}

void sft::ICFFeaturePool::write( cv::FileStorage& fs, int index) const
{
    CV_Assert((index >= 0) && (index < (int)pool.size()));
    fs << pool[index];
}

void sft::write(cv::FileStorage& fs, const string&, const ICF& f)
{
    fs << "{" << "channel" << f.channel << "rect" << f.bb << "}";
}

sft::ICFFeaturePool::~ICFFeaturePool(){}

void sft::ICFFeaturePool::fill(int desired)
{
    int mw = model.width;
    int mh = model.height;

    int maxPoolSize = (mw - 1) * mw / 2 * (mh - 1) * mh / 2 * N_CHANNELS;

    nfeatures = std::min(desired, maxPoolSize);
    dprintf("Requested feature pool %d max %d suggested %d\n", desired, maxPoolSize, nfeatures);

    pool.reserve(nfeatures);

    sft::Random::engine eng(8854342234L);
    sft::Random::engine eng_ch(314152314L);

    sft::Random::uniform chRand(0, N_CHANNELS - 1);

    sft::Random::uniform xRand(0, model.width  - 2);
    sft::Random::uniform yRand(0, model.height - 2);

    sft::Random::uniform wRand(1, model.width  - 1);
    sft::Random::uniform hRand(1, model.height - 1);

    while (pool.size() < size_t(nfeatures))
    {
        int x = xRand(eng);
        int y = yRand(eng);

        int w = 1 + wRand(eng, model.width  - x - 1);
        int h = 1 + hRand(eng, model.height - y - 1);

        CV_Assert(w > 0);
        CV_Assert(h > 0);

        CV_Assert(w + x < model.width);
        CV_Assert(h + y < model.height);

        int ch = chRand(eng_ch);

        sft::ICF f(x, y, w, h, ch);

        if (std::find(pool.begin(), pool.end(), f) == pool.end())
        {
            pool.push_back(f);
        }
    }
}

std::ostream& sft::operator<<(std::ostream& out, const sft::ICF& m)
{
    out << m.channel << " " << m.bb;
    return out;
}

// ============ Dataset ============ //
namespace {
using namespace sft;

string itoa(long i)
{
    char s[65];
    sprintf(s, "%ld", i);
    return std::string(s);
}

void glob(const string& path, svector& ret)
{
    glob_t glob_result;
    glob(path.c_str(), GLOB_TILDE, 0, &glob_result);

    ret.clear();
    ret.reserve(glob_result.gl_pathc);

    for(uint i = 0; i < glob_result.gl_pathc; ++i)
    {
        ret.push_back(std::string(glob_result.gl_pathv[i]));
        dprintf("%s\n", ret[i].c_str());
    }

    globfree(&glob_result);
}
}
// in the default case data folders should be aligned as follows:
// 1. positives: <path>/pos/octave_<octave>/*.png
// 2. negatives: <path>/neg/octave_<octave>/*.png
Dataset::Dataset(const string& path, const int oct)
{
    dprintf("%s\n", "get dataset file names...");

    dprintf("%s\n", "Positives globbing...");
    glob(path + "/pos/octave_" + itoa(oct) + "/*.png", pos);

    dprintf("%s\n", "Negatives globbing...");
    glob(path + "/neg/octave_" + itoa(oct) + "/*.png", neg);

    // Check: files not empty
    CV_Assert(pos.size() != size_t(0));
    CV_Assert(neg.size() != size_t(0));
}

cv::Mat Dataset::get(SampleType type, int idx) const
{
    const std::string& src = (type == POSITIVE)? pos[idx]: neg[idx];
    return cv::imread(src);
}

int Dataset::available(SampleType type) const
{
    return (int)((type == POSITIVE)? pos.size() : neg.size());
}
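
// A minimal usage sketch (not from the original sources; the real driver in the sft
// application builds these objects from its configuration, and the concrete numbers
// below -- window size, shrinkage, sample and weak counts -- are assumptions):
//
//   sft::ICFFeaturePool pool(cv::Size(64, 128), 10000);
//   Dataset dataset("/path/to/trainset", /*octave*/ 0);
//   sft::Octave octave(cv::Rect(0, 0, 64, 128), 1000, 1000, /*logScale*/ 0, /*shrinkage*/ 4);
//
//   if (octave.train(dataset, &pool, /*weaks*/ 128, /*treeDepth*/ 2))
//   {
//       cv::Mat thresholds;
//       octave.setRejectThresholds(thresholds);
//
//       cv::FileStorage fso("octave_0.xml", cv::FileStorage::WRITE);
//       fso << "octave";                       // caller-chosen node name for the mapping
//       octave.write(fso, &pool, thresholds);  // emits { scale, weaks, trees, features }
//   }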