// Open Source Computer Vision Library
#include <sft/octave.hpp>
#include <sft/random.hpp>

#include <glob.h>

#include <queue>
#include <vector>

#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
// ============ Octave ============ //
// Sets up an octave trainer.
//   bb  - stored as boundingBox
//   np  - requested number of positive samples
//   nn  - requested number of negative samples
//   ls  - stored as logScale
//   shr - stored as shrinkage
// Allocates one float response per requested sample and installs the default
// boosting parameters.
sft::Octave::Octave(cv::Rect bb, int np, int nn, int ls, int shr)
: logScale(ls), boundingBox(bb), npositives(np), nnegatives(nn), shrinkage(shr)
{
    // a single-column label vector sized for every requested sample
    const int capacity = npositives + nnegatives;
    responses.create(capacity, 1, CV_32FC1);

    CvBoostParams defaults;

    // boosting configuration
    defaults.boost_type       = CvBoost::GENTLE;
    defaults.split_criteria   = CvBoost::SQERR;
    defaults.weight_trim_rate = 0.95;

    // weak learner (decision tree) configuration
    defaults.max_categories       = 10;
    defaults.max_depth            = 2;
    defaults.cv_folds             = 0;
    defaults.truncate_pruned_tree = false;
    defaults.use_surrogates       = false;
    defaults.use_1se_rule         = false;
    defaults.regression_accuracy  = 1.0e-6;

    // simple defaults
    defaults.min_sample_count = 2;
    defaults.weak_count       = 1;

    params = defaults;
}
// Forwards pre-assembled training matrices to the base boosting trainer.
// Samples are passed column-wise (CV_COL_SAMPLE) and the parameters stored in
// `params` are used. Returns whatever the base trainer returns.
bool sft::Octave::train( const cv::Mat& _trainData, const cv::Mat& _responses, const cv::Mat& varIdx,
                         const cv::Mat& sampleIdx, const cv::Mat& varType, const cv::Mat& missingDataMask)
{
    // the last argument of the base trainer; false is passed unconditionally
    const bool update = false;

    return cv::Boost::train(_trainData, CV_COL_SAMPLE, _responses, varIdx, sampleIdx, varType, missingDataMask, params,
                            update);
}
// Fills `thresholds` (1 x <number of weak classifiers>, CV_64FC1) with one value
// per cascade stage: the smallest partial response, over stages [0, w], among the
// samples that are labelled 1 and that the full classifier also classifies as 1.
// Stages for which no such sample exists keep the FLT_MAX initial value.
void sft::Octave::setRejectThresholds(cv::Mat& thresholds)
{
    dprintf("set thresholds according to DBP strategy\n");

    const int sampleCount = npositives + nnegatives;

    // labels decided by the classifier
    cv::Mat desisions(responses.cols, responses.rows, responses.type());
    float* decisionRow = desisions.ptr<float>(0);

    // mask of samples satisfying the condition (label 1 and predicted 1)
    cv::Mat ppmask(responses.cols, responses.rows, CV_8UC1);
    uchar* maskRow = ppmask.ptr<uchar>(0);

    cv::Mat noVotes;
    for (int s = 0; s < sampleCount; ++s)
    {
        const float predicted = predict(trainData.col(s), noVotes, false, false);
        decisionRow[s] = predicted;

        const bool labelledPositive = responses.ptr<float>(s)[0] == 1.f;
        maskRow[s] = (labelledPositive && predicted == 1.f) ? 1 : 0;
    }

    const int stageCount = weak->total;
    thresholds.create(1, stageCount, CV_64FC1);
    double* thresholdRow = thresholds.ptr<double>(0);

    // one row per stage; entries of samples outside the mask stay at FLT_MAX
    cv::Mat traces(stageCount, sampleCount, CV_64FC1, cv::Scalar::all(FLT_MAX));

    for (int stage = 0; stage < stageCount; ++stage)
    {
        double* traceRow = traces.ptr<double>(stage);
        const cv::Range prefix(0, stage + 1);

        for (int s = 0; s < sampleCount; ++s)
        {
            if (!maskRow[s])
                continue;

            traceRow[s] = predict(trainData.col(s), prefix);
        }

        double lowest = 0.;
        cv::minMaxLoc(traces.row(stage), &lowest);
        thresholdRow[stage] = lowest;
    }
}
namespace {
using namespace sft;
class Preprocessor
Preprocessor(int shr) : shrinkage(shr) {}
void apply(const Mat& frame, Mat& integrals)
CV_Assert(frame.type() == CV_8UC3);
int h = frame.rows;
int w = frame.cols;
cv::Mat channels, gray;
channels.create(h * BINS, w, CV_8UC1);
cvtColor(frame, gray, CV_BGR2GRAY);
cv::Mat df_dx, df_dy, mag, angle;
cv::Sobel(gray, df_dx, CV_32F, 1, 0);
cv::Sobel(gray, df_dy, CV_32F, 0, 1);
cv::cartToPolar(df_dx, df_dy, mag, angle, true);
mag *= (1.f / (8 * sqrt(2.f)));
cv::Mat nmag;
mag.convertTo(nmag, CV_8UC1);
angle *= 6 / 360.f;
for (int y = 0; y < h; ++y)
uchar* magnitude = nmag.ptr<uchar>(y);
float* ang = angle.ptr<float>(y);
for (int x = 0; x < w; ++x)
channels.ptr<uchar>(y + (h * (int)ang[x]))[x] = magnitude[x];
cv::Mat luv, shrunk;
cv::cvtColor(frame, luv, CV_BGR2Luv);
std::vector<cv::Mat> splited;
for (int i = 0; i < 3; ++i)
splited.push_back(channels(cv::Rect(0, h * (7 + i), w, h)));
split(luv, splited);
cv::resize(channels, shrunk, cv::Size(), 1.0 / shrinkage, 1.0 / shrinkage, CV_INTER_AREA);
cv::integral(shrunk, integrals, cv::noArray(), CV_32S);
int shrinkage;
enum {BINS = 10};
// ToDo: parallelize it, fix curring
// ToDo: sync the usage of model size and shrunk model size. For now, model size means the already shrunk model
void sft::Octave::processPositives(const Dataset& dataset, const FeaturePool& pool)
Preprocessor prepocessor(shrinkage);
int w = boundingBox.width;
int h = boundingBox.height;
integrals.create(pool.size(), (w / shrinkage + 1) * (h / shrinkage * 10 + 1), CV_32SC1);
int total = 0;
for (svector::const_iterator it = dataset.pos.begin(); it != dataset.pos.end(); ++it)
const string& curr = *it;
// dprintf("Process candidate positive image %s\n", curr.c_str());
cv::Mat sample = cv::imread(curr);
cv::Mat channels = integrals.row(total).reshape(0, h / shrinkage * 10 + 1);
sample = sample(boundingBox);
prepocessor.apply(sample, channels);
responses.ptr<float>(total)[0] = 1.f;
if (++total >= npositives) break;
dprintf("Processing positives finished:\n\trequested %d positives, collected %d samples.\n", npositives, total);
npositives = total;
nnegatives = cvRound(nnegatives * total / (double)npositives);
void sft::Octave::generateNegatives(const Dataset& dataset)
// ToDo: set seed, use offsets
sft::Random::engine eng(65633343L);
sft::Random::engine idxEng(764224349868L);
// int w = boundingBox.width;
int h = boundingBox.height;
Preprocessor prepocessor(shrinkage);
int nimages = (int)dataset.neg.size();
sft::Random::uniform iRand(0, nimages - 1);
int total = 0;
Mat sum;
for (int i = npositives; i < nnegatives + npositives; ++total)
int curr = iRand(idxEng);
// dprintf("View %d-th sample\n", curr);
// dprintf("Process %s\n", dataset.neg[curr].c_str());
Mat frame = cv::imread(dataset.neg[curr]);
int maxW = frame.cols - 2 * boundingBox.x - boundingBox.width;
int maxH = frame.rows - 2 * boundingBox.y - boundingBox.height;
sft::Random::uniform wRand(0, maxW -1);
sft::Random::uniform hRand(0, maxH -1);
int dx = wRand(eng);
int dy = hRand(eng);
frame = frame(cv::Rect(dx, dy, boundingBox.width, boundingBox.height));
cv::Mat channels = integrals.row(i).reshape(0, h / shrinkage * 10 + 1);
prepocessor.apply(frame, channels);
dprintf("generated %d %d\n", dx, dy);
// // if (predict(sum))
responses.ptr<float>(i)[0] = 0.f;
dprintf("Processing negatives finished:\n\trequested %d negatives, viewed %d samples.\n", nnegatives, total);
// Sign of `val` as an int: 1 when positive, -1 when negative, 0 otherwise.
template <typename T> int sgn(T val)
{
    return (T(0) < val) - (val < T(0));
}
void sft::Octave::traverse(const CvBoostTree* tree, cv::FileStorage& fs, int& nfeatures, int* used, const float* th) const
std::queue<const CvDTreeNode*> nodes;
nodes.push( tree->get_root());
const CvDTreeNode* tempNode;
int leafValIdx = 0;
int internalNodeIdx = 1;
float* leafs = new float[(int)pow(2.f, get_params().max_depth)];
fs << "{";
fs << "internalNodes" << "[";
while (!nodes.empty())
tempNode = nodes.front();
CV_Assert( tempNode->left );
if ( !tempNode->left->left && !tempNode->left->right)
leafs[-leafValIdx] = (float)tempNode->left->value;
fs << leafValIdx-- ;
nodes.push( tempNode->left );
fs << internalNodeIdx++;
CV_Assert( tempNode->right );
if ( !tempNode->right->left && !tempNode->right->right)
leafs[-leafValIdx] = (float)tempNode->right->value;
fs << leafValIdx--;
nodes.push( tempNode->right );
fs << internalNodeIdx++;
int fidx = tempNode->split->var_idx;
fs << nfeatures;
used[nfeatures++] = fidx;
fs << tempNode->split->ord.c;
fs << "]";
fs << "leafValues" << "[";
for (int ni = 0; ni < -leafValIdx; ni++)
fs << ( (!th) ? leafs[ni] : (sgn(leafs[ni]) * *th));
fs << "]";
fs << "}";
// Writes the trained octave as a mapping with the keys "scale", "weaks",
// "trees" (one entry per weak classifier, see traverse()) and "features"
// (the pool entries referenced by the trees, in the order they were used).
//   thresholds - optional per-stage values; when non-empty, element i is passed
//                to traverse() for tree i. Any depth is accepted and converted
//                to float, so the CV_64F output of setRejectThresholds() works.
void sft::Octave::write( cv::FileStorage &fso, const FeaturePool& pool, const Mat& thresholds) const
{
    // a tree of depth d has at most 2^d - 1 split nodes, each using one feature
    cv::Mat used( 1, weak->total * ((1 << params.max_depth) - 1), CV_32SC1);
    int* usedPtr = used.ptr<int>(0);
    int nfeatures = 0;

    // traverse() reads a float; reading a CV_64F buffer through ptr<float>
    // would reinterpret the bytes, so convert once up front
    cv::Mat thresholds32f;
    if (!thresholds.empty())
    {
        thresholds.convertTo(thresholds32f, CV_32F);
        CV_Assert(thresholds32f.isContinuous() && thresholds32f.total() >= (size_t)weak->total);
    }

    fso << "{"
        << "scale" << logScale
        << "weaks" << weak->total
        << "trees" << "[";

    // should be replaced with the H.L. one
    CvSeqReader reader;
    cvStartReadSeq( weak, &reader);

    for(int i = 0; i < weak->total; i++ )
    {
        CvBoostTree* tree;
        CV_READ_SEQ_ELEM( tree, reader );

        // exactly one serialisation per tree
        if (!thresholds32f.empty())
            traverse(tree, fso, nfeatures, usedPtr, thresholds32f.ptr<float>(0) + i);
        else
            traverse(tree, fso, nfeatures, usedPtr);
    }
    fso << "]";

    // features
    fso << "features" << "[";
    for (int i = 0; i < nfeatures; ++i)
        pool.write(fso, usedPtr[i]);
    fso << "]"
        << "}";
}
void sft::Octave::initial_weights(double (&p)[2])
double n = data->sample_count;
p[0] = n / (2. * (double)(nnegatives));
p[1] = n / (2. * (double)(npositives));
bool sft::Octave::train(const Dataset& dataset, const FeaturePool& pool, int weaks, int treeDepth)
CV_Assert(treeDepth == 2);
CV_Assert(weaks > 0);
params.max_depth = treeDepth;
params.weak_count = weaks;
// 1. fill integrals and classes
processPositives(dataset, pool);
// exit(0);
// 2. only sumple case (all features used)
int nfeatures = pool.size();
cv::Mat varIdx(1, nfeatures, CV_32SC1);
int* ptr = varIdx.ptr<int>(0);
for (int x = 0; x < nfeatures; ++x)
ptr[x] = x;
// 3. only sumple case (all samples used)
int nsamples = npositives + nnegatives;
cv::Mat sampleIdx(1, nsamples, CV_32SC1);
ptr = sampleIdx.ptr<int>(0);
for (int x = 0; x < nsamples; ++x)
ptr[x] = x;
// 4. ICF has an orderable responce.
cv::Mat varType(1, nfeatures + 1, CV_8UC1);
uchar* uptr = varType.ptr<uchar>(0);
for (int x = 0; x < nfeatures; ++x)
uptr[x] = CV_VAR_ORDERED;
uptr[nfeatures] = CV_VAR_CATEGORICAL;
trainData.create(nfeatures, nsamples, CV_32FC1);
for (int fi = 0; fi < nfeatures; ++fi)
float* dptr = trainData.ptr<float>(fi);
for (int si = 0; si < nsamples; ++si)
dptr[si] = pool.apply(fi, si, integrals);
cv::Mat missingMask;
bool ok = train(trainData, responses, varIdx, sampleIdx, varType, missingMask);
if (!ok)
std::cout << "ERROR: tree can not be trained " << std::endl;
#if defined SELF_TEST
cv::Mat a(1, nfeatures, CV_32FC1);
cv::Mat votes(1, cvSliceLength( CV_WHOLE_SEQ, weak ), CV_32FC1, cv::Scalar::all(0));
// std::cout << a.cols << " " << a.rows << " !!!!!!!!!!! " << data->var_all << std::endl;
for (int si = 0; si < nsamples; ++si)
// trainData.col(si).copyTo(a.reshape(0,trainData.rows));
float desision = predict(trainData.col(si), votes, false, true);
// std::cout << "desision " << desision << " class " <<<float>(si, 0) << votes <<std::endl;
return ok;
float sft::Octave::predict( const Mat& _sample, Mat& _votes, bool raw_mode, bool return_sum ) const
CvMat sample = _sample, votes = _votes;
return CvBoost::predict(&sample, 0, (_votes.empty())? 0 : &votes, CV_WHOLE_SEQ, raw_mode, return_sum);
// Runs only the weak classifiers selected by `range` on one sample, with
// raw_mode = false and return_sum = true, and returns the base-class result.
float sft::Octave::predict( const Mat& _sample, const cv::Range range) const
{
    CvMat sample = _sample;

    const bool rawMode = false;
    const bool returnSum = true;
    return CvBoost::predict(&sample, 0, 0, range, rawMode, returnSum);
}
// Stores the classifier under `name` using the base-class writer.
void sft::Octave::write( CvFileStorage* fs, string name) const
{
    const char* nodeName = name.c_str();
    CvBoost::write(fs, nodeName);
}
// ========= FeaturePool ========= //
// Remembers the model size and the requested feature count.
// Both are validated: the size must differ from the default cv::Size() and
// the count must be positive.
// NOTE(review): nothing in this constructor populates `pool`; presumably
// fill() is invoked elsewhere - confirm.
sft::FeaturePool::FeaturePool(cv::Size m, int n)
    : model(m)
    , nfeatures(n)
{
    CV_Assert(m != cv::Size() && n > 0);
}
float sft::FeaturePool::apply(int fi, int si, const Mat& integrals) const
return pool[fi](integrals.row(si), model);
// Writes pool entry `index` to `fs`.
// `index` must be a valid position in the pool; position 0 is valid too
// (the previous check `index > 0` rejected the first feature).
void sft::FeaturePool::write( cv::FileStorage& fs, int index) const
{
    CV_Assert((index >= 0) && (index < (int)pool.size()));
    fs << pool[index];
}
// Writes one ICF feature to `fs` as a mapping with the keys "channel" and "rect".
// The unnamed string parameter is ignored.
// NOTE(review): the value operands that should follow "channel" and "rect" are
// missing in this copy of the file (two consecutive `<<` tokens), and the body
// delimiters are missing as well. The ICF member names are not visible here, so
// the statement is left untouched - restore it from the original source.
void sft::write(cv::FileStorage& fs, const string&, const ICF& f)
fs << "{" << "channel" << << "rect" << << "}";
void sft::FeaturePool::fill(int desired)
int mw = model.width;
int mh = model.height;
int maxPoolSize = (mw -1) * mw / 2 * (mh - 1) * mh / 2 * N_CHANNELS;
nfeatures = std::min(desired, maxPoolSize);
dprintf("Requeste feature pool %d max %d suggested %d\n", desired, maxPoolSize, nfeatures);
sft::Random::engine eng(8854342234L);
sft::Random::engine eng_ch(314152314L);
sft::Random::uniform chRand(0, N_CHANNELS - 1);
sft::Random::uniform xRand(0, model.width - 2);
sft::Random::uniform yRand(0, model.height - 2);
sft::Random::uniform wRand(1, model.width - 1);
sft::Random::uniform hRand(1, model.height - 1);
while (pool.size() < size_t(nfeatures))
int x = xRand(eng);
int y = yRand(eng);
int w = 1 + wRand(eng, model.width - x - 1);
int h = 1 + hRand(eng, model.height - y - 1);
CV_Assert(w > 0);
CV_Assert(h > 0);
CV_Assert(w + x < model.width);
CV_Assert(h + y < model.height);
int ch = chRand(eng_ch);
sft::ICF f(x, y, w, h, ch);
if (std::find(pool.begin(), pool.end(),f) == pool.end())
// std::cout << f << std::endl;
// Streams an ICF feature to `out` as two values separated by a single space
// and returns `out`.
// NOTE(review): both streamed operands are missing in this copy of the file
// (`out << << " " <<;`), and the body delimiters are missing as well. The ICF
// member names are not visible here, so the statement is left untouched -
// restore it from the original source.
std::ostream& sft::operator<<(std::ostream& out, const sft::ICF& m)
out << << " " <<;
return out;
// ============ Dataset ============ //
namespace {
using namespace sft;
string itoa(long i)
char s[65];
sprintf(s, "%ld", i);
return std::string(s);
void glob(const string& path, svector& ret)
glob_t glob_result;
glob(path.c_str(), GLOB_TILDE, 0, &glob_result);
for(uint i = 0; i < glob_result.gl_pathc; ++i)
dprintf("%s\n", ret[i].c_str());
// Collects the training file names for one octave. The folders are expected
// to be laid out as follows:
// 1. positives: <path>/pos/octave_<octave number>/*.png
// 2. negatives: <path>/neg/octave_<octave number>/*.png
// Both lists must end up non-empty (asserted).
Dataset::Dataset(const string& path, const int oct)
{
    dprintf("%s\n", "get dataset file names...");

    const string octaveFiles = "octave_" + itoa(oct) + "/*.png";

    dprintf("%s\n", "Positives globbing...");
    glob(path + "/pos/" + octaveFiles, pos);

    dprintf("%s\n", "Negatives globbing...");
    glob(path + "/neg/" + octaveFiles, neg);

    // Check: files not empty
    CV_Assert(pos.size() != size_t(0));
    CV_Assert(neg.size() != size_t(0));
}