Open Source Computer Vision Library
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
445 lines
14 KiB
445 lines
14 KiB
#include <opencv2/opencv.hpp>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <time.h>
#include <vector>

using namespace cv;
using namespace cv::ml;
using namespace std;
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector ); |
void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData ); |
void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst ); |
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size ); |
Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size ); |
void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size ); |
void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels ); |
void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color ); |
void test_it( const Size & size ); |
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector ) |
{ |
// get the support vectors |
Mat sv = svm->getSupportVectors(); |
const int sv_total = sv.rows; |
// get the decision function |
Mat alpha, svidx; |
double rho = svm->getDecisionFunction(0, alpha, svidx); |
CV_Assert( == 1 && == 1 && sv_total == 1 ); |
CV_Assert( (alpha.type() == CV_64F &&<double>(0) == 1.) || |
(alpha.type() == CV_32F &&<float>(0) == 1.f) ); |
CV_Assert( sv.type() == CV_32F ); |
hog_detector.clear(); |
hog_detector.resize(sv.cols + 1); |
memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0])); |
hog_detector[sv.cols] = (float)-rho; |
} |
/* |
* Convert training/testing set to be used by OpenCV Machine Learning algorithms. |
* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1. |
* Transposition of samples are made if needed. |
*/ |
void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData ) |
{ |
//--Convert data |
const int rows = (int)train_samples.size(); |
const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows ); |
cv::Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed |
trainData = cv::Mat(rows, cols, CV_32FC1 ); |
vector< Mat >::const_iterator itr = train_samples.begin(); |
vector< Mat >::const_iterator end = train_samples.end(); |
for( int i = 0 ; itr != end ; ++itr, ++i ) |
{ |
CV_Assert( itr->cols == 1 || |
itr->rows == 1 ); |
if( itr->cols == 1 ) |
{ |
transpose( *(itr), tmp ); |
tmp.copyTo( trainData.row( i ) ); |
} |
else if( itr->rows == 1 ) |
{ |
itr->copyTo( trainData.row( i ) ); |
} |
} |
} |
void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst ) |
{ |
string line; |
ifstream file; |
| (prefix+filename).c_str() ); |
if( !file.is_open() ) |
{ |
cerr << "Unable to open the list of images from " << filename << " filename." << endl; |
exit( -1 ); |
} |
bool end_of_parsing = false; |
while( !end_of_parsing ) |
{ |
getline( file, line ); |
if( line.empty() ) // no more file to read |
{ |
end_of_parsing = true; |
break; |
} |
Mat img = imread( (prefix+line).c_str() ); // load the image |
if( img.empty() ) // invalid image, just skip it. |
continue; |
#ifdef _DEBUG |
imshow( "image", img ); |
waitKey( 10 ); |
#endif |
img_lst.push_back( img.clone() ); |
} |
} |
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size ) |
{ |
Rect box; |
box.width = size.width; |
box.height = size.height; |
const int size_x = box.width; |
const int size_y = box.height; |
srand( (unsigned int)time( NULL ) ); |
vector< Mat >::const_iterator img = full_neg_lst.begin(); |
vector< Mat >::const_iterator end = full_neg_lst.end(); |
for( ; img != end ; ++img ) |
{ |
box.x = rand() % (img->cols - size_x); |
box.y = rand() % (img->rows - size_y); |
Mat roi = (*img)(box); |
neg_lst.push_back( roi.clone() ); |
#ifdef _DEBUG |
imshow( "img", roi.clone() ); |
waitKey( 10 ); |
#endif |
} |
} |
// From an external HOG descriptor visualization example (the source link is missing from this copy).
/*
 * Render a HOG descriptor on top of a zoomed copy of the image.
 *
 * color_origImg    - image to draw on; it is resized by a factor of 3.
 * descriptorValues - descriptor laid out as consecutive blocks (x-major, then
 *                    y), each block holding 4 cells of 9 bins. It must contain
 *                    at least blocks_x * blocks_y * 4 * 9 values, otherwise
 *                    CV_Assert raises.
 * size             - window size the descriptor was computed for; the number of
 *                    8x8 cells is derived from it.
 *
 * Returns the zoomed image with a grey grid marking the cells and, per cell,
 * one green line per non-zero orientation bin whose length is proportional to
 * the averaged bin strength.
 */
Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size )
{
    const int cellSize = 8;
    const int gradientBinSize = 9;
    const int cellsPerBlock = 4;
    const float zoomFac = 3;
    // dividing 180 degrees into 9 bins, how large (in rad) is one bin?
    const float radRangeForOneBin = (float)(CV_PI/(float)gradientBinSize);

    Mat visu;
    resize(color_origImg, visu, Size( (int)(color_origImg.cols*zoomFac), (int)(color_origImg.rows*zoomFac) ) );

    const int cells_in_x_dir = std::max( size.width / cellSize, 0 );
    const int cells_in_y_dir = std::max( size.height / cellSize, 0 );

    // nr of blocks = nr of cells - 1
    // since there is a new block on each cell (overlapping blocks!) but the last one
    const int blocks_in_x_dir = std::max( cells_in_x_dir - 1, 0 );
    const int blocks_in_y_dir = std::max( cells_in_y_dir - 1, 0 );

    // Every value read below must exist in the descriptor.
    const size_t valuesNeeded = (size_t)blocks_in_x_dir * (size_t)blocks_in_y_dir
                              * (size_t)cellsPerBlock * (size_t)gradientBinSize;
    CV_Assert( descriptorValues.size() >= valuesNeeded );

    // Flat, automatically released storage:
    //   gradientStrengths[(celly * cells_in_x_dir + cellx) * gradientBinSize + bin]
    //   cellUpdateCounter[celly * cells_in_x_dir + cellx]
    const size_t cellCount = (size_t)cells_in_x_dir * (size_t)cells_in_y_dir;
    vector< float > gradientStrengths( cellCount * gradientBinSize, 0.f );
    vector< int > cellUpdateCounter( cellCount, 0 );

    // accumulate gradient strengths per cell
    size_t descriptorDataIdx = 0;
    for (int blockx = 0; blockx < blocks_in_x_dir; blockx++)
    {
        for (int blocky = 0; blocky < blocks_in_y_dir; blocky++)
        {
            // 4 cells per block, visited as (0,0), (0,+1), (+1,0), (+1,+1)
            for (int cellNr = 0; cellNr < cellsPerBlock; cellNr++)
            {
                const int cellx = blockx + (cellNr >= 2 ? 1 : 0);
                const int celly = blocky + (cellNr % 2);
                const size_t cell = (size_t)celly * cells_in_x_dir + cellx;

                for (int bin = 0; bin < gradientBinSize; bin++)
                {
                    gradientStrengths[cell * gradientBinSize + bin] += descriptorValues[descriptorDataIdx];
                    descriptorDataIdx++;
                }

                // note: overlapping blocks lead to multiple updates of this sum!
                // we therefore keep track how often a cell was updated,
                // to compute average gradient strengths
                cellUpdateCounter[cell]++;
            }
        }
    }

    // compute average gradient strengths
    for (size_t cell = 0; cell < cellCount; cell++)
    {
        const int updates = cellUpdateCounter[cell];
        if (updates == 0) // cell not covered by any block: keep zeros, avoid 0/0
            continue;
        for (int bin = 0; bin < gradientBinSize; bin++)
            gradientStrengths[cell * gradientBinSize + bin] /= (float)updates;
    }

    // draw cells
    const float maxVecLen = (float)(cellSize/2.f);
    const float scale = 2.5; // just a visualization scale, to see the lines better
    for (int celly = 0; celly < cells_in_y_dir; celly++)
    {
        for (int cellx = 0; cellx < cells_in_x_dir; cellx++)
        {
            const int drawX = cellx * cellSize;
            const int drawY = celly * cellSize;
            const int mx = drawX + cellSize/2;
            const int my = drawY + cellSize/2;
            const size_t cell = (size_t)celly * cells_in_x_dir + cellx;

            rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1);

            // draw in each cell all 9 gradient strengths
            for (int bin = 0; bin < gradientBinSize; bin++)
            {
                const float currentGradStrength = gradientStrengths[cell * gradientBinSize + bin];

                // no line to draw?
                if (currentGradStrength == 0)
                    continue;

                // direction of the bin centre
                const float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;
                const float dirVecX = cos( currRad );
                const float dirVecY = sin( currRad );

                // compute line coordinates, symmetric around the cell centre
                const float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
                const float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
                const float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
                const float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;

                // draw gradient visualization
                line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1);
            }
        }
    }

    return visu;
} // get_hogdescriptor_visu
void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size ) |
{ |
HOGDescriptor hog; |
hog.winSize = size; |
Mat gray; |
vector< Point > location; |
vector< float > descriptors; |
vector< Mat >::const_iterator img = img_lst.begin(); |
vector< Mat >::const_iterator end = img_lst.end(); |
for( ; img != end ; ++img ) |
{ |
cvtColor( *img, gray, COLOR_BGR2GRAY ); |
hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ), location ); |
gradient_lst.push_back( Mat( descriptors ).clone() ); |
#ifdef _DEBUG |
imshow( "gradient", get_hogdescriptor_visu( img->clone(), descriptors, size ) ); |
waitKey( 10 ); |
#endif |
} |
} |
void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels ) |
{ |
Mat train_data; |
convert_to_ml( gradient_lst, train_data ); |
clog << "Start training..."; |
Ptr<SVM> svm = SVM::create(); |
/* Default values to train SVM */ |
svm->setCoef0(0.0); |
svm->setDegree(3); |
svm->setTermCriteria(TermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 1e-3 )); |
svm->setGamma(0); |
svm->setKernel(SVM::LINEAR); |
svm->setNu(0.5); |
svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function? |
svm->setC(0.01); // From paper, soft classifier |
svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task |
svm->train(train_data, ROW_SAMPLE, Mat(labels)); |
clog << "...[done]" << endl; |
svm->save( "my_people_detector.yml" ); |
} |
/*
 * Outline every rectangle of `locations` on `img`, using `color` and a line
 * thickness of 2. An empty list leaves the image untouched.
 */
void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color )
{
    for( size_t idx = 0; idx < locations.size(); ++idx )
    {
        rectangle( img, locations[idx], color, 2 );
    }
}
void test_it( const Size & size ) |
{ |
char key = 27; |
Scalar reference( 0, 255, 0 ); |
Scalar trained( 0, 0, 255 ); |
Mat img, draw; |
Ptr<SVM> svm; |
HOGDescriptor hog; |
HOGDescriptor my_hog; |
my_hog.winSize = size; |
VideoCapture video; |
vector< Rect > locations; |
// Load the trained SVM. |
svm = StatModel::load<SVM>( "my_people_detector.yml" ); |
// Set the trained svm to my_hog |
vector< float > hog_detector; |
get_svm_detector( svm, hog_detector ); |
my_hog.setSVMDetector( hog_detector ); |
// Set the people detector. |
hog.setSVMDetector( hog.getDefaultPeopleDetector() ); |
// Open the camera. |
|; |
if( !video.isOpened() ) |
{ |
cerr << "Unable to open the device 0" << endl; |
exit( -1 ); |
} |
bool end_of_process = false; |
while( !end_of_process ) |
{ |
video >> img; |
if( img.empty() ) |
break; |
draw = img.clone(); |
locations.clear(); |
hog.detectMultiScale( img, locations ); |
draw_locations( draw, locations, reference ); |
locations.clear(); |
my_hog.detectMultiScale( img, locations ); |
draw_locations( draw, locations, trained ); |
imshow( "Video", draw ); |
key = (char)waitKey( 10 ); |
if( 27 == key ) |
end_of_process = true; |
} |
} |
int main( int argc, char** argv ) |
{ |
cv::CommandLineParser parser(argc, argv, "{help h|| show help message}" |
"{pd||pos_dir}{p||pos.lst}{nd||neg_dir}{n||neg.lst}"); |
if (parser.has("help")) |
{ |
parser.printMessage(); |
exit(0); |
} |
vector< Mat > pos_lst; |
vector< Mat > full_neg_lst; |
vector< Mat > neg_lst; |
vector< Mat > gradient_lst; |
vector< int > labels; |
string pos_dir = parser.get<string>("pd"); |
string pos = parser.get<string>("p"); |
string neg_dir = parser.get<string>("nd"); |
string neg = parser.get<string>("n"); |
if( pos_dir.empty() || pos.empty() || neg_dir.empty() || neg.empty() ) |
{ |
cout << "Wrong number of parameters." << endl |
<< "Usage: " << argv[0] << " --pd=pos_dir -p=pos.lst --nd=neg_dir -n=neg.lst" << endl |
<< "example: " << argv[0] << " --pd=/INRIA_dataset/ -p=Train/pos.lst --nd=/INRIA_dataset/ -n=Train/neg.lst" << endl; |
exit( -1 ); |
} |
load_images( pos_dir, pos, pos_lst ); |
labels.assign( pos_lst.size(), +1 ); |
const unsigned int old = (unsigned int)labels.size(); |
load_images( neg_dir, neg, full_neg_lst ); |
sample_neg( full_neg_lst, neg_lst, Size( 96,160 ) ); |
labels.insert( labels.end(), neg_lst.size(), -1 ); |
CV_Assert( old < labels.size() ); |
compute_hog( pos_lst, gradient_lst, Size( 96, 160 ) ); |
compute_hog( neg_lst, gradient_lst, Size( 96, 160 ) ); |
train_svm( gradient_lst, labels ); |
test_it( Size( 96, 160 ) ); // change with your parameters |
return 0; |