# include "opencv2/highgui/highgui.hpp"
# include "opencv2/imgproc/imgproc.hpp"
# include "opencv2/features2d/features2d.hpp"
# include "opencv2/nonfree/nonfree.hpp"
# include "opencv2/ml/ml.hpp"
# include <fstream>
# include <iostream>
# include <memory>
# if defined WIN32 || defined _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# undef min
# undef max
# include "sys/types.h"
# endif
# include <sys/stat.h>
# define DEBUG_DESC_PROGRESS
using namespace cv ;
using namespace std ;
// Names of the files and sub-directories used by this sample.
// The three directory names are appended directly to the result root path
// (see makeUsedDirs), which is why they carry their own leading separator.
const std::string paramsFile( " params.xml " );
const std::string vocabularyFile( " vocabulary.xml.gz " );
const std::string bowImageDescriptorsDir( " /bowImageDescriptors " );
const std::string svmsDir( " /svms " );
const std::string plotsDir( " /plots " );
// Prints the usage text of the sample to standard output.
//  - argv  Command-line argument vector; only argv[0] (the program name) is read.
static void help( char** argv )
{
    std::cout << " \n This program shows how to read in, train on and produce test results for the PASCAL VOC (Visual Object Challenge) data. \n "
              << " It shows how to use detectors, descriptors and recognition methods \n "
              // The version is streamed directly: a printf-style "%s" placeholder is not
              // interpreted by iostreams and was previously printed literally.
              << " Using OpenCV version " << CV_VERSION << " \n "
              << " Call: \n "
              << " Format: \n ./ " << argv[0] << " [VOC path] [result directory] \n "
              << " or: \n "
              << " ./ " << argv[0] << " [VOC path] [result directory] [feature detector] [descriptor extractor] [descriptor matcher] \n "
              << " \n "
              << " Input parameters: \n "
              << " [VOC path] Path to Pascal VOC data (e.g. /home/my/VOCdevkit/VOC2010). Note: VOC2007-VOC2010 are supported. \n "
              << " [result directory] Path to result directory. Following folders will be created in [result directory]: \n "
              << " bowImageDescriptors - to store image descriptors, \n "
              << " svms - to store trained svms, \n "
              << " plots - to store files for plots creating. \n "
              << " [feature detector] Feature detector name (e.g. SURF, FAST...) - see createFeatureDetector() function in detectors.cpp \n "
              << " Currently 12/2010, this is FAST, STAR, SIFT, SURF, MSER, GFTT, HARRIS \n "
              << " [descriptor extractor] Descriptor extractor name (e.g. SURF, SIFT) - see createDescriptorExtractor() function in descriptors.cpp \n "
              << " Currently 12/2010, this is SURF, OpponentSIFT, SIFT, OpponentSURF, BRIEF \n "
              << " [descriptor matcher] Descriptor matcher name (e.g. BruteForce) - see createDescriptorMatcher() function in matchers.cpp \n "
              << " Currently 12/2010, this is BruteForce, BruteForce-L1, FlannBased, BruteForce-Hamming, BruteForce-HammingLUT \n "
              << " \n ";
}
// Creates the directory 'dir' with the platform's native call.
// The result of the call is not inspected, so a failure (for instance because
// the directory already exists) passes silently.
static void makeDir( const string& dir )
{
    const char* const dirPath = dir.c_str();
#if defined WIN32 || defined _WIN32
    CreateDirectory( dirPath, 0 );
#else
    // full access for owner and group, read + search for everybody else
    const mode_t dirMode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
    mkdir( dirPath, dirMode );
#endif
}
// Creates the working sub-directories (BoW image descriptors, SVMs, plots)
// by appending each directory constant to 'rootPath', in that order.
static void makeUsedDirs( const string& rootPath )
{
    const string* const subDirs[] = { &bowImageDescriptorsDir, &svmsDir, &plotsDir };
    const size_t subDirCount = sizeof( subDirs ) / sizeof( subDirs[0] );
    for( size_t k = 0; k < subDirCount; ++k )
    {
        makeDir( rootPath + *subDirs[k] );
    }
}
/****************************************************************************************\
*                      Classes to work with PASCAL VOC dataset                           *
\****************************************************************************************/
//
// TODO: refactor this part of the code
//
//used to specify the (sub-)dataset over which operations are performed
enum ObdDatasetType
{
    CV_OBD_TRAIN = 0, // operate on the training set
    CV_OBD_TEST = 1   // operate on the test set
};
// Plain record describing one annotated object instance of an image.
class ObdObject
{
public:
    string object_class; // class label; compared against the queried class name
    Rect boundingBox;    // object rectangle; used for the overlap test against detections
};
//extended object data specific to VOC
// Pose annotation of a VOC object.
enum VocPose
{
    CV_VOC_POSE_UNSPECIFIED = 0,
    CV_VOC_POSE_FRONTAL = 1,
    CV_VOC_POSE_REAR = 2,
    CV_VOC_POSE_LEFT = 3,
    CV_VOC_POSE_RIGHT = 4
};
// VOC-specific attributes stored alongside each ObdObject.
// The members are left uninitialised by construction; whoever fills the record sets them.
class VocObjectData
{
public:
    bool difficult; // object is marked 'difficult'; such objects can be excluded from the evaluation
    bool occluded;  // NOTE(review): presumably the VOC 'occluded' annotation flag - not read in this part of the file
    bool truncated; // NOTE(review): presumably the VOC 'truncated' annotation flag - not read in this part of the file
    VocPose pose;   // annotated pose of the object
};
//enum VocDataset {CV_VOC2007, CV_VOC2008, CV_VOC2009, CV_VOC2010};
// Output target selector for the gnuplot helper (see savePrecRecallToGnuplot).
enum VocPlotType
{
    CV_VOC_PLOT_SCREEN = 0,
    CV_VOC_PLOT_PNG = 1
};
// Per-image ground truth state of one object class.
enum VocGT
{
    CV_VOC_GT_NONE = 0,      // no instance of the class in the image
    CV_VOC_GT_DIFFICULT = 1, // instances are present, but all of them are marked difficult
    CV_VOC_GT_PRESENT = 2    // at least one non-difficult instance is present
};
// Condition selector passed, together with a threshold, to the confusion-matrix functions.
// NOTE(review): the names suggest "fixed recall" vs. "score threshold" - confirm against the definitions.
enum VocConfCond
{
    CV_VOC_CCOND_RECALL = 0,
    CV_VOC_CCOND_SCORETHRESH = 1
};
// Selects the VOC task a results filename is generated for.
enum VocTask
{
    CV_VOC_TASK_CLASSIFICATION = 0,
    CV_VOC_TASK_DETECTION = 1
};
// Identifies one dataset image by its id together with the path of its image file.
class ObdImage
{
public:
    // Stores a copy of both strings.
    // The arguments are taken by const reference so each string is copied exactly once
    // (into the member) instead of once into the parameter and once more into the member.
    //  - p_id    image identifier
    //  - p_path  path of the image file
    ObdImage( const std::string& p_id, const std::string& p_path ) : id( p_id ), path( p_path ) { }

    std::string id;   // image identifier (also used to build the annotation filename)
    std::string path; // path of the image file
};
//used by getDetectorGroundTruth to sort a two dimensional list of floats in descending order
// Sort key that ties a detection score to the (image, object) position it came from.
class ObdScoreIndexSorter
{
public:
    float score;   // detection confidence - the only field taking part in the ordering
    int image_idx; // index of the image in the outer array
    int obj_idx;   // index of the detection within that image

    // Orders by ascending score; the two index fields are ignored.
    bool operator<( const ObdScoreIndexSorter& compare ) const
    {
        return compare.score > score;
    }
};
// Access and evaluation helper for a PASCAL VOC dataset.
// The class reads ground truth (class image sets and per-image annotations), reads and writes
// VOC-style result files and derives metrics (precision/recall, confusion matrix rows) from them.
// Only declarations live here; the member functions are defined out of line.
class VocData
{
public :
// Construction delegates all set-up to initVoc().
VocData ( const string & vocPath , bool useTestDataset )
{ initVoc ( vocPath , useTestDataset ) ; }
~ VocData ( ) { }
/* functions for returning classification/object data for multiple images given an object class */
void getClassImages ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < char > & object_present ) ;
void getClassObjects ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < vector < ObdObject > > & objects ) ;
void getClassObjects ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < vector < ObdObject > > & objects , vector < vector < VocObjectData > > & object_data , vector < VocGT > & ground_truth ) ;
/* functions for returning object data for a single image given an image id */
ObdImage getObjects ( const string & id , vector < ObdObject > & objects ) ;
ObdImage getObjects ( const string & id , vector < ObdObject > & objects , vector < VocObjectData > & object_data ) ;
ObdImage getObjects ( const string & obj_class , const string & id , vector < ObdObject > & objects , vector < VocObjectData > & object_data , VocGT & ground_truth ) ;
/* functions for returning the ground truth (present/absent) for groups of images */
void getClassifierGroundTruth ( const string & obj_class , const vector < ObdImage > & images , vector < char > & ground_truth ) ;
void getClassifierGroundTruth ( const string & obj_class , const vector < string > & images , vector < char > & ground_truth ) ;
// Returns the number of ground truth objects to normalise recall with (see the definition).
int getDetectorGroundTruth ( const string & obj_class , const ObdDatasetType dataset , const vector < ObdImage > & images , const vector < vector < Rect > > & bounding_boxes , const vector < vector < float > > & scores , vector < vector < char > > & ground_truth , vector < vector < char > > & detection_difficult , bool ignore_difficult = true ) ;
/* functions for writing VOC-compatible results files */
void writeClassifierResultsFile ( const string & out_dir , const string & obj_class , const ObdDatasetType dataset , const vector < ObdImage > & images , const vector < float > & scores , const int competition = 1 , const bool overwrite_ifexists = false ) ;
/* functions for calculating metrics from a set of classification/detection results */
string getResultsFilename ( const string & obj_class , const VocTask task , const ObdDatasetType dataset , const int competition = - 1 , const int number = - 1 ) ;
void calcClassifierPrecRecall ( const string & obj_class , const vector < ObdImage > & images , const vector < float > & scores , vector < float > & precision , vector < float > & recall , float & ap , vector < size_t > & ranking ) ;
void calcClassifierPrecRecall ( const string & obj_class , const vector < ObdImage > & images , const vector < float > & scores , vector < float > & precision , vector < float > & recall , float & ap ) ;
void calcClassifierPrecRecall ( const string & input_file , vector < float > & precision , vector < float > & recall , float & ap , bool outputRankingFile = false ) ;
/* functions for calculating confusion matrices */
void calcClassifierConfMatRow ( const string & obj_class , const vector < ObdImage > & images , const vector < float > & scores , const VocConfCond cond , const float threshold , vector < string > & output_headers , vector < float > & output_values ) ;
void calcDetectorConfMatRow ( const string & obj_class , const ObdDatasetType dataset , const vector < ObdImage > & images , const vector < vector < float > > & scores , const vector < vector < Rect > > & bounding_boxes , const VocConfCond cond , const float threshold , vector < string > & output_headers , vector < float > & output_values , bool ignore_difficult = true ) ;
/* functions for outputting gnuplot output files */
void savePrecRecallToGnuplot ( const string & output_file , const vector < float > & precision , const vector < float > & recall , const float ap , const string title = string ( ) , const VocPlotType plot_type = CV_VOC_PLOT_SCREEN ) ;
/* functions for reading in result/ground truth files */
void readClassifierGroundTruth ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < char > & object_present ) ;
void readClassifierResultsFile ( const std : : string & input_file , vector < ObdImage > & images , vector < float > & scores ) ;
void readDetectorResultsFile ( const string & input_file , vector < ObdImage > & images , vector < vector < float > > & scores , vector < vector < Rect > > & bounding_boxes ) ;
/* functions for getting dataset info */
const vector < string > & getObjectClasses ( ) ;
string getResultsDirectory ( ) ;
protected :
// initialisation; initVoc() is the entry point called by the constructor
void initVoc ( const string & vocPath , const bool useTestDataset ) ;
void initVoc2007to2010 ( const string & vocPath , const bool useTestDataset ) ;
// file readers that return image codes (strings) rather than ObdImage records
void readClassifierGroundTruth ( const string & filename , vector < string > & image_codes , vector < char > & object_present ) ;
void readClassifierResultsFile ( const string & input_file , vector < string > & image_codes , vector < float > & scores ) ;
void readDetectorResultsFile ( const string & input_file , vector < string > & image_codes , vector < vector < float > > & scores , vector < vector < Rect > > & bounding_boxes ) ;
// reads the objects of one annotation file (called by getObjects)
void extractVocObjects ( const string filename , vector < ObdObject > & objects , vector < VocObjectData > & object_data ) ;
// maps an image id to the path of its image file (called by getObjects)
string getImagePath ( const string & input_str ) ;
// shared implementation of getClassImages(); takes the dataset name as a string
void getClassImages_impl ( const string & obj_class , const string & dataset_str , vector < ObdImage > & images , vector < char > & object_present ) ;
void calcPrecRecall_impl ( const vector < char > & ground_truth , const vector < float > & scores , vector < float > & precision , vector < float > & recall , float & ap , vector < size_t > & ranking , int recall_normalization = - 1 ) ;
//test two bounding boxes to see if they meet the overlap criteria defined in the VOC documentation
float testBoundingBoxesForOverlap ( const Rect detection , const Rect ground_truth ) ;
//extract class and dataset name from a VOC-standard classification/detection results filename
void extractDataFromResultsFilename ( const string & input_file , string & class_name , string & dataset_name ) ;
//get classifier ground truth for a single image
bool getClassifierGroundTruthImage ( const string & obj_class , const string & id ) ;
//utility functions
void getSortOrder ( const vector < float > & values , vector < size_t > & order , bool descending = true ) ;
int stringToInteger ( const string input_str ) ;
void readFileToString ( const string filename , string & file_contents ) ;
string integerToString ( const int input_int ) ;
string checkFilenamePathsep ( const string filename , bool add_trailing_slash = false ) ;
void convertImageCodesToObdImages ( const vector < string > & image_codes , vector < ObdImage > & images ) ;
int extractXMLBlock ( const string src , const string tag , const int searchpos , string & tag_contents ) ;
//utility sorter: compares (index, iterator) pairs by the value the iterator points at,
//returning true when a's value is greater than b's (i.e. a descending order)
struct orderingSorter
{
bool operator ( ) ( std : : pair < size_t , vector < float > : : const_iterator > const & a , std : : pair < size_t , vector < float > : : const_iterator > const & b )
{
return ( * a . second ) > ( * b . second ) ;
}
} ;
//data members: paths and path templates
//(m_annotation_path and m_class_imageset_path are templates whose placeholders are filled in by the accessors)
string m_vocPath ;
string m_vocName ;
//string m_resPath;
string m_annotation_path ;
string m_image_path ;
string m_imageset_path ;
string m_class_imageset_path ;
//data members with a "classifier_gt" prefix
//NOTE(review): presumably a cache used by getClassifierGroundTruthImage - confirm against its definition
vector < string > m_classifier_gt_all_ids ;
vector < char > m_classifier_gt_all_present ;
string m_classifier_gt_class ;
//data members: names of the training/test image sets and dataset-wide settings
string m_train_set ;
string m_test_set ;
vector < string > m_object_classes ;
float m_min_overlap ;
bool m_sampled_ap ;
} ;
//Return the classification ground truth data for all images of a given VOC object class
//--------------------------------------------------------------------------------------
//INPUTS:
// - obj_class The VOC object class identifier string
// - dataset Specifies whether to extract images from the training or test set
//OUTPUTS:
// - images An array of ObdImage containing info of all images extracted from the ground truth file
// - object_present An array of bools specifying whether the object defined by 'obj_class' is present in each image or not
//NOTES:
// This function is primarily useful for the classification task, where only
// whether a given object is present or not in an image is required, and not each object instance's
// position etc.
void VocData : : getClassImages ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < char > & object_present )
{
string dataset_str ;
//generate the filename of the classification ground-truth textfile for the object class
if ( dataset = = CV_OBD_TRAIN )
{
dataset_str = m_train_set ;
} else {
dataset_str = m_test_set ;
}
getClassImages_impl ( obj_class , dataset_str , images , object_present ) ;
}
void VocData : : getClassImages_impl ( const string & obj_class , const string & dataset_str , vector < ObdImage > & images , vector < char > & object_present )
{
//generate the filename of the classification ground-truth textfile for the object class
string gtFilename = m_class_imageset_path ;
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , obj_class ) ;
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , dataset_str ) ;
//parse the ground truth file, storing in two separate vectors
//for the image code and the ground truth value
vector < string > image_codes ;
readClassifierGroundTruth ( gtFilename , image_codes , object_present ) ;
//prepare output arrays
images . clear ( ) ;
convertImageCodesToObdImages ( image_codes , images ) ;
}
//Return the object data for all images of a given VOC object class
//-----------------------------------------------------------------
//INPUTS:
// - obj_class The VOC object class identifier string
// - dataset Specifies whether to extract images from the training or test set
//OUTPUTS:
// - images An array of ObdImage containing info of all images in chosen dataset (tag, path etc.)
// - objects Contains the extended object info (bounding box etc.) for each object instance in each image
// - object_data Contains VOC-specific extended object info (marked difficult etc.)
// - ground_truth Specifies whether there are any difficult/non-difficult instances of the current
// object class within each image
//NOTES:
// This function returns extended object information in addition to the absent/present
// classification data returned by getClassImages. The objects returned for each image in the 'objects'
// array are of all object classes present in the image, and not just the class defined by 'obj_class'.
// 'ground_truth' can be used to determine quickly whether an object instance of the given class is present
// in an image or not.
void VocData : : getClassObjects ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < vector < ObdObject > > & objects )
{
vector < vector < VocObjectData > > object_data ;
vector < VocGT > ground_truth ;
getClassObjects ( obj_class , dataset , images , objects , object_data , ground_truth ) ;
}
void VocData : : getClassObjects ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < vector < ObdObject > > & objects , vector < vector < VocObjectData > > & object_data , vector < VocGT > & ground_truth )
{
//generate the filename of the classification ground-truth textfile for the object class
string gtFilename = m_class_imageset_path ;
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , obj_class ) ;
if ( dataset = = CV_OBD_TRAIN )
{
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , m_train_set ) ;
} else {
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , m_test_set ) ;
}
//parse the ground truth file, storing in two separate vectors
//for the image code and the ground truth value
vector < string > image_codes ;
vector < char > object_present ;
readClassifierGroundTruth ( gtFilename , image_codes , object_present ) ;
//prepare output arrays
images . clear ( ) ;
objects . clear ( ) ;
object_data . clear ( ) ;
ground_truth . clear ( ) ;
string annotationFilename ;
vector < ObdObject > image_objects ;
vector < VocObjectData > image_object_data ;
VocGT image_gt ;
//transfer to output arrays and read in object data for each image
for ( size_t i = 0 ; i < image_codes . size ( ) ; + + i )
{
ObdImage image = getObjects ( obj_class , image_codes [ i ] , image_objects , image_object_data , image_gt ) ;
images . push_back ( image ) ;
objects . push_back ( image_objects ) ;
object_data . push_back ( image_object_data ) ;
ground_truth . push_back ( image_gt ) ;
}
}
//Return ground truth data for the objects present in an image with a given UID
//-----------------------------------------------------------------------------
//INPUTS:
// - id VOC Dataset unique identifier (string code in form YYYY_XXXXXX where YYYY is the year)
// - obj_class (*3) Specifies the object class to use to resolve 'ground_truth'
//OUTPUTS:
// - objects Contains the extended object info (bounding box etc.) for each object in the image
// - object_data (*2,3) Contains VOC-specific extended object info (marked difficult etc.)
// - ground_truth (*3) Specifies whether there are any difficult/non-difficult instances of the current
// object class within the image
//RETURN VALUE:
// ObdImage containing path and other details of image file with given code
//NOTES:
// There are three versions of this function
// * One returns a simple array of objects given an id [1]
// * One returns the same as (1) plus VOC specific object data [2]
// * One returns the same as (2) plus the ground_truth flag. This also requires an extra input obj_class [3]
// Version [1]: returns only the objects of image 'id'; the VOC-specific object data
// is read as well but thrown away.
ObdImage VocData::getObjects( const string& id, vector<ObdObject>& objects )
{
    vector<VocObjectData> discarded_object_data;
    return getObjects( id, objects, discarded_object_data );
}
// Version [2]: reads the annotation file of image 'id' into 'objects' / 'object_data'
// and returns an ObdImage holding the id and the path of the image file.
ObdImage VocData::getObjects( const string& id, vector<ObdObject>& objects, vector<VocObjectData>& object_data )
{
    // substitute the image id for the placeholder of the annotation path template
    string annotation_file = m_annotation_path;
    const string::size_type id_slot = annotation_file.find( " %s " );
    annotation_file.replace( id_slot, 2, id );

    // pull the objects of this image out of the annotation xml
    extractVocObjects( annotation_file, objects, object_data );

    // the image path is derived from the same id
    const string image_path = getImagePath( id );
    return ObdImage( id, image_path );
}
// Version [3]: like version [2], and additionally resolves the ground truth state of
// 'obj_class' for the image:
//   CV_VOC_GT_PRESENT   - at least one non-difficult instance exists
//   CV_VOC_GT_DIFFICULT - instances exist, but every one of them is marked difficult
//   CV_VOC_GT_NONE      - no instance of the class
ObdImage VocData::getObjects( const string& obj_class, const string& id, vector<ObdObject>& objects, vector<VocObjectData>& object_data, VocGT& ground_truth )
{
    // object data first; the flag is derived from it afterwards
    ObdImage image = getObjects( id, objects, object_data );

    ground_truth = CV_VOC_GT_NONE;
    for ( size_t k = 0; k < objects.size(); ++k )
    {
        // objects of other classes do not influence the flag
        if ( !( objects[k].object_class == obj_class ) )
            continue;

        if ( object_data[k].difficult )
        {
            // provisional: may still be upgraded by a later non-difficult instance
            ground_truth = CV_VOC_GT_DIFFICULT;
            continue;
        }

        // a single non-difficult instance settles the result
        ground_truth = CV_VOC_GT_PRESENT;
        break;
    }
    return image;
}
//Return ground truth data for the presence/absence of a given object class in an arbitrary array of images
//---------------------------------------------------------------------------------------------------------
//INPUTS:
// - obj_class The VOC object class identifier string
// - images An array of ObdImage OR strings containing the images for which ground truth
// will be computed
//OUTPUTS:
// - ground_truth An output array indicating the presence/absence of obj_class within each image
void VocData : : getClassifierGroundTruth ( const string & obj_class , const vector < ObdImage > & images , vector < char > & ground_truth )
{
vector < char > ( images . size ( ) ) . swap ( ground_truth ) ;
vector < ObdObject > objects ;
vector < VocObjectData > object_data ;
vector < char > : : iterator gt_it = ground_truth . begin ( ) ;
for ( vector < ObdImage > : : const_iterator it = images . begin ( ) ; it ! = images . end ( ) ; + + it , + + gt_it )
{
//getObjects(obj_class, it->id, objects, object_data, voc_ground_truth);
( * gt_it ) = ( getClassifierGroundTruthImage ( obj_class , it - > id ) ) ;
}
}
void VocData : : getClassifierGroundTruth ( const string & obj_class , const vector < string > & images , vector < char > & ground_truth )
{
vector < char > ( images . size ( ) ) . swap ( ground_truth ) ;
vector < ObdObject > objects ;
vector < VocObjectData > object_data ;
vector < char > : : iterator gt_it = ground_truth . begin ( ) ;
for ( vector < string > : : const_iterator it = images . begin ( ) ; it ! = images . end ( ) ; + + it , + + gt_it )
{
//getObjects(obj_class, (*it), objects, object_data, voc_ground_truth);
( * gt_it ) = ( getClassifierGroundTruthImage ( obj_class , ( * it ) ) ) ;
}
}
//Return ground truth data for the accuracy of detection results
//--------------------------------------------------------------
//INPUTS:
// - obj_class The VOC object class identifier string
// - dataset Specifies whether the training or test set is used to count the ground truth
// objects for p-r normalization
// - images An array of ObdImage containing the images for which ground truth
// will be computed
// - bounding_boxes A 2D input array containing the bounding box rects of the objects of
// obj_class which were detected in each image
// - scores A 2D input array containing the confidence score of each detection, with the
// same layout as bounding_boxes; detections are processed in descending score order
// - (ignore_difficult) If set to true, objects marked as difficult will be ignored when returning
// the number of hits for p-r normalization (default = true)
//OUTPUTS:
// - ground_truth A 2D output array indicating whether each object detection was accurate
// or not
// - detection_difficult A 2D output array indicating whether the detection fired on an object
// marked as 'difficult'. This allows it to be ignored if necessary
// (the voc documentation specifies objects marked as difficult
// have no effects on the results and are effectively ignored)
//RETURN VALUE:
// Returns the number of object hits in total in the gt to allow proper normalization
// of a p-r curve
//NOTES:
// As stated in the VOC documentation, multiple detections of the same object in an image are
// considered FALSE detections e.g. 5 detections of a single object is counted as 1 correct
// detection and 4 false detections - it is the responsibility of the participant's system
// to filter multiple detections from its output
// Matches detections against the ground truth objects of 'obj_class'.
// Detections from all images are visited in descending score order; each one is assigned
// to the not-yet-claimed ground truth object of its image with the largest overlap value
// (if any), so a ground truth object can validate at most one detection.
// Returns the number of ground truth objects of 'obj_class' in the selected dataset
// (difficult ones excluded unless ignore_difficult is false), for recall normalisation.
int VocData : : getDetectorGroundTruth ( const string & obj_class , const ObdDatasetType dataset , const vector < ObdImage > & images , const vector < vector < Rect > > & bounding_boxes , const vector < vector < float > > & scores , vector < vector < char > > & ground_truth , vector < vector < char > > & detection_difficult , bool ignore_difficult )
{
int recall_normalization = 0 ;
/* first create a list of indices referring to the elements of bounding_boxes and scores in
* descending order of scores */
vector < ObdScoreIndexSorter > sorted_ids ;
{
/* first count how many objects to allow preallocation */
size_t obj_count = 0 ;
// the three outer arrays must have one entry per image
CV_Assert ( images . size ( ) = = bounding_boxes . size ( ) ) ;
CV_Assert ( scores . size ( ) = = bounding_boxes . size ( ) ) ;
for ( size_t im_idx = 0 ; im_idx < scores . size ( ) ; + + im_idx )
{
// ...and each image must have exactly one score per bounding box
CV_Assert ( scores [ im_idx ] . size ( ) = = bounding_boxes [ im_idx ] . size ( ) ) ;
obj_count + = scores [ im_idx ] . size ( ) ;
}
/* preallocate id vector */
sorted_ids . resize ( obj_count ) ;
/* now copy across scores and indexes to preallocated vector */
int flat_pos = 0 ;
for ( size_t im_idx = 0 ; im_idx < scores . size ( ) ; + + im_idx )
{
for ( size_t ob_idx = 0 ; ob_idx < scores [ im_idx ] . size ( ) ; + + ob_idx )
{
sorted_ids [ flat_pos ] . score = scores [ im_idx ] [ ob_idx ] ;
sorted_ids [ flat_pos ] . image_idx = ( int ) im_idx ;
sorted_ids [ flat_pos ] . obj_idx = ( int ) ob_idx ;
+ + flat_pos ;
}
}
/* and sort the vector in descending order of score */
// (ascending sort through ObdScoreIndexSorter::operator<, followed by a reversal)
std : : sort ( sorted_ids . begin ( ) , sorted_ids . end ( ) ) ;
std : : reverse ( sorted_ids . begin ( ) , sorted_ids . end ( ) ) ;
}
/* prepare ground truth + difficult vector (1st dimension) */
vector < vector < char > > ( images . size ( ) ) . swap ( ground_truth ) ;
vector < vector < char > > ( images . size ( ) ) . swap ( detection_difficult ) ;
// detected[i][k] is set once ground truth object k of image i has been claimed by a detection
vector < vector < char > > detected ( images . size ( ) ) ;
// per-image ground truth restricted to obj_class (filled in below)
vector < vector < ObdObject > > img_objects ( images . size ( ) ) ;
vector < vector < VocObjectData > > img_object_data ( images . size ( ) ) ;
/* preload object ground truth bounding box data */
{
vector < vector < ObdObject > > img_objects_all ( images . size ( ) ) ;
vector < vector < VocObjectData > > img_object_data_all ( images . size ( ) ) ;
for ( size_t image_idx = 0 ; image_idx < images . size ( ) ; + + image_idx )
{
/* prepopulate ground truth bounding boxes */
getObjects ( images [ image_idx ] . id , img_objects_all [ image_idx ] , img_object_data_all [ image_idx ] ) ;
/* meanwhile, also set length of target ground truth + difficult vector to same as number of object detections (2nd dimension) */
ground_truth [ image_idx ] . resize ( bounding_boxes [ image_idx ] . size ( ) ) ;
detection_difficult [ image_idx ] . resize ( bounding_boxes [ image_idx ] . size ( ) ) ;
}
/* save only instances of the object class concerned */
for ( size_t image_idx = 0 ; image_idx < images . size ( ) ; + + image_idx )
{
for ( size_t obj_idx = 0 ; obj_idx < img_objects_all [ image_idx ] . size ( ) ; + + obj_idx )
{
if ( img_objects_all [ image_idx ] [ obj_idx ] . object_class = = obj_class )
{
img_objects [ image_idx ] . push_back ( img_objects_all [ image_idx ] [ obj_idx ] ) ;
img_object_data [ image_idx ] . push_back ( img_object_data_all [ image_idx ] [ obj_idx ] ) ;
}
}
// one "claimed" flag per retained ground truth object, initially unclaimed
detected [ image_idx ] . resize ( img_objects [ image_idx ] . size ( ) , false ) ;
}
}
/* calculate the total number of objects in the ground truth for the current dataset */
// note: this walks the images listed for obj_class in 'dataset', not the 'images' argument
{
vector < ObdImage > gt_images ;
vector < char > gt_object_present ;
getClassImages ( obj_class , dataset , gt_images , gt_object_present ) ;
for ( size_t image_idx = 0 ; image_idx < gt_images . size ( ) ; + + image_idx )
{
vector < ObdObject > gt_img_objects ;
vector < VocObjectData > gt_img_object_data ;
getObjects ( gt_images [ image_idx ] . id , gt_img_objects , gt_img_object_data ) ;
for ( size_t obj_idx = 0 ; obj_idx < gt_img_objects . size ( ) ; + + obj_idx )
{
if ( gt_img_objects [ obj_idx ] . object_class = = obj_class )
{
// difficult objects only count when the caller asked not to ignore them
if ( ( gt_img_object_data [ obj_idx ] . difficult = = false ) | | ( ignore_difficult = = false ) )
+ + recall_normalization ;
}
}
}
}
# ifdef PR_DEBUG
int printed_count = 0 ;
# endif
/* now iterate through detections in descending order of score, assigning to ground truth bounding boxes if possible */
for ( size_t detect_idx = 0 ; detect_idx < sorted_ids . size ( ) ; + + detect_idx )
{
//read in indexes to make following code easier to read
int im_idx = sorted_ids [ detect_idx ] . image_idx ;
int ob_idx = sorted_ids [ detect_idx ] . obj_idx ;
//set ground truth for the current object to false by default
ground_truth [ im_idx ] [ ob_idx ] = false ;
detection_difficult [ im_idx ] [ ob_idx ] = false ;
// best candidate so far; maxov stays at -1.0 while no candidate has been accepted
float maxov = - 1.0 ;
bool max_is_difficult = false ;
int max_gt_obj_idx = - 1 ;
//-- for each detected object iterate through objects present in the bounding box ground truth --
for ( size_t gt_obj_idx = 0 ; gt_obj_idx < img_objects [ im_idx ] . size ( ) ; + + gt_obj_idx )
{
// ground truth objects already claimed by a higher-scoring detection are skipped
if ( detected [ im_idx ] [ gt_obj_idx ] = = false )
{
//check if the detected object and ground truth object overlap by a sufficient margin
// NOTE(review): -1.0 looks like the "overlap criteria not met" return value of
// testBoundingBoxesForOverlap - confirm against its definition
float ov = testBoundingBoxesForOverlap ( bounding_boxes [ im_idx ] [ ob_idx ] , img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox ) ;
if ( ov ! = - 1.0 )
{
//if all conditions are met store the overlap score and index (as objects are assigned to the highest scoring match)
if ( ov > maxov )
{
maxov = ov ;
max_gt_obj_idx = ( int ) gt_obj_idx ;
//store whether the maximum detection is marked as difficult or not
max_is_difficult = ( img_object_data [ im_idx ] [ gt_obj_idx ] . difficult ) ;
}
}
}
}
//-- if a match was found, set the ground truth of the current object to true --
if ( maxov ! = - 1.0 )
{
CV_Assert ( max_gt_obj_idx ! = - 1 ) ;
ground_truth [ im_idx ] [ ob_idx ] = true ;
//store whether the maximum detection was marked as 'difficult' or not
detection_difficult [ im_idx ] [ ob_idx ] = max_is_difficult ;
//remove the ground truth object so it doesn't match with subsequent detected objects
//** this is the behaviour defined by the voc documentation **
detected [ im_idx ] [ max_gt_obj_idx ] = true ;
}
# ifdef PR_DEBUG
// debug trace: prints the first 10 detections visited together with the ground truth boxes of their image
if ( printed_count < 10 )
{
cout < < printed_count < < " : id= " < < images [ im_idx ] . id < < " , score= " < < scores [ im_idx ] [ ob_idx ] < < " ( " < < ob_idx < < " ) [ " < < bounding_boxes [ im_idx ] [ ob_idx ] . x < < " , " < <
bounding_boxes [ im_idx ] [ ob_idx ] . y < < " , " < < bounding_boxes [ im_idx ] [ ob_idx ] . width + bounding_boxes [ im_idx ] [ ob_idx ] . x < <
" , " < < bounding_boxes [ im_idx ] [ ob_idx ] . height + bounding_boxes [ im_idx ] [ ob_idx ] . y < < " ] detected= " < < ground_truth [ im_idx ] [ ob_idx ] < <
" , difficult= " < < detection_difficult [ im_idx ] [ ob_idx ] < < endl ;
+ + printed_count ;
/* print ground truth */
for ( int gt_obj_idx = 0 ; gt_obj_idx < img_objects [ im_idx ] . size ( ) ; + + gt_obj_idx )
{
cout < < " GT: [ " < < img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox . x < < " , " < <
img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox . y < < " , " < < img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox . width + img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox . x < <
" , " < < img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox . height + img_objects [ im_idx ] [ gt_obj_idx ] . boundingBox . y < < " ] " ;
if ( gt_obj_idx = = max_gt_obj_idx ) cout < < " <--- ( " < < maxov < < " overlap) " ;
cout < < endl ;
}
}
# endif
}
return recall_normalization ;
}
//Write VOC-compliant classifier results file
//-------------------------------------------
//INPUTS:
// - out_dir The directory in which the results file is created
// - obj_class The VOC object class identifier string
// - dataset Specifies whether working with the training or test set
// - images An array of ObdImage containing the images for which data will be saved to the result file
// - scores A corresponding array of confidence scores given a query
// - (competition) If specified, defines which competition the results are for (see VOC documentation - default 1)
// - (overwrite_ifexists) If false (default) and the results file already exists, a new file with a
// numbered postfix is created instead of overwriting the existing one
//NOTES:
// The result filename is determined automatically, using out_dir as the base directory
void VocData : : writeClassifierResultsFile ( const string & out_dir , const string & obj_class , const ObdDatasetType dataset , const vector < ObdImage > & images , const vector < float > & scores , const int competition , const bool overwrite_ifexists )
{
CV_Assert ( images . size ( ) = = scores . size ( ) ) ;
string output_file_base , output_file ;
if ( dataset = = CV_OBD_TRAIN )
{
output_file_base = out_dir + " /comp " + integerToString ( competition ) + " _cls_ " + m_train_set + " _ " + obj_class ;
} else {
output_file_base = out_dir + " /comp " + integerToString ( competition ) + " _cls_ " + m_test_set + " _ " + obj_class ;
}
output_file = output_file_base + " .txt " ;
//check if file exists, and if so create a numbered new file instead
if ( overwrite_ifexists = = false )
{
struct stat stFileInfo ;
if ( stat ( output_file . c_str ( ) , & stFileInfo ) = = 0 )
{
string output_file_new ;
int filenum = 0 ;
do
{
+ + filenum ;
output_file_new = output_file_base + " _ " + integerToString ( filenum ) ;
output_file = output_file_new + " .txt " ;
} while ( stat ( output_file . c_str ( ) , & stFileInfo ) = = 0 ) ;
}
}
//output data to file
std : : ofstream result_file ( output_file . c_str ( ) ) ;
if ( result_file . is_open ( ) )
{
for ( size_t i = 0 ; i < images . size ( ) ; + + i )
{
result_file < < images [ i ] . id < < " " < < scores [ i ] < < endl ;
}
result_file . close ( ) ;
} else {
string err_msg = " could not open classifier results file ' " + output_file + " ' for writing. Before running for the first time, a 'results' subdirectory should be created within the VOC dataset base directory. e.g. if the VOC data is stored in /VOC/VOC2010 then the path /VOC/results must be created. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
}
//---------------------------------------
//CALCULATE METRICS FROM VOC RESULTS DATA
//---------------------------------------
//Utility function to construct a VOC-standard results filename
//--------------------------------------------------------------
//INPUTS:
// - obj_class      The VOC object class identifier string
// - task           Specifies whether to generate a filename for the classification or detection task
// - dataset        Specifies whether working with the training or test set
// - competition    Competition number; -1 selects the default (1 for classification, 3 for detection).
//                  Any other value below 1 is rejected with CV_StsBadArg.
// - number         If 1 or above, appended to the filename as a postfix to select one of several
//                  duplicate results files; -1 means no postfix. Any other value below 1 is rejected
//                  with CV_StsBadArg.
//RETURNS:
// The filename only (no directory part).
//NOTES:
// This is primarily useful for returning the filename of a classification file previously computed using
// writeClassifierResultsFile, for example when calling calcClassifierPrecRecall
string VocData::getResultsFilename(const string& obj_class, const VocTask task, const ObdDatasetType dataset, const int competition, const int number)
{
    if ((competition != -1) && (competition < 1))
        CV_Error(CV_StsBadArg, " competition argument should be a positive non-zero number or -1 to accept the default ");
    if ((number != -1) && (number < 1))
        CV_Error(CV_StsBadArg, " number argument should be a positive non-zero number or -1 to accept the default ");

    const bool is_classification = (task == CV_VOC_TASK_CLASSIFICATION);
    const string task_tag = is_classification ? " cls " : " det ";
    const string& set_name = (dataset == CV_OBD_TRAIN) ? m_train_set : m_test_set;

    // resolve the default competition number for the selected task
    int comp_id = competition;
    if (comp_id == -1)
        comp_id = is_classification ? 1 : 3;

    stringstream name;
    name << " comp " << comp_id << " _ " << task_tag << " _ " << set_name << " _ " << obj_class;
    if (number >= 1)
        name << " _ " << number;
    name << " .txt ";

    return name.str();
}
//Calculate metrics for classification results
//--------------------------------------------
//INPUTS:
// - ground_truth A vector of booleans determining whether the currently tested class is present in each input image
// - scores A vector containing the similarity score for each input image (higher is more similar)
//OUTPUTS:
// - precision A vector containing the precision calculated at each datapoint of a p-r curve generated from the result set
// - recall A vector containing the recall calculated at each datapoint of a p-r curve generated from the result set
// - ap The ap metric calculated from the result set
// - (ranking) A vector of the same length as 'ground_truth' and 'scores' containing the order of the indices in both of
// these arrays when sorting by the ranking score in descending order
//NOTES:
// The result file path and filename are determined automatically using m_results_directory as a base
void VocData : : calcClassifierPrecRecall ( const string & obj_class , const vector < ObdImage > & images , const vector < float > & scores , vector < float > & precision , vector < float > & recall , float & ap , vector < size_t > & ranking )
{
vector < char > res_ground_truth ;
getClassifierGroundTruth ( obj_class , images , res_ground_truth ) ;
calcPrecRecall_impl ( res_ground_truth , scores , precision , recall , ap , ranking ) ;
}
void VocData : : calcClassifierPrecRecall ( const string & obj_class , const vector < ObdImage > & images , const vector < float > & scores , vector < float > & precision , vector < float > & recall , float & ap )
{
vector < char > res_ground_truth ;
getClassifierGroundTruth ( obj_class , images , res_ground_truth ) ;
vector < size_t > ranking ;
calcPrecRecall_impl ( res_ground_truth , scores , precision , recall , ap , ranking ) ;
}
//< Overloaded version which accepts VOC classification result file input instead of array of scores/ground truth >
//INPUTS:
// - input_file The path to the VOC standard results file to use for calculating precision/recall
// If a full path is not specified, it is assumed this file is in the VOC standard results directory
// A VOC standard filename can be retrieved (as used by writeClassifierResultsFile) by calling getClassifierResultsFilename
void VocData : : calcClassifierPrecRecall ( const string & input_file , vector < float > & precision , vector < float > & recall , float & ap , bool outputRankingFile )
{
//read in classification results file
vector < string > res_image_codes ;
vector < float > res_scores ;
string input_file_std = checkFilenamePathsep ( input_file ) ;
readClassifierResultsFile ( input_file_std , res_image_codes , res_scores ) ;
//extract the object class and dataset from the results file filename
string class_name , dataset_name ;
extractDataFromResultsFilename ( input_file_std , class_name , dataset_name ) ;
//generate the ground truth for the images extracted from the results file
vector < char > res_ground_truth ;
getClassifierGroundTruth ( class_name , res_image_codes , res_ground_truth ) ;
if ( outputRankingFile )
{
/* 1. store sorting order by score (descending) in 'order' */
vector < std : : pair < size_t , vector < float > : : const_iterator > > order ( res_scores . size ( ) ) ;
size_t n = 0 ;
for ( vector < float > : : const_iterator it = res_scores . begin ( ) ; it ! = res_scores . end ( ) ; + + it , + + n )
order [ n ] = make_pair ( n , it ) ;
std : : sort ( order . begin ( ) , order . end ( ) , orderingSorter ( ) ) ;
/* 2. save ranking results to text file */
string input_file_std1 = checkFilenamePathsep ( input_file ) ;
size_t fnamestart = input_file_std1 . rfind ( " / " ) ;
string scoregt_file_str = input_file_std1 . substr ( 0 , fnamestart + 1 ) + " scoregt_ " + class_name + " .txt " ;
std : : ofstream scoregt_file ( scoregt_file_str . c_str ( ) ) ;
if ( scoregt_file . is_open ( ) )
{
for ( size_t i = 0 ; i < res_scores . size ( ) ; + + i )
{
scoregt_file < < res_image_codes [ order [ i ] . first ] < < " " < < res_scores [ order [ i ] . first ] < < " " < < res_ground_truth [ order [ i ] . first ] < < endl ;
}
scoregt_file . close ( ) ;
} else {
string err_msg = " could not open scoregt file ' " + scoregt_file_str + " ' for writing. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
}
//finally, calculate precision+recall+ap
vector < size_t > ranking ;
calcPrecRecall_impl ( res_ground_truth , res_scores , precision , recall , ap , ranking ) ;
}
//< Protected implementation of Precision-Recall calculation used by both calcClassifierPrecRecall and calcDetectorPrecRecall >
void VocData : : calcPrecRecall_impl ( const vector < char > & ground_truth , const vector < float > & scores , vector < float > & precision , vector < float > & recall , float & ap , vector < size_t > & ranking , int recall_normalization )
{
CV_Assert ( ground_truth . size ( ) = = scores . size ( ) ) ;
//add extra element for p-r at 0 recall (in case that first retrieved is positive)
vector < float > ( scores . size ( ) + 1 ) . swap ( precision ) ;
vector < float > ( scores . size ( ) + 1 ) . swap ( recall ) ;
// SORT RESULTS BY THEIR SCORE
/* 1. store sorting order in 'order' */
VocData : : getSortOrder ( scores , ranking ) ;
# ifdef PR_DEBUG
std : : ofstream scoregt_file ( " D:/pr.txt " ) ;
if ( scoregt_file . is_open ( ) )
{
for ( int i = 0 ; i < scores . size ( ) ; + + i )
{
scoregt_file < < scores [ ranking [ i ] ] < < " " < < ground_truth [ ranking [ i ] ] < < endl ;
}
scoregt_file . close ( ) ;
}
# endif
// CALCULATE PRECISION+RECALL
int retrieved_hits = 0 ;
int recall_norm ;
if ( recall_normalization ! = - 1 )
{
recall_norm = recall_normalization ;
} else {
recall_norm = ( int ) std : : count_if ( ground_truth . begin ( ) , ground_truth . end ( ) , std : : bind2nd ( std : : equal_to < char > ( ) , ( char ) 1 ) ) ;
}
ap = 0 ;
recall [ 0 ] = 0 ;
for ( size_t idx = 0 ; idx < ground_truth . size ( ) ; + + idx )
{
if ( ground_truth [ ranking [ idx ] ] ! = 0 ) + + retrieved_hits ;
precision [ idx + 1 ] = static_cast < float > ( retrieved_hits ) / static_cast < float > ( idx + 1 ) ;
recall [ idx + 1 ] = static_cast < float > ( retrieved_hits ) / static_cast < float > ( recall_norm ) ;
if ( idx = = 0 )
{
//add further point at 0 recall with the same precision value as the first computed point
precision [ idx ] = precision [ idx + 1 ] ;
}
if ( recall [ idx + 1 ] = = 1.0 )
{
//if recall = 1, then end early as all positive images have been found
recall . resize ( idx + 2 ) ;
precision . resize ( idx + 2 ) ;
break ;
}
}
/* ap calculation */
if ( m_sampled_ap = = false )
{
// FOR VOC2010+ AP IS CALCULATED FROM ALL DATAPOINTS
/* make precision monotonically decreasing for purposes of calculating ap */
vector < float > precision_monot ( precision . size ( ) ) ;
vector < float > : : iterator prec_m_it = precision_monot . begin ( ) ;
for ( vector < float > : : iterator prec_it = precision . begin ( ) ; prec_it ! = precision . end ( ) ; + + prec_it , + + prec_m_it )
{
vector < float > : : iterator max_elem ;
max_elem = std : : max_element ( prec_it , precision . end ( ) ) ;
( * prec_m_it ) = ( * max_elem ) ;
}
/* calculate ap */
for ( size_t idx = 0 ; idx < ( recall . size ( ) - 1 ) ; + + idx )
{
ap + = ( recall [ idx + 1 ] - recall [ idx ] ) * precision_monot [ idx + 1 ] + //no need to take min of prec - is monotonically decreasing
0.5f * ( recall [ idx + 1 ] - recall [ idx ] ) * std : : abs ( precision_monot [ idx + 1 ] - precision_monot [ idx ] ) ;
}
} else {
// FOR BEFORE VOC2010 AP IS CALCULATED BY SAMPLING PRECISION AT RECALL 0.0,0.1,..,1.0
for ( float recall_pos = 0.f ; recall_pos < = 1.f ; recall_pos + = 0.1f )
{
//find iterator of the precision corresponding to the first recall >= recall_pos
vector < float > : : iterator recall_it = recall . begin ( ) ;
vector < float > : : iterator prec_it = precision . begin ( ) ;
while ( ( * recall_it ) < recall_pos )
{
+ + recall_it ;
+ + prec_it ;
if ( recall_it = = recall . end ( ) ) break ;
}
/* if no recall >= recall_pos found, this level of recall is never reached so stop adding to ap */
if ( recall_it = = recall . end ( ) ) break ;
/* if the prec_it is valid, compute the max precision at this level of recall or higher */
vector < float > : : iterator max_prec = std : : max_element ( prec_it , precision . end ( ) ) ;
ap + = ( * max_prec ) / 11 ;
}
}
}
/* functions for calculating confusion matrix rows */
//Calculate rows of a confusion matrix
//------------------------------------
//INPUTS:
// - obj_class The VOC object class identifier string for the confusion matrix row to compute
// - images An array of ObdImage containing the images to use for the computation
// - scores A corresponding array of confidence scores for the presence of obj_class in each image
// - cond Defines whether to use a cut off point based on recall (CV_VOC_CCOND_RECALL) or score
// (CV_VOC_CCOND_SCORETHRESH) the latter is useful for classifier detections where positive
// values are positive detections and negative values are negative detections
// - threshold Threshold value for cond. In case of CV_VOC_CCOND_RECALL, is proportion recall (e.g. 0.5).
// In the case of CV_VOC_CCOND_SCORETHRESH is the value above which to count results.
//OUTPUTS:
// - output_headers An output vector of object class headers for the confusion matrix row
// - output_values An output vector of values for the confusion matrix row corresponding to the classes
// defined in output_headers
//NOTES:
// The methodology used by the classifier version of this function is that true positives have a single unit
// added to the obj_class column in the confusion matrix row, whereas false positives have a single unit
// distributed in proportion between all the columns in the confusion matrix row corresponding to the objects
// present in the image.
void VocData : : calcClassifierConfMatRow ( const string & obj_class , const vector < ObdImage > & images , const vector < float > & scores , const VocConfCond cond , const float threshold , vector < string > & output_headers , vector < float > & output_values )
{
CV_Assert ( images . size ( ) = = scores . size ( ) ) ;
// SORT RESULTS BY THEIR SCORE
/* 1. store sorting order in 'ranking' */
vector < size_t > ranking ;
VocData : : getSortOrder ( scores , ranking ) ;
// CALCULATE CONFUSION MATRIX ENTRIES
/* prepare object category headers */
output_headers = m_object_classes ;
vector < float > ( output_headers . size ( ) , 0.0 ) . swap ( output_values ) ;
/* find the index of the target object class in the headers for later use */
int target_idx ;
{
vector < string > : : iterator target_idx_it = std : : find ( output_headers . begin ( ) , output_headers . end ( ) , obj_class ) ;
/* if the target class can not be found, raise an exception */
if ( target_idx_it = = output_headers . end ( ) )
{
string err_msg = " could not find the target object class ' " + obj_class + " ' in list of valid classes. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
/* convert iterator to index */
target_idx = ( int ) std : : distance ( output_headers . begin ( ) , target_idx_it ) ;
}
/* prepare variables related to calculating recall if using the recall threshold */
int retrieved_hits = 0 ;
int total_relevant = 0 ;
if ( cond = = CV_VOC_CCOND_RECALL )
{
vector < char > ground_truth ;
/* in order to calculate the total number of relevant images for normalization of recall
it ' s necessary to extract the ground truth for the images under consideration */
getClassifierGroundTruth ( obj_class , images , ground_truth ) ;
total_relevant = ( int ) std : : count_if ( ground_truth . begin ( ) , ground_truth . end ( ) , std : : bind2nd ( std : : equal_to < char > ( ) , ( char ) 1 ) ) ;
}
/* iterate through images */
vector < ObdObject > img_objects ;
vector < VocObjectData > img_object_data ;
int total_images = 0 ;
for ( size_t image_idx = 0 ; image_idx < images . size ( ) ; + + image_idx )
{
/* if using the score as the break condition, check for it now */
if ( cond = = CV_VOC_CCOND_SCORETHRESH )
{
if ( scores [ ranking [ image_idx ] ] < = threshold ) break ;
}
/* if continuing for this iteration, increment the image counter for later normalization */
+ + total_images ;
/* for each image retrieve the objects contained */
getObjects ( images [ ranking [ image_idx ] ] . id , img_objects , img_object_data ) ;
//check if the tested for object class is present
if ( getClassifierGroundTruthImage ( obj_class , images [ ranking [ image_idx ] ] . id ) )
{
//if the target class is present, assign fully to the target class element in the confusion matrix row
output_values [ target_idx ] + = 1.0 ;
if ( cond = = CV_VOC_CCOND_RECALL ) + + retrieved_hits ;
} else {
//first delete all objects marked as difficult
for ( size_t obj_idx = 0 ; obj_idx < img_objects . size ( ) ; + + obj_idx )
{
if ( img_object_data [ obj_idx ] . difficult = = true )
{
vector < ObdObject > : : iterator it1 = img_objects . begin ( ) ;
std : : advance ( it1 , obj_idx ) ;
img_objects . erase ( it1 ) ;
vector < VocObjectData > : : iterator it2 = img_object_data . begin ( ) ;
std : : advance ( it2 , obj_idx ) ;
img_object_data . erase ( it2 ) ;
- - obj_idx ;
}
}
//if the target class is not present, add values to the confusion matrix row in equal proportions to all objects present in the image
for ( size_t obj_idx = 0 ; obj_idx < img_objects . size ( ) ; + + obj_idx )
{
//find the index of the currently considered object
vector < string > : : iterator class_idx_it = std : : find ( output_headers . begin ( ) , output_headers . end ( ) , img_objects [ obj_idx ] . object_class ) ;
//if the class name extracted from the ground truth file could not be found in the list of available classes, raise an exception
if ( class_idx_it = = output_headers . end ( ) )
{
string err_msg = " could not find object class ' " + img_objects [ obj_idx ] . object_class + " ' specified in the ground truth file of ' " + images [ ranking [ image_idx ] ] . id + " 'in list of valid classes. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
/* convert iterator to index */
int class_idx = ( int ) std : : distance ( output_headers . begin ( ) , class_idx_it ) ;
//add to confusion matrix row in proportion
output_values [ class_idx ] + = 1.f / static_cast < float > ( img_objects . size ( ) ) ;
}
}
//check break conditions if breaking on certain level of recall
if ( cond = = CV_VOC_CCOND_RECALL )
{
if ( static_cast < float > ( retrieved_hits ) / static_cast < float > ( total_relevant ) > = threshold ) break ;
}
}
/* finally, normalize confusion matrix row */
for ( vector < float > : : iterator it = output_values . begin ( ) ; it < output_values . end ( ) ; + + it )
{
( * it ) / = static_cast < float > ( total_images ) ;
}
}
// NOTE: doesn't ignore repeated detections
void VocData : : calcDetectorConfMatRow ( const string & obj_class , const ObdDatasetType dataset , const vector < ObdImage > & images , const vector < vector < float > > & scores , const vector < vector < Rect > > & bounding_boxes , const VocConfCond cond , const float threshold , vector < string > & output_headers , vector < float > & output_values , bool ignore_difficult )
{
CV_Assert ( images . size ( ) = = scores . size ( ) ) ;
CV_Assert ( images . size ( ) = = bounding_boxes . size ( ) ) ;
//collapse scores and ground_truth vectors into 1D vectors to allow ranking
/* define final flat vectors */
vector < string > images_flat ;
vector < float > scores_flat ;
vector < Rect > bounding_boxes_flat ;
{
/* first count how many objects to allow preallocation */
int obj_count = 0 ;
CV_Assert ( scores . size ( ) = = bounding_boxes . size ( ) ) ;
for ( size_t img_idx = 0 ; img_idx < scores . size ( ) ; + + img_idx )
{
CV_Assert ( scores [ img_idx ] . size ( ) = = bounding_boxes [ img_idx ] . size ( ) ) ;
for ( size_t obj_idx = 0 ; obj_idx < scores [ img_idx ] . size ( ) ; + + obj_idx )
{
+ + obj_count ;
}
}
/* preallocate vectors */
images_flat . resize ( obj_count ) ;
scores_flat . resize ( obj_count ) ;
bounding_boxes_flat . resize ( obj_count ) ;
/* now copy across to preallocated vectors */
int flat_pos = 0 ;
for ( size_t img_idx = 0 ; img_idx < scores . size ( ) ; + + img_idx )
{
for ( size_t obj_idx = 0 ; obj_idx < scores [ img_idx ] . size ( ) ; + + obj_idx )
{
images_flat [ flat_pos ] = images [ img_idx ] . id ;
scores_flat [ flat_pos ] = scores [ img_idx ] [ obj_idx ] ;
bounding_boxes_flat [ flat_pos ] = bounding_boxes [ img_idx ] [ obj_idx ] ;
+ + flat_pos ;
}
}
}
// SORT RESULTS BY THEIR SCORE
/* 1. store sorting order in 'ranking' */
vector < size_t > ranking ;
VocData : : getSortOrder ( scores_flat , ranking ) ;
// CALCULATE CONFUSION MATRIX ENTRIES
/* prepare object category headers */
output_headers = m_object_classes ;
output_headers . push_back ( " background " ) ;
vector < float > ( output_headers . size ( ) , 0.0 ) . swap ( output_values ) ;
/* prepare variables related to calculating recall if using the recall threshold */
int retrieved_hits = 0 ;
int total_relevant = 0 ;
if ( cond = = CV_VOC_CCOND_RECALL )
{
// vector<char> ground_truth;
// /* in order to calculate the total number of relevant images for normalization of recall
// it's necessary to extract the ground truth for the images under consideration */
// getClassifierGroundTruth(obj_class, images, ground_truth);
// total_relevant = std::count_if(ground_truth.begin(),ground_truth.end(),std::bind2nd(std::equal_to<bool>(),true));
/* calculate the total number of objects in the ground truth for the current dataset */
vector < ObdImage > gt_images ;
vector < char > gt_object_present ;
getClassImages ( obj_class , dataset , gt_images , gt_object_present ) ;
for ( size_t image_idx = 0 ; image_idx < gt_images . size ( ) ; + + image_idx )
{
vector < ObdObject > gt_img_objects ;
vector < VocObjectData > gt_img_object_data ;
getObjects ( gt_images [ image_idx ] . id , gt_img_objects , gt_img_object_data ) ;
for ( size_t obj_idx = 0 ; obj_idx < gt_img_objects . size ( ) ; + + obj_idx )
{
if ( gt_img_objects [ obj_idx ] . object_class = = obj_class )
{
if ( ( gt_img_object_data [ obj_idx ] . difficult = = false ) | | ( ignore_difficult = = false ) )
+ + total_relevant ;
}
}
}
}
/* iterate through objects */
vector < ObdObject > img_objects ;
vector < VocObjectData > img_object_data ;
int total_objects = 0 ;
for ( size_t image_idx = 0 ; image_idx < images . size ( ) ; + + image_idx )
{
/* if using the score as the break condition, check for it now */
if ( cond = = CV_VOC_CCOND_SCORETHRESH )
{
if ( scores_flat [ ranking [ image_idx ] ] < = threshold ) break ;
}
/* increment the image counter for later normalization */
+ + total_objects ;
/* for each image retrieve the objects contained */
getObjects ( images [ ranking [ image_idx ] ] . id , img_objects , img_object_data ) ;
//find the ground truth object which has the highest overlap score with the detected object
float maxov = - 1.0 ;
int max_gt_obj_idx = - 1 ;
//-- for each detected object iterate through objects present in ground truth --
for ( size_t gt_obj_idx = 0 ; gt_obj_idx < img_objects . size ( ) ; + + gt_obj_idx )
{
//check difficulty flag
if ( ignore_difficult | | ( img_object_data [ gt_obj_idx ] . difficult = = false ) )
{
//if the class matches, then check if the detected object and ground truth object overlap by a sufficient margin
float ov = testBoundingBoxesForOverlap ( bounding_boxes_flat [ ranking [ image_idx ] ] , img_objects [ gt_obj_idx ] . boundingBox ) ;
if ( ov ! = - 1.f )
{
//if all conditions are met store the overlap score and index (as objects are assigned to the highest scoring match)
if ( ov > maxov )
{
maxov = ov ;
max_gt_obj_idx = ( int ) gt_obj_idx ;
}
}
}
}
//assign to appropriate object class if an object was detected
if ( maxov ! = - 1.0 )
{
//find the index of the currently considered object
vector < string > : : iterator class_idx_it = std : : find ( output_headers . begin ( ) , output_headers . end ( ) , img_objects [ max_gt_obj_idx ] . object_class ) ;
//if the class name extracted from the ground truth file could not be found in the list of available classes, raise an exception
if ( class_idx_it = = output_headers . end ( ) )
{
string err_msg = " could not find object class ' " + img_objects [ max_gt_obj_idx ] . object_class + " ' specified in the ground truth file of ' " + images [ ranking [ image_idx ] ] . id + " 'in list of valid classes. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
/* convert iterator to index */
int class_idx = ( int ) std : : distance ( output_headers . begin ( ) , class_idx_it ) ;
//add to confusion matrix row in proportion
output_values [ class_idx ] + = 1.0 ;
} else {
//otherwise assign to background class
output_values [ output_values . size ( ) - 1 ] + = 1.0 ;
}
//check break conditions if breaking on certain level of recall
if ( cond = = CV_VOC_CCOND_RECALL )
{
if ( static_cast < float > ( retrieved_hits ) / static_cast < float > ( total_relevant ) > = threshold ) break ;
}
}
/* finally, normalize confusion matrix row */
for ( vector < float > : : iterator it = output_values . begin ( ) ; it < output_values . end ( ) ; + + it )
{
( * it ) / = static_cast < float > ( total_objects ) ;
}
}
//Save Precision-Recall results to a p-r curve in GNUPlot format
//--------------------------------------------------------------
//INPUTS:
// - output_file The file to which to save the GNUPlot data file. If only a filename is specified, the data
// file is saved to the standard VOC results directory.
// - precision Vector of precisions as returned from calcClassifier/DetectorPrecRecall
// - recall Vector of recalls as returned from calcClassifier/DetectorPrecRecall
// - ap ap as returned from calcClassifier/DetectorPrecRecall
// - (title) Title to use for the plot (if not specified, just the ap is printed as the title)
// This also specifies the filename of the output file if printing to pdf
// - (plot_type) Specifies whether to instruct GNUPlot to save to a PDF file (CV_VOC_PLOT_PDF) or directly
// to screen (CV_VOC_PLOT_SCREEN) in the datafile
//NOTES:
// The GNUPlot data file can be executed using GNUPlot from the commandline in the following way:
// >> GNUPlot <output_file>
// This will then display the p-r curve on the screen or save it to a pdf file depending on plot_type
void VocData : : savePrecRecallToGnuplot ( const string & output_file , const vector < float > & precision , const vector < float > & recall , const float ap , const string title , const VocPlotType plot_type )
{
string output_file_std = checkFilenamePathsep ( output_file ) ;
//if no directory is specified, by default save the output file in the results directory
// if (output_file_std.find("/") == output_file_std.npos)
// {
// output_file_std = m_results_directory + output_file_std;
// }
std : : ofstream plot_file ( output_file_std . c_str ( ) ) ;
if ( plot_file . is_open ( ) )
{
plot_file < < " set xrange [0:1] " < < endl ;
plot_file < < " set yrange [0:1] " < < endl ;
plot_file < < " set size square " < < endl ;
string title_text = title ;
if ( title_text . size ( ) = = 0 ) title_text = " Precision-Recall Curve " ;
plot_file < < " set title \" " < < title_text < < " (ap: " < < ap < < " ) \" " < < endl ;
plot_file < < " set xlabel \" Recall \" " < < endl ;
plot_file < < " set ylabel \" Precision \" " < < endl ;
plot_file < < " set style data lines " < < endl ;
plot_file < < " set nokey " < < endl ;
if ( plot_type = = CV_VOC_PLOT_PNG )
{
plot_file < < " set terminal png " < < endl ;
string pdf_filename ;
if ( title . size ( ) ! = 0 )
{
pdf_filename = title ;
} else {
pdf_filename = " prcurve " ;
}
plot_file < < " set out \" " < < title < < " .png \" " < < endl ;
}
plot_file < < " plot \" - \" using 1:2 " < < endl ;
plot_file < < " # X Y " < < endl ;
CV_Assert ( precision . size ( ) = = recall . size ( ) ) ;
for ( size_t i = 0 ; i < precision . size ( ) ; + + i )
{
plot_file < < " " < < recall [ i ] < < " " < < precision [ i ] < < endl ;
}
plot_file < < " end " < < endl ;
if ( plot_type = = CV_VOC_PLOT_SCREEN )
{
plot_file < < " pause -1 " < < endl ;
}
plot_file . close ( ) ;
} else {
string err_msg = " could not open plot file ' " + output_file_std + " ' for writing. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
}
void VocData : : readClassifierGroundTruth ( const string & obj_class , const ObdDatasetType dataset , vector < ObdImage > & images , vector < char > & object_present )
{
images . clear ( ) ;
string gtFilename = m_class_imageset_path ;
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , obj_class ) ;
if ( dataset = = CV_OBD_TRAIN )
{
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , m_train_set ) ;
} else {
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , m_test_set ) ;
}
vector < string > image_codes ;
readClassifierGroundTruth ( gtFilename , image_codes , object_present ) ;
convertImageCodesToObdImages ( image_codes , images ) ;
}
void VocData : : readClassifierResultsFile ( const std : : string & input_file , vector < ObdImage > & images , vector < float > & scores )
{
images . clear ( ) ;
string input_file_std = checkFilenamePathsep ( input_file ) ;
//if no directory is specified, by default search for the input file in the results directory
// if (input_file_std.find("/") == input_file_std.npos)
// {
// input_file_std = m_results_directory + input_file_std;
// }
vector < string > image_codes ;
readClassifierResultsFile ( input_file_std , image_codes , scores ) ;
convertImageCodesToObdImages ( image_codes , images ) ;
}
void VocData : : readDetectorResultsFile ( const string & input_file , vector < ObdImage > & images , vector < vector < float > > & scores , vector < vector < Rect > > & bounding_boxes )
{
images . clear ( ) ;
string input_file_std = checkFilenamePathsep ( input_file ) ;
//if no directory is specified, by default search for the input file in the results directory
// if (input_file_std.find("/") == input_file_std.npos)
// {
// input_file_std = m_results_directory + input_file_std;
// }
vector < string > image_codes ;
readDetectorResultsFile ( input_file_std , image_codes , scores , bounding_boxes ) ;
convertImageCodesToObdImages ( image_codes , images ) ;
}
// Accessor for the list of object class names.
// Returns a const reference to the member m_object_classes (no copy is made).
const vector < string > & VocData : : getObjectClasses ( )
{
return m_object_classes ;
}
//string VocData::getResultsDirectory()
//{
// return m_results_directory;
//}
//---------------------------------------------------------
// Protected Functions ------------------------------------
//---------------------------------------------------------
// Extract the dataset name, i.e. the last component, from a VOC root path.
//INPUTS:
// - vocPath    Path to the VOC data; '/' and '\' are both accepted as separators and may be mixed
//RETURNS:
// The text after the last separator, ignoring any trailing separators
// (e.g. "/data/VOCdevkit/VOC2010/" gives "VOC2010"). A path without separators is returned unchanged;
// an empty path or one made of separators only gives an empty string.
static std::string getVocName(const std::string& vocPath)
{
    static const char* const separators = "/\\";

    // position of the last character that belongs to the name (skips trailing separators)
    const std::string::size_type last = vocPath.find_last_not_of(separators);
    if (last == std::string::npos)
        return std::string();

    // searching for both separators at once picks the right-most one even when they are mixed
    const std::string::size_type sep = vocPath.find_last_of(separators, last);
    const std::string::size_type first = (sep == std::string::npos) ? 0 : sep + 1;

    return vocPath.substr(first, last - first + 1);
}
// Initialize the dataset paths and settings for the VOC data found at vocPath.
// Currently this simply forwards both arguments to initVoc2007to2010.
void VocData : : initVoc ( const string & vocPath , const bool useTestDataset )
{
initVoc2007to2010 ( vocPath , useTestDataset ) ;
}
//Initialize file paths and settings for the VOC 2010 dataset
//-----------------------------------------------------------
void VocData : : initVoc2007to2010 ( const string & vocPath , const bool useTestDataset )
{
//check format of root directory and modify if necessary
m_vocName = getVocName ( vocPath ) ;
CV_Assert ( ! m_vocName . compare ( " VOC2007 " ) | | ! m_vocName . compare ( " VOC2008 " ) | |
! m_vocName . compare ( " VOC2009 " ) | | ! m_vocName . compare ( " VOC2010 " ) ) ;
m_vocPath = checkFilenamePathsep ( vocPath , true ) ;
if ( useTestDataset )
{
m_train_set = " trainval " ;
m_test_set = " test " ;
} else {
m_train_set = " train " ;
m_test_set = " val " ;
}
// initialize main classification/detection challenge paths
m_annotation_path = m_vocPath + " /Annotations/%s.xml " ;
m_image_path = m_vocPath + " /JPEGImages/%s.jpg " ;
m_imageset_path = m_vocPath + " /ImageSets/Main/%s.txt " ;
m_class_imageset_path = m_vocPath + " /ImageSets/Main/%s_%s.txt " ;
//define available object_classes for VOC2010 dataset
m_object_classes . push_back ( " aeroplane " ) ;
m_object_classes . push_back ( " bicycle " ) ;
m_object_classes . push_back ( " bird " ) ;
m_object_classes . push_back ( " boat " ) ;
m_object_classes . push_back ( " bottle " ) ;
m_object_classes . push_back ( " bus " ) ;
m_object_classes . push_back ( " car " ) ;
m_object_classes . push_back ( " cat " ) ;
m_object_classes . push_back ( " chair " ) ;
m_object_classes . push_back ( " cow " ) ;
m_object_classes . push_back ( " diningtable " ) ;
m_object_classes . push_back ( " dog " ) ;
m_object_classes . push_back ( " horse " ) ;
m_object_classes . push_back ( " motorbike " ) ;
m_object_classes . push_back ( " person " ) ;
m_object_classes . push_back ( " pottedplant " ) ;
m_object_classes . push_back ( " sheep " ) ;
m_object_classes . push_back ( " sofa " ) ;
m_object_classes . push_back ( " train " ) ;
m_object_classes . push_back ( " tvmonitor " ) ;
m_min_overlap = 0.5 ;
//up until VOC 2010, ap was calculated by sampling p-r curve, not taking complete curve
m_sampled_ap = ( ( m_vocName = = " VOC2007 " ) | | ( m_vocName = = " VOC2008 " ) | | ( m_vocName = = " VOC2009 " ) ) ;
}
//Read a VOC classification ground truth text file for a given object class and dataset
//-------------------------------------------------------------------------------------
//INPUTS:
// - filename The path of the text file to read
//OUTPUTS:
// - image_codes VOC image codes extracted from the GT file in the form 20XX_XXXXXX where the first four
// digits specify the year of the dataset, and the last group specifies a unique ID
// - object_present For each image in the 'image_codes' array, specifies whether the object class described
// in the loaded GT file is present or not
void VocData : : readClassifierGroundTruth ( const string & filename , vector < string > & image_codes , vector < char > & object_present )
{
image_codes . clear ( ) ;
object_present . clear ( ) ;
std : : ifstream gtfile ( filename . c_str ( ) ) ;
if ( ! gtfile . is_open ( ) )
{
string err_msg = " could not open VOC ground truth textfile ' " + filename + " '. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
string line ;
string image ;
int obj_present ;
while ( ! gtfile . eof ( ) )
{
std : : getline ( gtfile , line ) ;
std : : istringstream iss ( line ) ;
iss > > image > > obj_present ;
if ( ! iss . fail ( ) )
{
image_codes . push_back ( image ) ;
object_present . push_back ( obj_present = = 1 ) ;
} else {
if ( ! gtfile . eof ( ) ) CV_Error ( CV_StsParseError , " error parsing VOC ground truth textfile. " ) ;
}
}
gtfile . close ( ) ;
}
void VocData : : readClassifierResultsFile ( const string & input_file , vector < string > & image_codes , vector < float > & scores )
{
//check if results file exists
std : : ifstream result_file ( input_file . c_str ( ) ) ;
if ( result_file . is_open ( ) )
{
string line ;
string image ;
float score ;
//read in the results file
while ( ! result_file . eof ( ) )
{
std : : getline ( result_file , line ) ;
std : : istringstream iss ( line ) ;
iss > > image > > score ;
if ( ! iss . fail ( ) )
{
image_codes . push_back ( image ) ;
scores . push_back ( score ) ;
} else {
if ( ! result_file . eof ( ) ) CV_Error ( CV_StsParseError , " error parsing VOC classifier results file. " ) ;
}
}
result_file . close ( ) ;
} else {
string err_msg = " could not open classifier results file ' " + input_file + " ' for reading. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
}
void VocData : : readDetectorResultsFile ( const string & input_file , vector < string > & image_codes , vector < vector < float > > & scores , vector < vector < Rect > > & bounding_boxes )
{
image_codes . clear ( ) ;
scores . clear ( ) ;
bounding_boxes . clear ( ) ;
//check if results file exists
std : : ifstream result_file ( input_file . c_str ( ) ) ;
if ( result_file . is_open ( ) )
{
string line ;
string image ;
Rect bounding_box ;
float score ;
//read in the results file
while ( ! result_file . eof ( ) )
{
std : : getline ( result_file , line ) ;
std : : istringstream iss ( line ) ;
iss > > image > > score > > bounding_box . x > > bounding_box . y > > bounding_box . width > > bounding_box . height ;
if ( ! iss . fail ( ) )
{
//convert right and bottom positions to width and height
bounding_box . width - = bounding_box . x ;
bounding_box . height - = bounding_box . y ;
//convert to 0-indexing
bounding_box . x - = 1 ;
bounding_box . y - = 1 ;
//store in output vectors
/* first check if the current image code has been seen before */
vector < string > : : iterator image_codes_it = std : : find ( image_codes . begin ( ) , image_codes . end ( ) , image ) ;
if ( image_codes_it = = image_codes . end ( ) )
{
image_codes . push_back ( image ) ;
vector < float > score_vect ( 1 ) ;
score_vect [ 0 ] = score ;
scores . push_back ( score_vect ) ;
vector < Rect > bounding_box_vect ( 1 ) ;
bounding_box_vect [ 0 ] = bounding_box ;
bounding_boxes . push_back ( bounding_box_vect ) ;
} else {
/* if the image index has been seen before, add the current object below it in the 2D arrays */
int image_idx = ( int ) std : : distance ( image_codes . begin ( ) , image_codes_it ) ;
scores [ image_idx ] . push_back ( score ) ;
bounding_boxes [ image_idx ] . push_back ( bounding_box ) ;
}
} else {
if ( ! result_file . eof ( ) ) CV_Error ( CV_StsParseError , " error parsing VOC detector results file. " ) ;
}
}
result_file . close ( ) ;
} else {
string err_msg = " could not open detector results file ' " + input_file + " ' for reading. " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
}
//Read a VOC annotation xml file for a given image
//------------------------------------------------
//INPUTS:
// - filename The path of the xml file to read
//OUTPUTS:
// - objects Array of VocObject describing all object instances present in the given image
void VocData : : extractVocObjects ( const string filename , vector < ObdObject > & objects , vector < VocObjectData > & object_data )
{
# ifdef PR_DEBUG
int block = 1 ;
cout < < " SAMPLE VOC OBJECT EXTRACTION for " < < filename < < " : " < < endl ;
# endif
objects . clear ( ) ;
object_data . clear ( ) ;
string contents , object_contents , tag_contents ;
readFileToString ( filename , contents ) ;
//keep on extracting 'object' blocks until no more can be found
if ( extractXMLBlock ( contents , " annotation " , 0 , contents ) ! = - 1 )
{
int searchpos = 0 ;
searchpos = extractXMLBlock ( contents , " object " , searchpos , object_contents ) ;
while ( searchpos ! = - 1 )
{
# ifdef PR_DEBUG
cout < < " SEARCHPOS: " < < searchpos < < endl ;
cout < < " start block " < < block < < " --------- " < < endl ;
cout < < object_contents < < endl ;
cout < < " end block " < < block < < " ----------- " < < endl ;
+ + block ;
# endif
ObdObject object ;
VocObjectData object_d ;
//object class -------------
if ( extractXMLBlock ( object_contents , " name " , 0 , tag_contents ) = = - 1 ) CV_Error ( CV_StsError , " missing <name> tag in object definition of ' " + filename + " ' " ) ;
object . object_class . swap ( tag_contents ) ;
//object bounding box -------------
int xmax , xmin , ymax , ymin ;
if ( extractXMLBlock ( object_contents , " xmax " , 0 , tag_contents ) = = - 1 ) CV_Error ( CV_StsError , " missing <xmax> tag in object definition of ' " + filename + " ' " ) ;
xmax = stringToInteger ( tag_contents ) ;
if ( extractXMLBlock ( object_contents , " xmin " , 0 , tag_contents ) = = - 1 ) CV_Error ( CV_StsError , " missing <xmin> tag in object definition of ' " + filename + " ' " ) ;
xmin = stringToInteger ( tag_contents ) ;
if ( extractXMLBlock ( object_contents , " ymax " , 0 , tag_contents ) = = - 1 ) CV_Error ( CV_StsError , " missing <ymax> tag in object definition of ' " + filename + " ' " ) ;
ymax = stringToInteger ( tag_contents ) ;
if ( extractXMLBlock ( object_contents , " ymin " , 0 , tag_contents ) = = - 1 ) CV_Error ( CV_StsError , " missing <ymin> tag in object definition of ' " + filename + " ' " ) ;
ymin = stringToInteger ( tag_contents ) ;
object . boundingBox . x = xmin - 1 ; //convert to 0-based indexing
object . boundingBox . width = xmax - xmin ;
object . boundingBox . y = ymin - 1 ;
object . boundingBox . height = ymax - ymin ;
CV_Assert ( xmin ! = 0 ) ;
CV_Assert ( xmax > xmin ) ;
CV_Assert ( ymin ! = 0 ) ;
CV_Assert ( ymax > ymin ) ;
//object tags -------------
if ( extractXMLBlock ( object_contents , " difficult " , 0 , tag_contents ) ! = - 1 )
{
object_d . difficult = ( tag_contents = = " 1 " ) ;
} else object_d . difficult = false ;
if ( extractXMLBlock ( object_contents , " occluded " , 0 , tag_contents ) ! = - 1 )
{
object_d . occluded = ( tag_contents = = " 1 " ) ;
} else object_d . occluded = false ;
if ( extractXMLBlock ( object_contents , " truncated " , 0 , tag_contents ) ! = - 1 )
{
object_d . truncated = ( tag_contents = = " 1 " ) ;
} else object_d . truncated = false ;
if ( extractXMLBlock ( object_contents , " pose " , 0 , tag_contents ) ! = - 1 )
{
if ( tag_contents = = " Frontal " ) object_d . pose = CV_VOC_POSE_FRONTAL ;
if ( tag_contents = = " Rear " ) object_d . pose = CV_VOC_POSE_REAR ;
if ( tag_contents = = " Left " ) object_d . pose = CV_VOC_POSE_LEFT ;
if ( tag_contents = = " Right " ) object_d . pose = CV_VOC_POSE_RIGHT ;
}
//add to array of objects
objects . push_back ( object ) ;
object_data . push_back ( object_d ) ;
//extract next 'object' block from file if it exists
searchpos = extractXMLBlock ( contents , " object " , searchpos , object_contents ) ;
}
}
}
//Builds the path of an image file from its image code
//----------------------------------------------------
//Takes a copy of the image path template (m_image_path) and substitutes 'input_str'
//for the first occurrence of the placeholder searched for below.
string VocData::getImagePath(const string& input_str)
{
    string image_path(m_image_path);
    const size_t placeholder_pos = image_path.find(" %s ");
    image_path.replace(placeholder_pos, 2, input_str);
    return image_path;
}
//Tests two boundary boxes for overlap (using the intersection over union metric) and returns the overlap if the objects
//defined by the two bounding boxes are considered to be matched according to the criterion outlined in
//the VOC documentation [namely intersection/union > some threshold] otherwise returns -1.0 (no match)
//----------------------------------------------------------------------------------------------------------
float VocData : : testBoundingBoxesForOverlap ( const Rect detection , const Rect ground_truth )
{
int detection_x2 = detection . x + detection . width ;
int detection_y2 = detection . y + detection . height ;
int ground_truth_x2 = ground_truth . x + ground_truth . width ;
int ground_truth_y2 = ground_truth . y + ground_truth . height ;
//first calculate the boundaries of the intersection of the rectangles
int intersection_x = std : : max ( detection . x , ground_truth . x ) ; //rightmost left
int intersection_y = std : : max ( detection . y , ground_truth . y ) ; //bottommost top
int intersection_x2 = std : : min ( detection_x2 , ground_truth_x2 ) ; //leftmost right
int intersection_y2 = std : : min ( detection_y2 , ground_truth_y2 ) ; //topmost bottom
//then calculate the width and height of the intersection rect
int intersection_width = intersection_x2 - intersection_x + 1 ;
int intersection_height = intersection_y2 - intersection_y + 1 ;
//if there is no overlap then return false straight away
if ( ( intersection_width < = 0 ) | | ( intersection_height < = 0 ) ) return - 1.0 ;
//otherwise calculate the intersection
int intersection_area = intersection_width * intersection_height ;
//now calculate the union
int union_area = ( detection . width + 1 ) * ( detection . height + 1 ) + ( ground_truth . width + 1 ) * ( ground_truth . height + 1 ) - intersection_area ;
//calculate the intersection over union and use as threshold as per VOC documentation
float overlap = static_cast < float > ( intersection_area ) / static_cast < float > ( union_area ) ;
if ( overlap > m_min_overlap )
{
return overlap ;
} else {
return - 1.0 ;
}
}
//Extracts the object class and dataset from the filename of a VOC standard results text file, which takes
//the format 'comp<n>_{cls/det}_<dataset>_<objclass>.txt'
//----------------------------------------------------------------------------------------------------------
void VocData : : extractDataFromResultsFilename ( const string & input_file , string & class_name , string & dataset_name )
{
string input_file_std = checkFilenamePathsep ( input_file ) ;
size_t fnamestart = input_file_std . rfind ( " / " ) ;
size_t fnameend = input_file_std . rfind ( " .txt " ) ;
if ( ( fnamestart = = input_file_std . npos ) | | ( fnameend = = input_file_std . npos ) )
CV_Error ( CV_StsError , " Could not extract filename of results file. " ) ;
+ + fnamestart ;
if ( fnamestart > = fnameend )
CV_Error ( CV_StsError , " Could not extract filename of results file. " ) ;
//extract dataset and class names, triggering exception if the filename format is not correct
string filename = input_file_std . substr ( fnamestart , fnameend - fnamestart ) ;
size_t datasetstart = filename . find ( " _ " ) ;
datasetstart = filename . find ( " _ " , datasetstart + 1 ) ;
size_t classstart = filename . find ( " _ " , datasetstart + 1 ) ;
//allow for appended index after a further '_' by discarding this part if it exists
size_t classend = filename . find ( " _ " , classstart + 1 ) ;
if ( classend = = filename . npos ) classend = filename . size ( ) ;
if ( ( datasetstart = = filename . npos ) | | ( classstart = = filename . npos ) )
CV_Error ( CV_StsError , " Error parsing results filename. Is it in standard format of 'comp<n>_{cls/det}_<dataset>_<objclass>.txt'? " ) ;
+ + datasetstart ;
+ + classstart ;
if ( ( ( datasetstart - classstart ) < 1 ) | | ( ( classend - datasetstart ) < 1 ) )
CV_Error ( CV_StsError , " Error parsing results filename. Is it in standard format of 'comp<n>_{cls/det}_<dataset>_<objclass>.txt'? " ) ;
dataset_name = filename . substr ( datasetstart , classstart - datasetstart - 1 ) ;
class_name = filename . substr ( classstart , classend - classstart ) ;
}
bool VocData : : getClassifierGroundTruthImage ( const string & obj_class , const string & id )
{
/* if the classifier ground truth data for all images of the current class has not been loaded yet, load it now */
if ( m_classifier_gt_all_ids . empty ( ) | | ( m_classifier_gt_class ! = obj_class ) )
{
m_classifier_gt_all_ids . clear ( ) ;
m_classifier_gt_all_present . clear ( ) ;
m_classifier_gt_class = obj_class ;
for ( int i = 0 ; i < 2 ; + + i ) //run twice (once over test set and once over training set)
{
//generate the filename of the classification ground-truth textfile for the object class
string gtFilename = m_class_imageset_path ;
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , obj_class ) ;
if ( i = = 0 )
{
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , m_train_set ) ;
} else {
gtFilename . replace ( gtFilename . find ( " %s " ) , 2 , m_test_set ) ;
}
//parse the ground truth file, storing in two separate vectors
//for the image code and the ground truth value
vector < string > image_codes ;
vector < char > object_present ;
readClassifierGroundTruth ( gtFilename , image_codes , object_present ) ;
m_classifier_gt_all_ids . insert ( m_classifier_gt_all_ids . end ( ) , image_codes . begin ( ) , image_codes . end ( ) ) ;
m_classifier_gt_all_present . insert ( m_classifier_gt_all_present . end ( ) , object_present . begin ( ) , object_present . end ( ) ) ;
CV_Assert ( m_classifier_gt_all_ids . size ( ) = = m_classifier_gt_all_present . size ( ) ) ;
}
}
//search for the image code
vector < string > : : iterator it = find ( m_classifier_gt_all_ids . begin ( ) , m_classifier_gt_all_ids . end ( ) , id ) ;
if ( it ! = m_classifier_gt_all_ids . end ( ) )
{
//image found, so return corresponding ground truth
return m_classifier_gt_all_present [ std : : distance ( m_classifier_gt_all_ids . begin ( ) , it ) ] ! = 0 ;
} else {
string err_msg = " could not find classifier ground truth for image ' " + id + " ' and class ' " + obj_class + " ' " ;
CV_Error ( CV_StsError , err_msg . c_str ( ) ) ;
}
return true ;
}
//-------------------------------------------------------------------
// Protected Functions (utility) ------------------------------------
//-------------------------------------------------------------------
//returns a vector containing indexes of the input vector in sorted ascending/descending order
void VocData : : getSortOrder ( const vector < float > & values , vector < size_t > & order , bool descending )
{
/* 1. store sorting order in 'order_pair' */
vector < std : : pair < size_t , vector < float > : : const_iterator > > order_pair ( values . size ( ) ) ;
size_t n = 0 ;
for ( vector < float > : : const_iterator it = values . begin ( ) ; it ! = values . end ( ) ; + + it , + + n )
order_pair [ n ] = make_pair ( n , it ) ;
std : : sort ( order_pair . begin ( ) , order_pair . end ( ) , orderingSorter ( ) ) ;
if ( descending = = false ) std : : reverse ( order_pair . begin ( ) , order_pair . end ( ) ) ;
vector < size_t > ( order_pair . size ( ) ) . swap ( order ) ;
for ( size_t i = 0 ; i < order_pair . size ( ) ; + + i )
{
order [ i ] = order_pair [ i ] . first ;
}
}
void VocData : : readFileToString ( const string filename , string & file_contents )
{
std : : ifstream ifs ( filename . c_str ( ) ) ;
if ( ! ifs . is_open ( ) ) CV_Error ( CV_StsError , " could not open text file " ) ;
stringstream oss ;
oss < < ifs . rdbuf ( ) ;
file_contents = oss . str ( ) ;
}
int VocData : : stringToInteger ( const string input_str )
{
int result ;
stringstream ss ( input_str ) ;
if ( ( ss > > result ) . fail ( ) )
{
CV_Error ( CV_StsBadArg , " could not perform string to integer conversion " ) ;
}
return result ;
}
//Formats 'input_int' as text using stream insertion.
//Raises an error when the insertion fails.
string VocData::integerToString(const int input_int)
{
    std::ostringstream formatter;
    formatter << input_int;
    if (formatter.fail())
    {
        CV_Error(CV_StsBadArg, " could not perform integer to string conversion ");
    }
    return formatter.str();
}
//Returns a copy of 'filename' with backslash-style path separators replaced
//INPUTS:
// - filename             The path to normalise
// - add_trailing_slash   When true, the separator is appended unless the last occurrence
//                        found by rfind already sits at index length() - 1
//Two passes are made: the doubled pattern is replaced first, then the single one.
string VocData::checkFilenamePathsep(const string filename, bool add_trailing_slash)
{
    string normalised(filename);

    //pass 1: doubled pattern (two characters are replaced per match)
    for (size_t pos = normalised.find(" \\ \\ "); pos != string::npos; pos = normalised.find(" \\ \\ ", pos))
        normalised.replace(pos, 2, " / ");

    //pass 2: single pattern (one character is replaced per match)
    for (size_t pos = normalised.find(" \\ "); pos != string::npos; pos = normalised.find(" \\ ", pos))
        normalised.replace(pos, 1, " / ");

    //add trailing slash if this is missing
    if (add_trailing_slash && (normalised.rfind(" / ") != normalised.length() - 1))
        normalised += " / ";

    return normalised;
}
void VocData : : convertImageCodesToObdImages ( const vector < string > & image_codes , vector < ObdImage > & images )
{
images . clear ( ) ;
images . reserve ( image_codes . size ( ) ) ;
string path ;
//transfer to output arrays
for ( size_t i = 0 ; i < image_codes . size ( ) ; + + i )
{
//generate image path and indices from extracted string code
path = getImagePath ( image_codes [ i ] ) ;
images . push_back ( ObdImage ( image_codes [ i ] , path ) ) ;
}
}
//Extract text from within a given tag from an XML file
//-----------------------------------------------------
//INPUTS:
// - src XML source file
// - tag XML tag delimiting block to extract
// - searchpos position within src at which to start search
//OUTPUTS:
// - tag_contents text extracted between <tag> and </tag> tags
//RETURN VALUE:
// - the position of the final character extracted in tag_contents within src
// (can be used to call extractXMLBlock recursively to extract multiple blocks)
// returns -1 if the tag could not be found
int VocData : : extractXMLBlock ( const string src , const string tag , const int searchpos , string & tag_contents )
{
size_t startpos , next_startpos , endpos ;
int embed_count = 1 ;
//find position of opening tag
startpos = src . find ( " < " + tag + " > " , searchpos ) ;
if ( startpos = = string : : npos ) return - 1 ;
//initialize endpos -
// start searching for end tag anywhere after opening tag
endpos = startpos ;
//find position of next opening tag
next_startpos = src . find ( " < " + tag + " > " , startpos + 1 ) ;
//match opening tags with closing tags, and only
//accept final closing tag of same level as original
//opening tag
while ( embed_count > 0 )
{
endpos = src . find ( " </ " + tag + " > " , endpos + 1 ) ;
if ( endpos = = string : : npos ) return - 1 ;
//the next code is only executed if there are embedded tags with the same name
if ( next_startpos ! = string : : npos )
{
while ( next_startpos < endpos )
{
//counting embedded start tags
+ + embed_count ;
next_startpos = src . find ( " < " + tag + " > " , next_startpos + 1 ) ;
if ( next_startpos = = string : : npos ) break ;
}
}
//passing end tag so decrement nesting level
- - embed_count ;
}
//finally, extract the tag region
startpos + = tag . length ( ) + 2 ;
if ( startpos > src . length ( ) ) return - 1 ;
if ( endpos > src . length ( ) ) return - 1 ;
tag_contents = src . substr ( startpos , endpos - startpos ) ;
return static_cast < int > ( endpos ) ;
}
/****************************************************************************************\
* Sample on image classification *
\ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
//
// This part of the code has been slightly refactored
//
struct DDMParams
{
DDMParams ( ) : detectorType ( " SURF " ) , descriptorType ( " SURF " ) , matcherType ( " BruteForce " ) { }
DDMParams ( const string _detectorType , const string _descriptorType , const string & _matcherType ) :
detectorType ( _detectorType ) , descriptorType ( _descriptorType ) , matcherType ( _matcherType ) { }
void read ( const FileNode & fn )
{
fn [ " detectorType " ] > > detectorType ;
fn [ " descriptorType " ] > > descriptorType ;
fn [ " matcherType " ] > > matcherType ;
}
void write ( FileStorage & fs ) const
{
fs < < " detectorType " < < detectorType ;
fs < < " descriptorType " < < descriptorType ;
fs < < " matcherType " < < matcherType ;
}
void print ( ) const
{
cout < < " detectorType: " < < detectorType < < endl ;
cout < < " descriptorType: " < < descriptorType < < endl ;
cout < < " matcherType: " < < matcherType < < endl ;
}
string detectorType ;
string descriptorType ;
string matcherType ;
} ;
struct VocabTrainParams
{
VocabTrainParams ( ) : trainObjClass ( " chair " ) , vocabSize ( 1000 ) , memoryUse ( 200 ) , descProportion ( 0.3f ) { }
VocabTrainParams ( const string _trainObjClass , size_t _vocabSize , size_t _memoryUse , float _descProportion ) :
trainObjClass ( _trainObjClass ) , vocabSize ( ( int ) _vocabSize ) , memoryUse ( ( int ) _memoryUse ) , descProportion ( _descProportion ) { }
void read ( const FileNode & fn )
{
fn [ " trainObjClass " ] > > trainObjClass ;
fn [ " vocabSize " ] > > vocabSize ;
fn [ " memoryUse " ] > > memoryUse ;
fn [ " descProportion " ] > > descProportion ;
}
void write ( FileStorage & fs ) const
{
fs < < " trainObjClass " < < trainObjClass ;
fs < < " vocabSize " < < vocabSize ;
fs < < " memoryUse " < < memoryUse ;
fs < < " descProportion " < < descProportion ;
}
void print ( ) const
{
cout < < " trainObjClass: " < < trainObjClass < < endl ;
cout < < " vocabSize: " < < vocabSize < < endl ;
cout < < " memoryUse: " < < memoryUse < < endl ;
cout < < " descProportion: " < < descProportion < < endl ;
}
string trainObjClass ; // Object class used for training visual vocabulary.
// It shouldn't matter which object class is specified here - visual vocab will still be the same.
int vocabSize ; //number of visual words in vocabulary to train
int memoryUse ; // Memory to preallocate (in MB) when training vocab.
// Change this depending on the size of the dataset/available memory.
float descProportion ; // Specifies the number of descriptors to use from each image as a proportion of the total num descs.
} ;
struct SVMTrainParamsExt
{
SVMTrainParamsExt ( ) : descPercent ( 0.5f ) , targetRatio ( 0.4f ) , balanceClasses ( true ) { }
SVMTrainParamsExt ( float _descPercent , float _targetRatio , bool _balanceClasses ) :
descPercent ( _descPercent ) , targetRatio ( _targetRatio ) , balanceClasses ( _balanceClasses ) { }
void read ( const FileNode & fn )
{
fn [ " descPercent " ] > > descPercent ;
fn [ " targetRatio " ] > > targetRatio ;
fn [ " balanceClasses " ] > > balanceClasses ;
}
void write ( FileStorage & fs ) const
{
fs < < " descPercent " < < descPercent ;
fs < < " targetRatio " < < targetRatio ;
fs < < " balanceClasses " < < balanceClasses ;
}
void print ( ) const
{
cout < < " descPercent: " < < descPercent < < endl ;
cout < < " targetRatio: " < < targetRatio < < endl ;
cout < < " balanceClasses: " < < balanceClasses < < endl ;
}
float descPercent ; // Percentage of extracted descriptors to use for training.
float targetRatio ; // Try to get this ratio of positive to negative samples (minimum).
bool balanceClasses ; // Balance class weights by number of samples in each (if true cSvmTrainTargetRatio is ignored).
} ;
static void readUsedParams ( const FileNode & fn , string & vocName , DDMParams & ddmParams , VocabTrainParams & vocabTrainParams , SVMTrainParamsExt & svmTrainParamsExt )
{
fn [ " vocName " ] > > vocName ;
FileNode currFn = fn ;
currFn = fn [ " ddmParams " ] ;
ddmParams . read ( currFn ) ;
currFn = fn [ " vocabTrainParams " ] ;
vocabTrainParams . read ( currFn ) ;
currFn = fn [ " svmTrainParamsExt " ] ;
svmTrainParamsExt . read ( currFn ) ;
}
// Stores the dataset name followed by the three parameter groups.
// Each group is written under its own key, between an opening and a closing marker.
static void writeUsedParams(FileStorage& fs, const string& vocName, const DDMParams& ddmParams, const VocabTrainParams& vocabTrainParams, const SVMTrainParamsExt& svmTrainParamsExt)
{
    fs << " vocName " << vocName;

    fs << " ddmParams ";
    fs << " { ";
    ddmParams.write(fs);
    fs << " } ";

    fs << " vocabTrainParams ";
    fs << " { ";
    vocabTrainParams.write(fs);
    fs << " } ";

    fs << " svmTrainParamsExt ";
    fs << " { ";
    svmTrainParamsExt.write(fs);
    fs << " } ";
}
// Prints the current configuration to standard output: both paths first, then each
// parameter group preceded by an empty line, all framed by two separator lines.
static void printUsedParams(const string& vocPath, const string& resDir,
                            const DDMParams& ddmParams, const VocabTrainParams& vocabTrainParams,
                            const SVMTrainParamsExt& svmTrainParamsExt)
{
    const char* const separator = " ---------------------------------------------------------------- ";

    cout << " CURRENT CONFIGURATION " << endl
         << separator << endl
         << " vocPath: " << vocPath << endl
         << " resDir: " << resDir << endl;

    cout << endl;
    ddmParams.print();

    cout << endl;
    vocabTrainParams.print();

    cout << endl;
    svmTrainParamsExt.print();

    cout << separator << endl << endl;
}
static bool readVocabulary ( const string & filename , Mat & vocabulary )
{
cout < < " Reading vocabulary... " ;
FileStorage fs ( filename , FileStorage : : READ ) ;
if ( fs . isOpened ( ) )
{
fs [ " vocabulary " ] > > vocabulary ;
cout < < " done " < < endl ;
return true ;
}
return false ;
}
static bool writeVocabulary ( const string & filename , const Mat & vocabulary )
{
cout < < " Saving vocabulary... " < < endl ;
FileStorage fs ( filename , FileStorage : : WRITE ) ;
if ( fs . isOpened ( ) )
{
fs < < " vocabulary " < < vocabulary ;
return true ;
}
return false ;
}
// Returns the visual vocabulary, loading it from 'filename' when possible and training it otherwise.
//
// When the file cannot be read, images of trainParams.trainObjClass are visited in random order.
// From each image a random subset of its descriptors (trainParams.descProportion of them) is
// handed to a BOWKMeansTrainer, until either every image has been used or the descriptor count
// allowed by the trainParams.memoryUse budget (in MB) is reached. The clustered vocabulary is
// then written to 'filename'; the process exits if that file cannot be written.
//
// Parameters:
//   filename     storage file used both to load and to save the vocabulary
//   vocData      dataset accessor providing the training images
//   trainParams  vocabulary training settings
//   fdetector    keypoint detector applied to every image
//   dextractor   descriptor extractor; its descriptors must be of type CV_32FC1
static Mat trainVocabulary(const string& filename, VocData& vocData, const VocabTrainParams& trainParams,
                           const Ptr<FeatureDetector>& fdetector, const Ptr<DescriptorExtractor>& dextractor)
{
    Mat vocabulary;
    if (!readVocabulary(filename, vocabulary))
    {
        CV_Assert(dextractor->descriptorType() == CV_32FC1);
        const int descByteSize = dextractor->descriptorSize() * 4;
        // Guards the division below.
        CV_Assert(descByteSize > 0);

        // Total number of descs to use for training. The byte budget is computed in 64 bits:
        // memoryUse * 1048576 no longer fits in an int for budgets of 2048 MB and above.
        const int64 maxDescCount64 = (static_cast<int64>(trainParams.memoryUse) * 1048576) / descByteSize;
        const int maxDescCount = static_cast<int>(std::min<int64>(maxDescCount64, static_cast<int64>(INT_MAX)));

        cout << " Extracting VOC data... " << endl;
        vector<ObdImage> images;
        vector<char> objectPresent;
        vocData.getClassImages(trainParams.trainObjClass, CV_OBD_TRAIN, images, objectPresent);

        cout << " Computing descriptors... " << endl;
        RNG& rng = theRNG();
        TermCriteria terminate_criterion;
        terminate_criterion.epsilon = FLT_EPSILON;
        BOWKMeansTrainer bowTrainer(trainParams.vocabSize, terminate_criterion, 3, KMEANS_PP_CENTERS);

        while (images.size() > 0)
        {
            if (bowTrainer.descripotorsCount() >= maxDescCount)
            {
                assert(bowTrainer.descripotorsCount() == maxDescCount);
#ifdef DEBUG_DESC_PROGRESS
                cout << " Breaking due to full memory ( descriptors count = " << bowTrainer.descripotorsCount()
                     << " ; descriptor size in bytes = " << descByteSize << " ; all used memory = "
                     << static_cast<int64>(bowTrainer.descripotorsCount()) * descByteSize << endl;
#endif
                break;
            }

            // Randomly pick an image from the dataset which hasn't yet been seen
            // and compute the descriptors from that image.
            int randImgIdx = rng((unsigned)images.size());
            Mat colorImage = imread(images[randImgIdx].path);
            vector<KeyPoint> imageKeypoints;
            fdetector->detect(colorImage, imageKeypoints);
            Mat imageDescriptors;
            dextractor->compute(colorImage, imageKeypoints, imageDescriptors);

            //check that there were descriptors calculated for the current image
            if (!imageDescriptors.empty())
            {
                int descCount = imageDescriptors.rows;
                // Number of descriptors to take from this image. Clamped to [0, descCount] so that a
                // descProportion outside [0, 1] cannot push the fill() below outside of 'usedMask'.
                int descsToExtract = static_cast<int>(trainParams.descProportion * static_cast<float>(descCount));
                descsToExtract = std::max(0, std::min(descsToExtract, descCount));

                // Fill mask of used descriptors: mark the first 'descsToExtract' entries,
                // then scatter the marks with random pairwise swaps.
                vector<char> usedMask(descCount, false);
                fill(usedMask.begin(), usedMask.begin() + descsToExtract, true);
                for (int i = 0; i < descCount; i++)
                {
                    int i1 = rng(descCount), i2 = rng(descCount);
                    char tmp = usedMask[i1]; usedMask[i1] = usedMask[i2]; usedMask[i2] = tmp;
                }

                // Add the selected descriptors, stopping silently once the budget is full.
                for (int i = 0; i < descCount; i++)
                {
                    if (usedMask[i] && bowTrainer.descripotorsCount() < maxDescCount)
                        bowTrainer.add(imageDescriptors.row(i));
                }
            }

#ifdef DEBUG_DESC_PROGRESS
            cout << images.size() << " images left, " << images[randImgIdx].id << " processed - "
                 << cvRound((static_cast<double>(bowTrainer.descripotorsCount()) / static_cast<double>(maxDescCount)) * 100.0)
                 << " % memory used " << (imageDescriptors.empty() ? " -> no descriptors extracted, skipping " : " ") << endl;
#endif

            // Delete the current element from images so it is not added again
            images.erase(images.begin() + randImgIdx);
        }

        cout << " Maximum allowed descriptor count: " << maxDescCount << " , Actual descriptor count: " << bowTrainer.descripotorsCount() << endl;

        cout << " Training vocabulary... " << endl;
        vocabulary = bowTrainer.cluster();

        if (!writeVocabulary(filename, vocabulary))
        {
            cout << " Error: file " << filename << " can not be opened to write " << endl;
            exit(-1);
        }
    }
    return vocabulary;
}
static bool readBowImageDescriptor ( const string & file , Mat & bowImageDescriptor )
{
FileStorage fs ( file , FileStorage : : READ ) ;
if ( fs . isOpened ( ) )
{
fs [ " imageDescriptor " ] > > bowImageDescriptor ;
return true ;
}
return false ;
}
static bool writeBowImageDescriptor ( const string & file , const Mat & bowImageDescriptor )
{
FileStorage fs ( file , FileStorage : : WRITE ) ;
if ( fs . isOpened ( ) )
{
fs < < " imageDescriptor " < < bowImageDescriptor ;
return true ;
}
return false ;
}
// Load in the bag of words vectors for a set of images, from file if possible
static void calculateImageDescriptors ( const vector < ObdImage > & images , vector < Mat > & imageDescriptors ,
Ptr < BOWImgDescriptorExtractor > & bowExtractor , const Ptr < FeatureDetector > & fdetector ,
const string & resPath )
{
CV_Assert ( ! bowExtractor - > getVocabulary ( ) . empty ( ) ) ;
imageDescriptors . resize ( images . size ( ) ) ;
for ( size_t i = 0 ; i < images . size ( ) ; i + + )
{
string filename = resPath + bowImageDescriptorsDir + " / " + images [ i ] . id + " .xml.gz " ;
if ( readBowImageDescriptor ( filename , imageDescriptors [ i ] ) )
{
# ifdef DEBUG_DESC_PROGRESS
cout < < " Loaded bag of word vector for image " < < i + 1 < < " of " < < images . size ( ) < < " ( " < < images [ i ] . id < < " ) " < < endl ;
# endif
}
else
{
Mat colorImage = imread ( images [ i ] . path ) ;
# ifdef DEBUG_DESC_PROGRESS
cout < < " Computing descriptors for image " < < i + 1 < < " of " < < images . size ( ) < < " ( " < < images [ i ] . id < < " ) " < < flush ;
# endif
vector < KeyPoint > keypoints ;
fdetector - > detect ( colorImage , keypoints ) ;
# ifdef DEBUG_DESC_PROGRESS
cout < < " + generating BoW vector " < < std : : flush ;
# endif
bowExtractor - > compute ( colorImage , keypoints , imageDescriptors [ i ] ) ;
# ifdef DEBUG_DESC_PROGRESS
cout < < " ...DONE " < < static_cast < int > ( static_cast < float > ( i + 1 ) / static_cast < float > ( images . size ( ) ) * 100.0 )
< < " % complete " < < endl ;
# endif
if ( ! imageDescriptors [ i ] . empty ( ) )
{
if ( ! writeBowImageDescriptor ( filename , imageDescriptors [ i ] ) )
{
cout < < " Error: file " < < filename < < " can not be opened to write bow image descriptor " < < endl ;
exit ( - 1 ) ;
}
}
}
}
}
// Drops every image whose bag-of-words vector is empty, erasing the same index from
// `images`, `bowImageDescriptors` and `objectPresent`. Asserts that `images` is not empty.
static void removeEmptyBowImageDescriptors(vector<ObdImage>& images, vector<Mat>& bowImageDescriptors,
                                           vector<char>& objectPresent)
{
    CV_Assert(!images.empty());

    // Visit indices from last to first so an erase never shifts an element still to be checked.
    int idx = static_cast<int>(images.size());
    while (idx-- > 0)
    {
        if (!bowImageDescriptors[idx].empty())
            continue;

        cout << " Removing image " << images[idx].id << " due to no descriptors... " << endl;
        images.erase(images.begin() + idx);
        bowImageDescriptors.erase(bowImageDescriptors.begin() + idx);
        objectPresent.erase(objectPresent.begin() + idx);
    }
}
// Randomly discards `descsToDelete` training examples, erasing the same index from
// `images`, `bowImageDescriptors` and `objectPresent` so the three vectors stay aligned.
//
// All three vectors are taken by reference. Previously the descriptors and the presence
// flags were taken by value, so only `images` shrank in the caller and the remaining
// images no longer lined up with their descriptors and labels.
//
// A randomly drawn positive example is kept (and another index drawn) while the share of
// positives is below svmParamsExt.targetRatio, negatives are still available and
// svmParamsExt.balanceClasses is false.
//
// The loop stops early once no examples are left; a non-positive `descsToDelete` removes nothing.
static void removeBowImageDescriptorsByCount(vector<ObdImage>& images, vector<Mat>& bowImageDescriptors, vector<char>& objectPresent,
                                             const SVMTrainParamsExt& svmParamsExt, int descsToDelete)
{
    // The parallel vectors must describe the same set of examples before anything is erased.
    CV_Assert(images.size() == bowImageDescriptors.size());
    CV_Assert(bowImageDescriptors.size() == objectPresent.size());

    RNG& rng = theRNG();
    int pos_ex = static_cast<int>(std::count(objectPresent.begin(), objectPresent.end(), static_cast<char>(1)));
    int neg_ex = static_cast<int>(std::count(objectPresent.begin(), objectPresent.end(), static_cast<char>(0)));

    while (descsToDelete > 0 && !images.empty())
    {
        const int randIdx = static_cast<int>(rng(static_cast<unsigned>(images.size())));

        // Prefer positive training examples according to svmParamsExt.targetRatio if required
        if (objectPresent[randIdx])
        {
            const float posRatio = static_cast<float>(pos_ex) / static_cast<float>(neg_ex + pos_ex);
            if ((posRatio < svmParamsExt.targetRatio) && (neg_ex > 0) && (svmParamsExt.balanceClasses == false))
                continue; // keep this positive, draw again
            pos_ex--;
        }
        else
        {
            neg_ex--;
        }

        images.erase(images.begin() + randIdx);
        bowImageDescriptors.erase(bowImageDescriptors.begin() + randIdx);
        objectPresent.erase(objectPresent.begin() + randIdx);
        descsToDelete--;
    }

    CV_Assert(bowImageDescriptors.size() == objectPresent.size());
}
// Configures `svmParams` for a C-SVC with an RBF kernel and reports how many responses
// equal 1 (positive) and -1 (negative).
//
// When `balanceClasses` is set, two class weights are computed from the positive/negative
// shares, `class_wts_cv` is made a 2x1 CV_32FC1 header over them and
// svmParams.class_weights is pointed at `class_wts_cv`.
//
// The weights live in function-local static storage. Previously they lived in a local
// cv::Mat; the CvMat header does not keep that buffer alive, so class_weights pointed at
// freed memory once this function returned. Consequence of the static buffer: the values
// are overwritten by the next call, so train with `svmParams` before calling this again
// (not reentrant).
static void setSVMParams(CvSVMParams& svmParams, CvMat& class_wts_cv, const Mat& responses, bool balanceClasses)
{
    int pos_ex = countNonZero(responses == 1);
    int neg_ex = countNonZero(responses == -1);
    cout << pos_ex << " positive training samples; " << neg_ex << " negative training samples " << endl;

    svmParams.svm_type = CvSVM::C_SVC;
    svmParams.kernel_type = CvSVM::RBF;
    if (balanceClasses)
    {
        // Backing store for class_wts_cv; must outlive this call (see header comment).
        static float class_wts[2];

        // The first training sample determines the '+1' class internally, even if it is negative,
        // so store whether this is the case so that the class weights can be reversed accordingly.
        // NOTE(review): the element is read as float, but trainSVMClassifier in this file builds
        // `responses` as CV_32SC1; an int -1 reinterpreted as float does not compare < 0, so this
        // flag looks like it can never be set for that caller. Left unchanged on purpose - confirm
        // against CvSVM's class ordering before changing which weight goes to which class.
        bool reversed_classes = (responses.at<float>(0) < 0.f);
        if (reversed_classes == false)
        {
            class_wts[0] = static_cast<float>(pos_ex) / static_cast<float>(pos_ex + neg_ex); // weighting for costs of positive class + 1 (i.e. cost of false positive - larger gives greater cost)
            class_wts[1] = static_cast<float>(neg_ex) / static_cast<float>(pos_ex + neg_ex); // weighting for costs of negative class - 1 (i.e. cost of false negative)
        }
        else
        {
            class_wts[0] = static_cast<float>(neg_ex) / static_cast<float>(pos_ex + neg_ex);
            class_wts[1] = static_cast<float>(pos_ex) / static_cast<float>(pos_ex + neg_ex);
        }
        class_wts_cv = cvMat(2, 1, CV_32FC1, class_wts);
        svmParams.class_weights = &class_wts_cv;
    }
}
// Loads CvSVM's default parameter grid into each of the six grids, then sets the step of
// every grid except C and gamma to 0.
// NOTE(review): presumably a step of 0 tells CvSVM::train_auto not to search that
// parameter - confirm against the train_auto documentation.
static void setSVMTrainAutoParams(CvParamGrid& c_grid, CvParamGrid& gamma_grid,
                                  CvParamGrid& p_grid, CvParamGrid& nu_grid,
                                  CvParamGrid& coef_grid, CvParamGrid& degree_grid)
{
    // Grids that keep their default step.
    c_grid = CvSVM::get_default_grid(CvSVM::C);
    gamma_grid = CvSVM::get_default_grid(CvSVM::GAMMA);

    // Grids that get the default range but a zero step, handled in the original order.
    CvParamGrid* const zeroStepGrids[] = { &p_grid, &nu_grid, &coef_grid, &degree_grid };
    const int zeroStepKinds[] = { CvSVM::P, CvSVM::NU, CvSVM::COEF, CvSVM::DEGREE };
    for (int k = 0; k < 4; ++k)
    {
        *zeroStepGrids[k] = CvSVM::get_default_grid(zeroStepKinds[k]);
        zeroStepGrids[k]->step = 0;
    }
}
// Makes `svm` ready to classify `objClassName`.
//
// If a classifier file for the class can be opened under resPath, it is loaded. Otherwise
// the classifier is trained from the VOC training set and saved to that file:
//   1. fetch the training images and their ground truth from `vocData`;
//   2. compute (or load) a bag-of-words vector per image and drop images without one;
//   3. if svmParamsExt.descPercent < 1, randomly discard the surplus examples;
//   4. copy the vectors and +1/-1 responses into training matrices and run train_auto.
//
// Exits the program with -1 if a bag-of-words vector's width differs from
// bowExtractor->descriptorSize().
static void trainSVMClassifier(CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
                               Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
                               const string& resPath)
{
    /* first check if a previously trained svm for the current class has been saved to file */
    string svmFilename = resPath + svmsDir + " / " + objClassName + " .xml.gz ";
    FileStorage fs(svmFilename, FileStorage::READ);
    if (fs.isOpened())
    {
        cout << " *** LOADING SVM CLASSIFIER FOR CLASS " << objClassName << " *** " << endl;
        svm.load(svmFilename.c_str());
    }
    else
    {
        cout << " *** TRAINING CLASSIFIER FOR CLASS " << objClassName << " *** " << endl;
        cout << " CALCULATING BOW VECTORS FOR TRAINING SET OF " << objClassName << " ... " << endl;

        // Get classification ground truth for images in the training set
        vector<ObdImage> images;
        vector<Mat> bowImageDescriptors;
        vector<char> objectPresent;
        vocData.getClassImages(objClassName, CV_OBD_TRAIN, images, objectPresent);

        // Compute the bag of words vector for each image in the training set.
        calculateImageDescriptors(images, bowImageDescriptors, bowExtractor, fdetector, resPath);

        // Remove any images for which descriptors could not be calculated
        removeEmptyBowImageDescriptors(images, bowImageDescriptors, objectPresent);

        CV_Assert(svmParamsExt.descPercent > 0.f && svmParamsExt.descPercent <= 1.f);
        if (svmParamsExt.descPercent < 1.f)
        {
            int descsToDelete = static_cast<int>(static_cast<float>(images.size()) * (1.0 - svmParamsExt.descPercent));
            cout << " Using " << (images.size() - descsToDelete) << " of " << images.size() <<
                    " descriptors for training ( " << svmParamsExt.descPercent * 100.0 << " %) " << endl;
            removeBowImageDescriptorsByCount(images, bowImageDescriptors, objectPresent, svmParamsExt, descsToDelete);
        }

        // Prepare the input matrices for SVM training.
        Mat trainData((int)images.size(), bowExtractor->getVocabulary().rows, CV_32FC1);
        Mat responses((int)images.size(), 1, CV_32SC1);

        // Transfer bag of words vectors and responses across to the training data matrices
        for (size_t imageIdx = 0; imageIdx < images.size(); imageIdx++)
        {
            // Transfer image descriptor (bag of words vector) to training data matrix
            Mat submat = trainData.row((int)imageIdx);
            if (bowImageDescriptors[imageIdx].cols != bowExtractor->descriptorSize())
            {
                // Report the value the check above compared against; the vocabulary's column
                // count printed before is a different dimension from the one being checked.
                cout << " Error: computed bow image descriptor size " << bowImageDescriptors[imageIdx].cols
                     << " differs from vocabulary size " << bowExtractor->descriptorSize() << endl;
                exit(-1);
            }
            bowImageDescriptors[imageIdx].copyTo(submat);

            // Set response value
            responses.at<int>((int)imageIdx) = objectPresent[imageIdx] ? 1 : -1;
        }

        cout << " TRAINING SVM FOR CLASS ... " << objClassName << " ... " << endl;
        CvSVMParams svmParams;
        CvMat class_wts_cv;
        setSVMParams(svmParams, class_wts_cv, responses, svmParamsExt.balanceClasses);
        CvParamGrid c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid;
        setSVMTrainAutoParams(c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid);
        svm.train_auto(trainData, responses, Mat(), Mat(), svmParams, 10, c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid);
        cout << " SVM TRAINING FOR CLASS " << objClassName << " COMPLETED " << endl;

        svm.save(svmFilename.c_str());
        cout << " SAVED CLASSIFIER TO FILE " << endl;
    }
}
// Scores every test-set image of `objClassName` with `svm` and hands the scores to
// vocData.writeClassifierResultsFile (target directory: resPath + plotsDir).
// Images for which no bag-of-words vector could be obtained are dropped before scoring.
static void computeConfidences(CvSVM& svm, const string& objClassName, VocData& vocData,
                               Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
                               const string& resPath)
{
    cout << " *** CALCULATING CONFIDENCES FOR CLASS " << objClassName << " *** " << endl;
    cout << " CALCULATING BOW VECTORS FOR TEST SET OF " << objClassName << " ... " << endl;

    // Ground truth and bag-of-words vectors for the test set.
    vector<ObdImage> images;
    vector<Mat> bowImageDescriptors;
    vector<char> objectPresent;
    vocData.getClassImages(objClassName, CV_OBD_TEST, images, objectPresent);
    calculateImageDescriptors(images, bowImageDescriptors, bowExtractor, fdetector, resPath);
    removeEmptyBowImageDescriptors(images, bowImageDescriptors, objectPresent);

    cout << " CALCULATING CONFIDENCE SCORES FOR CLASS " << objClassName << " ... " << endl;
    vector<float> confidences(images.size());

    // Probe the first image once: if the predicted class label and the raw decision value
    // disagree in sign, every score is negated.
    float signMul = 1.f;
    if (!images.empty())
    {
        const float classVal = svm.predict(bowImageDescriptors[0], false);
        const float scoreVal = svm.predict(bowImageDescriptors[0], true);
        const bool sameSign = (classVal < 0) == (scoreVal < 0);
        signMul = sameSign ? 1.f : -1.f;
    }

    // Confidence = (possibly sign-corrected) output of the decision function.
    for (size_t imageIdx = 0; imageIdx < images.size(); ++imageIdx)
        confidences[imageIdx] = signMul * svm.predict(bowImageDescriptors[imageIdx], true);

    cout << " WRITING QUERY RESULTS TO VOC RESULTS FILE FOR CLASS " << objClassName << " ... " << endl;
    vocData.writeClassifierResultsFile(resPath + plotsDir, objClassName, CV_OBD_TEST, images, confidences, 1, true);
    cout << " DONE - " << objClassName << endl;
    cout << " --------------------------------------------------------------- " << endl;
}
static void computeGnuPlotOutput ( const string & resPath , const string & objClassName , VocData & vocData )
{
vector < float > precision , recall ;
float ap ;
const string resultFile = vocData . getResultsFilename ( objClassName , CV_VOC_TASK_CLASSIFICATION , CV_OBD_TEST ) ;
const string plotFile = resultFile . substr ( 0 , resultFile . size ( ) - 4 ) + " .plt " ;
cout < < " Calculating precision recall curve for class ' " < < objClassName < < " ' " < < endl ;
vocData . calcClassifierPrecRecall ( resPath + plotsDir + " / " + resultFile , precision , recall , ap , true ) ;
cout < < " Outputting to GNUPlot file... " < < endl ;
vocData . savePrecRecallToGnuplot ( resPath + plotsDir + " / " + plotFile , precision , recall , ap , objClassName , CV_VOC_PLOT_PNG ) ;
}
int main ( int argc , char * * argv )
{
if ( argc ! = 3 & & argc ! = 6 )
{
help ( argv ) ;
return - 1 ;
}
cv : : initModule_nonfree ( ) ;
const string vocPath = argv [ 1 ] , resPath = argv [ 2 ] ;
// Read or set default parameters
string vocName ;
DDMParams ddmParams ;
VocabTrainParams vocabTrainParams ;
SVMTrainParamsExt svmTrainParamsExt ;
makeUsedDirs ( resPath ) ;
FileStorage paramsFS ( resPath + " / " + paramsFile , FileStorage : : READ ) ;
if ( paramsFS . isOpened ( ) )
{
readUsedParams ( paramsFS . root ( ) , vocName , ddmParams , vocabTrainParams , svmTrainParamsExt ) ;
CV_Assert ( vocName = = getVocName ( vocPath ) ) ;
}
else
{
vocName = getVocName ( vocPath ) ;
if ( argc ! = 6 )
{
cout < < " Feature detector, descriptor extractor, descriptor matcher must be set " < < endl ;
return - 1 ;
}
ddmParams = DDMParams ( argv [ 3 ] , argv [ 4 ] , argv [ 5 ] ) ; // from command line
// vocabTrainParams and svmTrainParamsExt is set by defaults
paramsFS . open ( resPath + " / " + paramsFile , FileStorage : : WRITE ) ;
if ( paramsFS . isOpened ( ) )
{
writeUsedParams ( paramsFS , vocName , ddmParams , vocabTrainParams , svmTrainParamsExt ) ;
paramsFS . release ( ) ;
}
else
{
cout < < " File " < < ( resPath + " / " + paramsFile ) < < " can not be opened to write " < < endl ;
return - 1 ;
}
}
// Create detector, descriptor, matcher.
Ptr < FeatureDetector > featureDetector = FeatureDetector : : create ( ddmParams . detectorType ) ;
Ptr < DescriptorExtractor > descExtractor = DescriptorExtractor : : create ( ddmParams . descriptorType ) ;
Ptr < BOWImgDescriptorExtractor > bowExtractor ;
if ( featureDetector . empty ( ) | | descExtractor . empty ( ) )
{
cout < < " featureDetector or descExtractor was not created " < < endl ;
return - 1 ;
}
{
Ptr < DescriptorMatcher > descMatcher = DescriptorMatcher : : create ( ddmParams . matcherType ) ;
if ( featureDetector . empty ( ) | | descExtractor . empty ( ) | | descMatcher . empty ( ) )
{
cout < < " descMatcher was not created " < < endl ;
return - 1 ;
}
bowExtractor = new BOWImgDescriptorExtractor ( descExtractor , descMatcher ) ;
}
// Print configuration to screen
printUsedParams ( vocPath , resPath , ddmParams , vocabTrainParams , svmTrainParamsExt ) ;
// Create object to work with VOC
VocData vocData ( vocPath , false ) ;
// 1. Train visual word vocabulary if a pre-calculated vocabulary file doesn't already exist from previous run
Mat vocabulary = trainVocabulary ( resPath + " / " + vocabularyFile , vocData , vocabTrainParams ,
featureDetector , descExtractor ) ;
bowExtractor - > setVocabulary ( vocabulary ) ;
// 2. Train a classifier and run a sample query for each object class
const vector < string > & objClasses = vocData . getObjectClasses ( ) ; // object class list
for ( size_t classIdx = 0 ; classIdx < objClasses . size ( ) ; + + classIdx )
{
// Train a classifier on train dataset
CvSVM svm ;
trainSVMClassifier ( svm , svmTrainParamsExt , objClasses [ classIdx ] , vocData ,
bowExtractor , featureDetector , resPath ) ;
// Now use the classifier over all images on the test dataset and rank according to score order
// also calculating precision-recall etc.
computeConfidences ( svm , objClasses [ classIdx ] , vocData ,
bowExtractor , featureDetector , resPath ) ;
// Calculate precision/recall/ap and use GNUPlot to output to a pdf file
computeGnuPlotOutput ( resPath , objClasses [ classIdx ] , vocData ) ;
}
return 0 ;
}