updated doc on features2d

pull/13383/head
Maria Dimashova 14 years ago
parent d7c86bbcdc
commit cf849c3baa
  1. doc/features2d_common_detection_description.tex (512)
  2. doc/features2d_detection_description.tex (827)
  3. doc/features2d_object_categorization.tex (200)
  4. doc/features2d_object_detection.tex (402)
  5. doc/features2d_object_recognition.tex (667)
  6. doc/opencvref_body.tex (6)

@ -1,245 +1,11 @@
\section{Feature detection and description}
\ifCPy
\ifPy
\cvclass{CvSURFPoint}
A SURF keypoint, represented as a tuple \texttt{((x, y), laplacian, size, dir, hessian)}.
\begin{description}
\cvarg{x}{x-coordinate of the feature within the image}
\cvarg{y}{y-coordinate of the feature within the image}
\cvarg{laplacian}{-1, 0 or +1. Sign of the laplacian at the point. Can be used to speed up feature comparison, since features with laplacians of different signs cannot match}
\cvarg{size}{size of the feature}
\cvarg{dir}{orientation of the feature: 0..360 degrees}
\cvarg{hessian}{value of the hessian (can be used to approximately estimate the feature strengths; see also params.hessianThreshold)}
\end{description}
\fi
\cvCPyFunc{ExtractSURF}
Extracts Speeded Up Robust Features from an image.
\cvdefC{
void cvExtractSURF( \par const CvArr* image,\par const CvArr* mask,\par CvSeq** keypoints,\par CvSeq** descriptors,\par CvMemStorage* storage,\par CvSURFParams params );
}
\cvdefPy{ExtractSURF(image,mask,storage,params)-> (keypoints,descriptors)}
\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{mask}{The optional input 8-bit mask. The features are only found in the areas that contain more than 50\% of non-zero mask pixels}
\ifC
\cvarg{keypoints}{The output parameter; double pointer to the sequence of keypoints. The sequence of CvSURFPoint structures is as follows:}
\begin{lstlisting}
typedef struct CvSURFPoint
{
CvPoint2D32f pt; // position of the feature within the image
int laplacian; // -1, 0 or +1. sign of the laplacian at the point.
// can be used to speedup feature comparison
// (normally features with laplacians of different
// signs can not match)
int size; // size of the feature
float dir; // orientation of the feature: 0..360 degrees
float hessian; // value of the hessian (can be used to
// approximately estimate the feature strengths;
// see also params.hessianThreshold)
}
CvSURFPoint;
\end{lstlisting}
\cvarg{descriptors}{The optional output parameter; double pointer to the sequence of descriptors. Depending on the params.extended value, each element of the sequence will be either a 64-element or a 128-element floating-point (\texttt{CV\_32F}) vector. If the parameter is NULL, the descriptors are not computed}
\else
\cvarg{keypoints}{sequence of keypoints.}
\cvarg{descriptors}{sequence of descriptors. Each SURF descriptor is a list of floats, of length 64 or 128.}
\fi
\cvarg{storage}{Memory storage where keypoints and descriptors will be stored}
\ifC
\cvarg{params}{Various algorithm parameters put to the structure CvSURFParams:}
\begin{lstlisting}
typedef struct CvSURFParams
{
int extended; // 0 means basic descriptors (64 elements each),
// 1 means extended descriptors (128 elements each)
double hessianThreshold; // only features with keypoint.hessian
// larger than that are extracted.
// good default value is ~300-500 (can depend on the
// average local contrast and sharpness of the image).
// user can further filter out some features based on
// their hessian values and other characteristics.
int nOctaves; // the number of octaves to be used for extraction.
// With each next octave the feature size is doubled
// (3 by default)
int nOctaveLayers; // The number of layers within each octave
// (4 by default)
}
CvSURFParams;
CvSURFParams cvSURFParams(double hessianThreshold, int extended=0);
// returns default parameters
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(extended, hessianThreshold, nOctaves, nOctaveLayers)}:
\begin{description}
\cvarg{extended}{0 means basic descriptors (64 elements each), 1 means extended descriptors (128 elements each)}
\cvarg{hessianThreshold}{Only features with a hessian larger than this value are extracted. A good default value is ~300-500 (it can depend on the average local contrast and sharpness of the image). The user can further filter out some features based on their hessian values and other characteristics.}
\cvarg{nOctaves}{the number of octaves to be used for extraction. With each next octave the feature size is doubled (3 by default)}
\cvarg{nOctaveLayers}{The number of layers within each octave (4 by default)}
\end{description}}
\fi
\end{description}
The function cvExtractSURF finds robust features in the image, as
described in \cite{Bay06}. For each feature it returns its location, size,
orientation and optionally the descriptor, basic or extended. The function
can be used for object tracking and localization, image stitching etc.
\ifC
See the
\texttt{find\_obj.cpp} demo in OpenCV samples directory.
\else
To extract strong SURF features from an image
\begin{lstlisting}
>>> import cv
>>> im = cv.LoadImageM("building.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
>>> (keypoints, descriptors) = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 30000, 3, 1))
>>> print len(keypoints), len(descriptors)
6 6
>>> for ((x, y), laplacian, size, dir, hessian) in keypoints:
... print "x=\%d y=\%d laplacian=\%d size=\%d dir=\%f hessian=\%f" \% (x, y, laplacian, size, dir, hessian)
x=30 y=27 laplacian=-1 size=31 dir=69.778503 hessian=36979.789062
x=296 y=197 laplacian=1 size=33 dir=111.081039 hessian=31514.349609
x=296 y=266 laplacian=1 size=32 dir=107.092300 hessian=31477.908203
x=254 y=284 laplacian=1 size=31 dir=279.137360 hessian=34169.800781
x=498 y=525 laplacian=-1 size=33 dir=278.006592 hessian=31002.759766
x=777 y=281 laplacian=1 size=70 dir=167.940964 hessian=35538.363281
\end{lstlisting}
\fi
\cvCPyFunc{GetStarKeypoints}
Retrieves keypoints using the StarDetector algorithm.
\cvdefC{
CvSeq* cvGetStarKeypoints( \par const CvArr* image,\par CvMemStorage* storage,\par CvStarDetectorParams params=cvStarDetectorParams() );
}
\cvdefPy{GetStarKeypoints(image,storage,params)-> keypoints}
\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{storage}{Memory storage where the keypoints will be stored}
\ifC
\cvarg{params}{Various algorithm parameters given to the structure CvStarDetectorParams:}
\begin{lstlisting}
typedef struct CvStarDetectorParams
{
int maxSize; // maximal size of the features detected. The following
// values of the parameter are supported:
// 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
int responseThreshold; // threshold for the approximated laplacian,
// used to eliminate weak features
int lineThresholdProjected; // another threshold for laplacian to
// eliminate edges
int lineThresholdBinarized; // another threshold for the feature
// scale to eliminate edges
int suppressNonmaxSize; // linear size of a pixel neighborhood
// for non-maxima suppression
}
CvStarDetectorParams;
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(maxSize, responseThreshold, lineThresholdProjected, lineThresholdBinarized, suppressNonmaxSize)}:
\begin{description}
\cvarg{maxSize}{maximal size of the features detected. The following values of the parameter are supported: 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128}
\cvarg{responseThreshold}{threshold for the approximated laplacian, used to eliminate weak features}
\cvarg{lineThresholdProjected}{another threshold for laplacian to eliminate edges}
\cvarg{lineThresholdBinarized}{another threshold for the feature scale to eliminate edges}
\cvarg{suppressNonmaxSize}{linear size of a pixel neighborhood for non-maxima suppression}
\end{description}
}
\fi
\end{description}
The function GetStarKeypoints extracts keypoints that are local
scale-space extrema. The scale-space is constructed by computing
approximate values of laplacians with different sigmas at each
pixel. Instead of using pyramids, a popular approach to save computing
time, all of the laplacians are computed at each pixel of the original
high-resolution image. But each approximate laplacian value is computed
in O(1) time regardless of the sigma, thanks to the use of integral
images. The algorithm is based on the paper \cite{Agrawal08}, but instead
of a square, hexagon or octagon it uses an 8-end star shape, hence the name,
consisting of overlapping upright and tilted squares.
\ifC
Each computed feature is represented by the following structure:
\begin{lstlisting}
typedef struct CvStarKeypoint
{
CvPoint pt; // coordinates of the feature
int size; // feature size, see CvStarDetectorParams::maxSize
float response; // the approximated laplacian value at that point.
}
CvStarKeypoint;
inline CvStarKeypoint cvStarKeypoint(CvPoint pt, int size, float response);
\end{lstlisting}
\else
Each keypoint is represented by a tuple \texttt{((x, y), size, response)}:
\begin{description}
\cvarg{x, y}{Screen coordinates of the keypoint}
\cvarg{size}{feature size, up to \texttt{maxSize}}
\cvarg{response}{approximated laplacian value for the keypoint}
\end{description}
\fi
\ifC
Below is a small usage sample:
\begin{lstlisting}
#include "cv.h"
#include "highgui.h"
int main(int argc, char** argv)
{
const char* filename = argc > 1 ? argv[1] : "lena.jpg";
IplImage* img = cvLoadImage( filename, 0 ), *cimg;
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq* keypoints = 0;
int i;
if( !img )
return 0;
cvNamedWindow( "image", 1 );
cvShowImage( "image", img );
cvNamedWindow( "features", 1 );
cimg = cvCreateImage( cvGetSize(img), 8, 3 );
cvCvtColor( img, cimg, CV_GRAY2BGR );
keypoints = cvGetStarKeypoints( img, storage, cvStarDetectorParams(45) );
for( i = 0; i < (keypoints ? keypoints->total : 0); i++ )
{
CvStarKeypoint kpt = *(CvStarKeypoint*)cvGetSeqElem(keypoints, i);
int r = kpt.size/2;
cvCircle( cimg, kpt.pt, r, CV_RGB(0,255,0));
cvLine( cimg, cvPoint(kpt.pt.x + r, kpt.pt.y + r),
cvPoint(kpt.pt.x - r, kpt.pt.y - r), CV_RGB(0,255,0));
cvLine( cimg, cvPoint(kpt.pt.x - r, kpt.pt.y + r),
cvPoint(kpt.pt.x + r, kpt.pt.y - r), CV_RGB(0,255,0));
}
cvShowImage( "features", cimg );
cvWaitKey();
}
\end{lstlisting}
\fi
\fi
\ifCpp
\section{Common Interfaces of Feature Detectors}
Feature detectors in OpenCV have wrappers with a common interface that makes it easy to switch
between different algorithms solving the same problem. All objects that implement keypoint detectors
inherit the \cvCppCross{FeatureDetector} interface.
\cvclass{KeyPoint}
Data structure for salient point detectors.
\begin{lstlisting}
class KeyPoint
@ -247,7 +13,7 @@ class KeyPoint
public:
// the default constructor
KeyPoint() : pt(0,0), size(0), angle(-1), response(0), octave(0),
class_id(-1) {}
// the full constructor
KeyPoint(Point2f _pt, float _size, float _angle=-1,
float _response=0, int _octave=0, int _class_id=-1)
@ -278,11 +44,11 @@ public:
float size; // diameter of the meaningfull keypoint neighborhood
float angle; // computed orientation of the keypoint (-1 if not applicable)
float response; // the response by which the most strong keypoints
// have been selected. Can be used for the further sorting
// or subsampling
int octave; // octave (pyramid layer) from which the keypoint has been extracted
int class_id; // object class (if the keypoints need to be clustered by
// an object they belong to)
};
// writes vector of keypoints to the file storage
@ -291,189 +57,6 @@ void write(FileStorage& fs, const string& name, const vector<KeyPoint>& keypoint
void read(const FileNode& node, CV_OUT vector<KeyPoint>& keypoints);
\end{lstlisting}
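For illustration, here is a minimal sketch (assuming the OpenCV 2.x C++ API; \texttt{lena.jpg} is a hypothetical input file) of saving detected keypoints to a YAML file with the \texttt{write} function above and reading them back:
\begin{lstlisting}
#include "cv.h"
#include "highgui.h"
#include <vector>
using namespace cv;
using namespace std;

int main()
{
    Mat img = imread("lena.jpg", 0);        // 8-bit grayscale image
    vector<KeyPoint> keypoints;
    FAST(img, keypoints, 30);               // detect some keypoints

    FileStorage fsWrite("keypoints.yml", FileStorage::WRITE);
    write(fsWrite, "keypoints", keypoints); // store the vector of keypoints
    fsWrite.release();

    vector<KeyPoint> loaded;
    FileStorage fsRead("keypoints.yml", FileStorage::READ);
    read(fsRead["keypoints"], loaded);      // read them back
    return 0;
}
\end{lstlisting}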
\cvclass{MSER}
Maximally-Stable Extremal Region Extractor
\begin{lstlisting}
class MSER : public CvMSERParams
{
public:
// default constructor
MSER();
// constructor that initializes all the algorithm parameters
MSER( int _delta, int _min_area, int _max_area,
float _max_variation, float _min_diversity,
int _max_evolution, double _area_threshold,
double _min_margin, int _edge_blur_size );
// runs the extractor on the specified image; returns the MSERs,
// each encoded as a contour (vector<Point>, see findContours)
// the optional mask marks the area where MSERs are searched for
void operator()( const Mat& image, vector<vector<Point> >& msers, const Mat& mask ) const;
};
\end{lstlisting}
The class encapsulates all the parameters of the MSER extraction algorithm (see \url{http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions}).
\cvclass{StarDetector}
Implements the Star keypoint detector.
\begin{lstlisting}
class StarDetector : CvStarDetectorParams
{
public:
// default constructor
StarDetector();
// the full constructor initialized all the algorithm parameters:
// maxSize - maximum size of the features. The following
// values of the parameter are supported:
// 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
// responseThreshold - threshold for the approximated laplacian,
// used to eliminate weak features. The larger it is,
// the less features will be retrieved
// lineThresholdProjected - another threshold for the laplacian to
// eliminate edges
// lineThresholdBinarized - another threshold for the feature
// size to eliminate edges.
// The larger the 2 threshold, the more points you get.
StarDetector(int maxSize, int responseThreshold,
int lineThresholdProjected,
int lineThresholdBinarized,
int suppressNonmaxSize);
// finds keypoints in an image
void operator()(const Mat& image, vector<KeyPoint>& keypoints) const;
};
\end{lstlisting}
The class implements a modified version of the CenSurE keypoint detector described in
\cite{Agrawal08}.
\cvclass{SIFT}
Class for extracting keypoints and computing descriptors using the approach named Scale Invariant Feature Transform (SIFT).
\begin{lstlisting}
class CV_EXPORTS SIFT
{
public:
struct CommonParams
{
static const int DEFAULT_NOCTAVES = 4;
static const int DEFAULT_NOCTAVE_LAYERS = 3;
static const int DEFAULT_FIRST_OCTAVE = -1;
enum{ FIRST_ANGLE = 0, AVERAGE_ANGLE = 1 };
CommonParams();
CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave,
int _angleMode );
int nOctaves, nOctaveLayers, firstOctave;
int angleMode;
};
struct DetectorParams
{
static double GET_DEFAULT_THRESHOLD()
{ return 0.04 / SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS / 2.0; }
static double GET_DEFAULT_EDGE_THRESHOLD() { return 10.0; }
DetectorParams();
DetectorParams( double _threshold, double _edgeThreshold );
double threshold, edgeThreshold;
};
struct DescriptorParams
{
static double GET_DEFAULT_MAGNIFICATION() { return 3.0; }
static const bool DEFAULT_IS_NORMALIZE = true;
static const int DESCRIPTOR_SIZE = 128;
DescriptorParams();
DescriptorParams( double _magnification, bool _isNormalize,
bool _recalculateAngles );
double magnification;
bool isNormalize;
bool recalculateAngles;
};
SIFT();
//! sift-detector constructor
SIFT( double _threshold, double _edgeThreshold,
int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
int _angleMode=CommonParams::FIRST_ANGLE );
//! sift-descriptor constructor
SIFT( double _magnification, bool _isNormalize=true,
bool _recalculateAngles = true,
int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
int _angleMode=CommonParams::FIRST_ANGLE );
SIFT( const CommonParams& _commParams,
const DetectorParams& _detectorParams = DetectorParams(),
const DescriptorParams& _descriptorParams = DescriptorParams() );
//! returns the descriptor size in floats (128)
int descriptorSize() const { return DescriptorParams::DESCRIPTOR_SIZE; }
//! finds the keypoints using SIFT algorithm
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const;
//! finds the keypoints and computes descriptors for them using SIFT algorithm.
//! Optionally it can compute descriptors for the user-provided keypoints
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
Mat& descriptors,
bool useProvidedKeypoints=false) const;
CommonParams getCommonParams () const { return commParams; }
DetectorParams getDetectorParams () const { return detectorParams; }
DescriptorParams getDescriptorParams () const { return descriptorParams; }
protected:
...
};
\end{lstlisting}
\cvclass{SURF}
Class for extracting Speeded Up Robust Features from an image.
\begin{lstlisting}
class SURF : public CvSURFParams
{
public:
// default constructor
SURF();
// constructor that initializes all the algorithm parameters
SURF(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false);
// returns the number of elements in each descriptor (64 or 128)
int descriptorSize() const;
// detects keypoints using fast multi-scale Hessian detector
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const;
// detects keypoints and computes the SURF descriptors for them
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
vector<float>& descriptors,
bool useProvidedKeypoints=false) const;
};
\end{lstlisting}
The class \texttt{SURF} implements the Speeded Up Robust Features descriptor \cite{Bay06}.
There is a fast multi-scale Hessian keypoint detector that can be used to find the keypoints
(which is the default option), but the descriptors can also be computed for user-specified keypoints.
The class can be used for object tracking and localization, image stitching etc. See the
\texttt{find\_obj.cpp} demo in the OpenCV samples directory.
\section{Common Interfaces for Feature Detection and Descriptor Extraction}
Both detectors and descriptors in OpenCV have wrappers with a common interface that makes it easy to switch
between different algorithms solving the same problem. All objects that implement keypoint detectors inherit the
FeatureDetector interface. Descriptors that are represented as vectors in a multidimensional space can be
computed with the DescriptorExtractor interface. The DescriptorMatcher interface can be used to find matches between
two sets of descriptors. GenericDescriptorMatcher is a more generic interface for descriptors that does not make any
assumptions about descriptor representation. Every descriptor with the DescriptorExtractor interface has a wrapper with
the GenericDescriptorMatcher interface (see VectorDescriptorMatch). There are descriptors, such as the One-way descriptor and
Ferns, that have the GenericDescriptorMatcher interface implemented but do not support DescriptorExtractor.
\cvclass{FeatureDetector}
Abstract base class for 2D image feature detectors.
@ -737,6 +320,14 @@ Ptr<FeatureDetector> createFeatureDetector( const string& detectorType );
\cvarg{detectorType}{Feature detector type, e.g. ''SURF'', ''FAST'', ...}
\end{description}
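A minimal usage sketch (assuming the OpenCV 2.x C++ API; \texttt{scene.jpg} is a hypothetical input file):
\begin{lstlisting}
// create a FAST detector by name and run it on a grayscale image
Ptr<FeatureDetector> detector = createFeatureDetector("FAST");
Mat img = imread("scene.jpg", 0);
vector<KeyPoint> keypoints;
detector->detect(img, keypoints);
\end{lstlisting}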
\section{Common Interfaces of Descriptor Extractors}
Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that makes it easy to switch
between different algorithms solving the same problem. This section is devoted to computing descriptors
that are represented as vectors in a multidimensional space. All objects that implement ''vector''
descriptor extractors inherit the \cvCppCross{DescriptorExtractor} interface.
\cvclass{DescriptorExtractor}
Abstract base class for computing descriptors for image keypoints.
@ -890,6 +481,53 @@ protected:
}
\end{lstlisting}
\cvclass{OpponentColorDescriptorExtractor}
Adapts a descriptor extractor to compute descriptors in the Opponent Color Space
(refer to van de Sande et al., CGIV 2008 ''Color Descriptors for Object Category Recognition'').
The input RGB image is transformed into the Opponent Color Space. Then the unadapted descriptor extractor
(set in the constructor) computes descriptors on each of the three channels and concatenates
them into a single color descriptor.
\begin{lstlisting}
class OpponentColorDescriptorExtractor : public DescriptorExtractor
{
public:
OpponentColorDescriptorExtractor( const Ptr<DescriptorExtractor>& dextractor );
virtual void compute( const Mat& image, vector<KeyPoint>& keypoints,
Mat& descriptors ) const;
virtual void read( const FileNode& );
virtual void write( FileStorage& ) const;
virtual int descriptorSize() const;
virtual int descriptorType() const;
protected:
...
};
\end{lstlisting}
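A minimal usage sketch (assuming the OpenCV 2.x C++ API; \texttt{scene.jpg} is a hypothetical color input file):
\begin{lstlisting}
// wrap a SURF extractor so descriptors are computed in opponent color space
Ptr<DescriptorExtractor> extractor =
    new OpponentColorDescriptorExtractor( new SurfDescriptorExtractor );
Mat colorImg = imread("scene.jpg", 1), grayImg;
cvtColor(colorImg, grayImg, CV_BGR2GRAY);
vector<KeyPoint> keypoints;
FAST(grayImg, keypoints, 30);              // detect keypoints on the grayscale copy
Mat descriptors;                           // one row per keypoint, 3x the SURF length
extractor->compute(colorImg, keypoints, descriptors);
\end{lstlisting}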
\cvCppFunc{createDescriptorExtractor}
Descriptor extractor factory that creates a \cvCppCross{DescriptorExtractor} of the given type with
default parameters (rather than using the default constructor).
\begin{lstlisting}
Ptr<DescriptorExtractor>
createDescriptorExtractor( const string& descriptorExtractorType );
\end{lstlisting}
\begin{description}
\cvarg{descriptorExtractorType}{Descriptor extractor type, e.g. ''SURF'', ''SIFT'', ...}
\end{description}
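A minimal usage sketch (assuming the OpenCV 2.x C++ API and an already detected set of keypoints \texttt{keypoints} for an image \texttt{img}):
\begin{lstlisting}
Ptr<DescriptorExtractor> extractor = createDescriptorExtractor("SURF");
Mat descriptors;                           // one row per keypoint
extractor->compute(img, keypoints, descriptors);
\end{lstlisting}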
\section{Common Interfaces of Descriptor Matchers}
Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that makes it easy to switch
between different algorithms solving the same problem. This section is devoted to matching descriptors
that are represented as vectors in a multidimensional space. All objects that implement ''vector''
descriptor matchers inherit the \cvCppCross{DescriptorMatcher} interface.
\cvclass{DMatch}
Match between two keypoint descriptors: query descriptor index,
train descriptor index, train image index and distance between descriptors.
@ -1031,7 +669,8 @@ void DescriptorMatcher::match( const Mat\& queryDescs,
\begin{description}
\cvarg{queryDescs}{Query set of descriptors.}
\cvarg{trainDescs}{Train set of descriptors. It is not added to the train descriptors collection
stored in the class object.}
\cvarg{matches}{Matches. If a query descriptor is masked out in \texttt{mask}, no match is added for it,
so the size of \texttt{matches} may be less than the number of query descriptors.}
\cvarg{mask}{Mask specifying permissible matches between input query and train matrices of descriptors.}
@ -1184,6 +823,16 @@ Ptr<DescriptorMatcher> createDescriptorMatcher( const string& descriptorMatcherT
\cvarg{descriptorMatcherType}{Descriptor matcher type, e. g. ''BruteForce'', ''FlannBased'', ...}
\end{description}
\section{Common Interfaces of Generic Descriptor Matchers}
Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that makes it easy to switch
between different algorithms solving the same problem. This section is devoted to matching descriptors
that cannot be represented as vectors in a multidimensional space. \texttt{GenericDescriptorMatcher}
is a more generic interface for descriptors. It does not make any assumptions about descriptor representation.
Every descriptor with the \cvCppCross{DescriptorExtractor} interface has a wrapper with the
\texttt{GenericDescriptorMatcher} interface (see \cvCppCross{VectorDescriptorMatcher}).
There are descriptors, such as the One-way descriptor and Ferns, that have the \texttt{GenericDescriptorMatcher}
interface implemented but do not support \cvCppCross{DescriptorExtractor}.
\cvclass{GenericDescriptorMatcher}
Abstract interface for extracting and matching keypoint descriptors.
There is \cvCppCross{DescriptorExtractor} and \cvCppCross{DescriptorMatcher}
@ -1352,9 +1001,11 @@ void GenericDescriptorMatcher::match(
\begin{description}
\cvarg{queryImg}{Query image.}
\cvarg{queryPoints}{Keypoints detected in \texttt{queryImg}.}
\cvarg{trainImg}{Train image. It is not added to the train image collection
stored in the class object.}
\cvarg{trainPoints}{Keypoints detected in \texttt{trainImg}. They are not added to the train points collection
stored in the class object.}
\cvarg{matches}{Matches. If a query descriptor (keypoint) is masked out in \texttt{mask},
no match is added for it,
so the size of \texttt{matches} may be less than the number of query keypoints.}
@ -1565,6 +1216,7 @@ VectorDescriptorMatcher matcher( new SurfDescriptorExtractor,
new BruteForceMatcher<L2<float> > );
\end{lstlisting}
\section{Drawing Function of Keypoints and Matches}
\cvCppFunc{drawMatches}
This function draws matches of keypoints from two images onto the output image.
A match is a line connecting two keypoints (circles).

@ -0,0 +1,827 @@
\section{Feature detection and description}
\ifCpp
\cvCppFunc{FAST}
Detects corners using the FAST algorithm by E. Rosten (''Machine learning for high-speed corner detection'', 2006).
\cvdefCpp{
void FAST( const Mat\& image, vector<KeyPoint>\& keypoints,
int threshold, bool nonmaxSupression=true );
}
\begin{description}
\cvarg{image}{The image. Keypoints (corners) are detected in it.}
\cvarg{keypoints}{Keypoints detected on the image.}
\cvarg{threshold}{Threshold on the difference in intensity between the center pixel and
pixels on a circle around this pixel. See the description of the algorithm.}
\cvarg{nonmaxSupression}{If it is true, non-maximum suppression is applied to the detected corners (keypoints).}
\end{description}
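Below is a small usage sketch (assuming the OpenCV 2.x C++ API; \texttt{board.jpg} is a hypothetical input file):
\begin{lstlisting}
#include "cv.h"
#include "highgui.h"
#include <vector>
using namespace cv;
using namespace std;

int main()
{
    Mat img = imread("board.jpg", 0);     // FAST expects an 8-bit grayscale image
    if( img.empty() )
        return -1;
    vector<KeyPoint> keypoints;
    FAST(img, keypoints, 40, true);       // threshold 40, non-maximum suppression on
    Mat outImg;
    drawKeypoints(img, keypoints, outImg, Scalar(0,255,0));
    imshow("FAST keypoints", outImg);
    waitKey();
    return 0;
}
\end{lstlisting}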
\fi
\ifCPy
\ifPy
\cvclass{CvSURFPoint}
A SURF keypoint, represented as a tuple \texttt{((x, y), laplacian, size, dir, hessian)}.
\begin{description}
\cvarg{x}{x-coordinate of the feature within the image}
\cvarg{y}{y-coordinate of the feature within the image}
\cvarg{laplacian}{-1, 0 or +1. Sign of the laplacian at the point. Can be used to speed up feature comparison, since features with laplacians of different signs cannot match}
\cvarg{size}{size of the feature}
\cvarg{dir}{orientation of the feature: 0..360 degrees}
\cvarg{hessian}{value of the hessian (can be used to approximately estimate the feature strengths; see also params.hessianThreshold)}
\end{description}
\fi
\cvCPyFunc{ExtractSURF}
Extracts Speeded Up Robust Features from an image.
\cvdefC{
void cvExtractSURF( \par const CvArr* image,\par const CvArr* mask,\par CvSeq** keypoints,\par CvSeq** descriptors,\par CvMemStorage* storage,\par CvSURFParams params );
}
\cvdefPy{ExtractSURF(image,mask,storage,params)-> (keypoints,descriptors)}
\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{mask}{The optional input 8-bit mask. The features are only found in the areas that contain more than 50\% of non-zero mask pixels}
\ifC
\cvarg{keypoints}{The output parameter; double pointer to the sequence of keypoints. The sequence of CvSURFPoint structures is as follows:}
\begin{lstlisting}
typedef struct CvSURFPoint
{
CvPoint2D32f pt; // position of the feature within the image
int laplacian; // -1, 0 or +1. sign of the laplacian at the point.
// can be used to speedup feature comparison
// (normally features with laplacians of different
// signs can not match)
int size; // size of the feature
float dir; // orientation of the feature: 0..360 degrees
float hessian; // value of the hessian (can be used to
// approximately estimate the feature strengths;
// see also params.hessianThreshold)
}
CvSURFPoint;
\end{lstlisting}
\cvarg{descriptors}{The optional output parameter; double pointer to the sequence of descriptors. Depending on the params.extended value, each element of the sequence will be either a 64-element or a 128-element floating-point (\texttt{CV\_32F}) vector. If the parameter is NULL, the descriptors are not computed}
\else
\cvarg{keypoints}{sequence of keypoints.}
\cvarg{descriptors}{sequence of descriptors. Each SURF descriptor is a list of floats, of length 64 or 128.}
\fi
\cvarg{storage}{Memory storage where keypoints and descriptors will be stored}
\ifC
\cvarg{params}{Various algorithm parameters put to the structure CvSURFParams:}
\begin{lstlisting}
typedef struct CvSURFParams
{
int extended; // 0 means basic descriptors (64 elements each),
// 1 means extended descriptors (128 elements each)
double hessianThreshold; // only features with keypoint.hessian
// larger than that are extracted.
// good default value is ~300-500 (can depend on the
// average local contrast and sharpness of the image).
// user can further filter out some features based on
// their hessian values and other characteristics.
int nOctaves; // the number of octaves to be used for extraction.
// With each next octave the feature size is doubled
// (3 by default)
int nOctaveLayers; // The number of layers within each octave
// (4 by default)
}
CvSURFParams;
CvSURFParams cvSURFParams(double hessianThreshold, int extended=0);
// returns default parameters
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(extended, hessianThreshold, nOctaves, nOctaveLayers)}:
\begin{description}
\cvarg{extended}{0 means basic descriptors (64 elements each), 1 means extended descriptors (128 elements each)}
\cvarg{hessianThreshold}{Only features with a hessian larger than this value are extracted. A good default value is ~300-500 (it can depend on the average local contrast and sharpness of the image). The user can further filter out some features based on their hessian values and other characteristics.}
\cvarg{nOctaves}{the number of octaves to be used for extraction. With each next octave the feature size is doubled (3 by default)}
\cvarg{nOctaveLayers}{The number of layers within each octave (4 by default)}
\end{description}}
\fi
\end{description}
The function cvExtractSURF finds robust features in the image, as
described in \cite{Bay06}. For each feature it returns its location, size,
orientation and optionally the descriptor, basic or extended. The function
can be used for object tracking and localization, image stitching etc.
\ifC
See the
\texttt{find\_obj.cpp} demo in OpenCV samples directory.
\else
To extract strong SURF features from an image
\begin{lstlisting}
>>> import cv
>>> im = cv.LoadImageM("building.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
>>> (keypoints, descriptors) = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 30000, 3, 1))
>>> print len(keypoints), len(descriptors)
6 6
>>> for ((x, y), laplacian, size, dir, hessian) in keypoints:
... print "x=\%d y=\%d laplacian=\%d size=\%d dir=\%f hessian=\%f" \% (x, y, laplacian, size, dir, hessian)
x=30 y=27 laplacian=-1 size=31 dir=69.778503 hessian=36979.789062
x=296 y=197 laplacian=1 size=33 dir=111.081039 hessian=31514.349609
x=296 y=266 laplacian=1 size=32 dir=107.092300 hessian=31477.908203
x=254 y=284 laplacian=1 size=31 dir=279.137360 hessian=34169.800781
x=498 y=525 laplacian=-1 size=33 dir=278.006592 hessian=31002.759766
x=777 y=281 laplacian=1 size=70 dir=167.940964 hessian=35538.363281
\end{lstlisting}
\fi
\cvCPyFunc{GetStarKeypoints}
Retrieves keypoints using the StarDetector algorithm.
\cvdefC{
CvSeq* cvGetStarKeypoints( \par const CvArr* image,\par CvMemStorage* storage,\par CvStarDetectorParams params=cvStarDetectorParams() );
}
\cvdefPy{GetStarKeypoints(image,storage,params)-> keypoints}
\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{storage}{Memory storage where the keypoints will be stored}
\ifC
\cvarg{params}{Various algorithm parameters given to the structure CvStarDetectorParams:}
\begin{lstlisting}
typedef struct CvStarDetectorParams
{
int maxSize; // maximal size of the features detected. The following
// values of the parameter are supported:
// 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
int responseThreshold; // threshold for the approximated laplacian,
// used to eliminate weak features
int lineThresholdProjected; // another threshold for laplacian to
// eliminate edges
int lineThresholdBinarized; // another threshold for the feature
// scale to eliminate edges
int suppressNonmaxSize; // linear size of a pixel neighborhood
// for non-maxima suppression
}
CvStarDetectorParams;
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(maxSize, responseThreshold, lineThresholdProjected, lineThresholdBinarized, suppressNonmaxSize)}:
\begin{description}
\cvarg{maxSize}{maximal size of the features detected. The following values of the parameter are supported: 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128}
\cvarg{responseThreshold}{threshold for the approximated laplacian, used to eliminate weak features}
\cvarg{lineThresholdProjected}{another threshold for laplacian to eliminate edges}
\cvarg{lineThresholdBinarized}{another threshold for the feature scale to eliminate edges}
\cvarg{suppressNonmaxSize}{linear size of a pixel neighborhood for non-maxima suppression}
\end{description}
}
\fi
\end{description}
The function GetStarKeypoints extracts keypoints that are local
scale-space extrema. The scale-space is constructed by computing
approximate values of laplacians with different sigmas at each
pixel. Instead of using pyramids, a popular approach to save computing
time, all of the laplacians are computed at each pixel of the original
high-resolution image. But each approximate laplacian value is computed
in O(1) time regardless of the sigma, thanks to the use of integral
images. The algorithm is based on the paper \cite{Agrawal08}, but instead
of a square, hexagon or octagon it uses an 8-end star shape, hence the name,
consisting of overlapping upright and tilted squares.
\ifC
Each computed feature is represented by the following structure:
\begin{lstlisting}
typedef struct CvStarKeypoint
{
CvPoint pt; // coordinates of the feature
int size; // feature size, see CvStarDetectorParams::maxSize
float response; // the approximated laplacian value at that point.
}
CvStarKeypoint;
inline CvStarKeypoint cvStarKeypoint(CvPoint pt, int size, float response);
\end{lstlisting}
\else
Each keypoint is represented by a tuple \texttt{((x, y), size, response)}:
\begin{description}
\cvarg{x, y}{Screen coordinates of the keypoint}
\cvarg{size}{feature size, up to \texttt{maxSize}}
\cvarg{response}{approximated laplacian value for the keypoint}
\end{description}
\fi
\ifC
Below is a small usage sample:
\begin{lstlisting}
#include "cv.h"
#include "highgui.h"
int main(int argc, char** argv)
{
const char* filename = argc > 1 ? argv[1] : "lena.jpg";
IplImage* img = cvLoadImage( filename, 0 ), *cimg;
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq* keypoints = 0;
int i;
if( !img )
return 0;
cvNamedWindow( "image", 1 );
cvShowImage( "image", img );
cvNamedWindow( "features", 1 );
cimg = cvCreateImage( cvGetSize(img), 8, 3 );
cvCvtColor( img, cimg, CV_GRAY2BGR );
keypoints = cvGetStarKeypoints( img, storage, cvStarDetectorParams(45) );
for( i = 0; i < (keypoints ? keypoints->total : 0); i++ )
{
CvStarKeypoint kpt = *(CvStarKeypoint*)cvGetSeqElem(keypoints, i);
int r = kpt.size/2;
cvCircle( cimg, kpt.pt, r, CV_RGB(0,255,0));
cvLine( cimg, cvPoint(kpt.pt.x + r, kpt.pt.y + r),
cvPoint(kpt.pt.x - r, kpt.pt.y - r), CV_RGB(0,255,0));
cvLine( cimg, cvPoint(kpt.pt.x - r, kpt.pt.y + r),
cvPoint(kpt.pt.x + r, kpt.pt.y - r), CV_RGB(0,255,0));
}
cvShowImage( "features", cimg );
cvWaitKey();
}
\end{lstlisting}
\fi
\fi
\ifCpp
\cvclass{MSER}
Maximally-Stable Extremal Region Extractor
\begin{lstlisting}
class MSER : public CvMSERParams
{
public:
// default constructor
MSER();
// constructor that initializes all the algorithm parameters
MSER( int _delta, int _min_area, int _max_area,
float _max_variation, float _min_diversity,
int _max_evolution, double _area_threshold,
double _min_margin, int _edge_blur_size );
// runs the extractor on the specified image; returns the MSERs,
// each encoded as a contour (vector<Point>, see findContours)
// the optional mask marks the area where MSERs are searched for
void operator()( const Mat& image, vector<vector<Point> >& msers, const Mat& mask ) const;
};
\end{lstlisting}
The class encapsulates all the parameters of the MSER extraction algorithm (see \url{http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions}).
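A minimal usage sketch (assuming the OpenCV 2.x C++ API; \texttt{building.jpg} is a hypothetical input file):
\begin{lstlisting}
Mat img = imread("building.jpg", 0);
vector<vector<Point> > msers;
MSER mser;                                // default parameters
mser(img, msers, Mat());                  // no mask
Mat vis;
cvtColor(img, vis, CV_GRAY2BGR);
for( size_t i = 0; i < msers.size(); i++ )
    drawContours(vis, msers, (int)i, Scalar(0,255,0)); // draw each region contour
\end{lstlisting}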
\cvclass{StarDetector}
Implements the Star keypoint detector.
\begin{lstlisting}
class StarDetector : CvStarDetectorParams
{
public:
// default constructor
StarDetector();
// the full constructor initialized all the algorithm parameters:
// maxSize - maximum size of the features. The following
// values of the parameter are supported:
// 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
// responseThreshold - threshold for the approximated laplacian,
// used to eliminate weak features. The larger it is,
// the less features will be retrieved
// lineThresholdProjected - another threshold for the laplacian to
// eliminate edges
// lineThresholdBinarized - another threshold for the feature
// size to eliminate edges.
// The larger the 2 threshold, the more points you get.
StarDetector(int maxSize, int responseThreshold,
int lineThresholdProjected,
int lineThresholdBinarized,
int suppressNonmaxSize);
// finds keypoints in an image
void operator()(const Mat& image, vector<KeyPoint>& keypoints) const;
};
\end{lstlisting}
The class implements a modified version of the CenSurE keypoint detector described in
\cite{Agrawal08}.
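A minimal usage sketch (assuming the OpenCV 2.x C++ API; the constructor arguments shown are commonly used default values):
\begin{lstlisting}
Mat img = imread("lena.jpg", 0);
vector<KeyPoint> keypoints;
StarDetector star(45, 30, 10, 8, 5);      // maxSize, responseThreshold,
                                          // lineThresholdProjected,
                                          // lineThresholdBinarized, suppressNonmaxSize
star(img, keypoints);
\end{lstlisting}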
\cvclass{SIFT}
Class for extracting keypoints and computing descriptors using the approach named Scale Invariant Feature Transform (SIFT).
\begin{lstlisting}
class CV_EXPORTS SIFT
{
public:
struct CommonParams
{
static const int DEFAULT_NOCTAVES = 4;
static const int DEFAULT_NOCTAVE_LAYERS = 3;
static const int DEFAULT_FIRST_OCTAVE = -1;
enum{ FIRST_ANGLE = 0, AVERAGE_ANGLE = 1 };
CommonParams();
CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave,
int _angleMode );
int nOctaves, nOctaveLayers, firstOctave;
int angleMode;
};
struct DetectorParams
{
static double GET_DEFAULT_THRESHOLD()
{ return 0.04 / SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS / 2.0; }
static double GET_DEFAULT_EDGE_THRESHOLD() { return 10.0; }
DetectorParams();
DetectorParams( double _threshold, double _edgeThreshold );
double threshold, edgeThreshold;
};
struct DescriptorParams
{
static double GET_DEFAULT_MAGNIFICATION() { return 3.0; }
static const bool DEFAULT_IS_NORMALIZE = true;
static const int DESCRIPTOR_SIZE = 128;
DescriptorParams();
DescriptorParams( double _magnification, bool _isNormalize,
bool _recalculateAngles );
double magnification;
bool isNormalize;
bool recalculateAngles;
};
SIFT();
//! sift-detector constructor
SIFT( double _threshold, double _edgeThreshold,
int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
int _angleMode=CommonParams::FIRST_ANGLE );
//! sift-descriptor constructor
SIFT( double _magnification, bool _isNormalize=true,
bool _recalculateAngles = true,
int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
int _angleMode=CommonParams::FIRST_ANGLE );
SIFT( const CommonParams& _commParams,
const DetectorParams& _detectorParams = DetectorParams(),
const DescriptorParams& _descriptorParams = DescriptorParams() );
//! returns the descriptor size in floats (128)
int descriptorSize() const { return DescriptorParams::DESCRIPTOR_SIZE; }
//! finds the keypoints using SIFT algorithm
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const;
//! finds the keypoints and computes descriptors for them using SIFT algorithm.
//! Optionally it can compute descriptors for the user-provided keypoints
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
Mat& descriptors,
bool useProvidedKeypoints=false) const;
CommonParams getCommonParams () const { return commParams; }
DetectorParams getDetectorParams () const { return detectorParams; }
DescriptorParams getDescriptorParams () const { return descriptorParams; }
protected:
...
};
\end{lstlisting}
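A minimal usage sketch (assuming the OpenCV 2.x C++ API; \texttt{scene.jpg} is a hypothetical input file):
\begin{lstlisting}
Mat img = imread("scene.jpg", 0);
SIFT sift;                                // default detector and descriptor parameters
vector<KeyPoint> keypoints;
Mat descriptors;                          // one 128-float row per keypoint
sift(img, Mat(), keypoints, descriptors); // empty mask
\end{lstlisting}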
\cvclass{SURF}
Class for extracting Speeded Up Robust Features from an image.
\begin{lstlisting}
class SURF : public CvSURFParams
{
public:
// default constructor
SURF();
// constructor that initializes all the algorithm parameters
SURF(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false);
// returns the number of elements in each descriptor (64 or 128)
int descriptorSize() const;
// detects keypoints using fast multi-scale Hessian detector
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const;
// detects keypoints and computes the SURF descriptors for them
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
vector<float>& descriptors,
bool useProvidedKeypoints=false) const;
};
\end{lstlisting}
The class \texttt{SURF} implements the Speeded Up Robust Features descriptor \cite{Bay06}.
There is a fast multi-scale Hessian keypoint detector that can be used to find the keypoints
(which is the default option), but the descriptors can also be computed for user-specified keypoints.
The class can be used for object tracking and localization, image stitching etc. See the
\texttt{find\_obj.cpp} demo in the OpenCV samples directory.
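A minimal usage sketch (assuming the OpenCV 2.x C++ API; \texttt{scene.jpg} is a hypothetical input file):
\begin{lstlisting}
Mat img = imread("scene.jpg", 0);
SURF surf(500, 4, 2, true);               // hessianThreshold=500, extended (128-element) descriptors
vector<KeyPoint> keypoints;
vector<float> descriptors;                // 128*keypoints.size() elements after the call
surf(img, Mat(), keypoints, descriptors); // empty mask
\end{lstlisting}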
\cvclass{RandomizedTree}
The class contains the base structure for \texttt{RTreeClassifier}.
\begin{lstlisting}
class CV_EXPORTS RandomizedTree
{
public:
friend class RTreeClassifier;
RandomizedTree();
~RandomizedTree();
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng, int depth, int views,
size_t reduced_num_dim, int num_quant_bits);
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng, PatchGenerator &make_patch, int depth,
int views, size_t reduced_num_dim, int num_quant_bits);
// following two funcs are EXPERIMENTAL
//(do not use unless you know exactly what you do)
static void quantizeVector(float *vec, int dim, int N, float bnds[2],
int clamp_mode=0);
static void quantizeVector(float *src, int dim, int N, float bnds[2],
uchar *dst);
// patch_data must be a 32x32 array (no row padding)
float* getPosterior(uchar* patch_data);
const float* getPosterior(uchar* patch_data) const;
uchar* getPosterior2(uchar* patch_data);
void read(const char* file_name, int num_quant_bits);
void read(std::istream &is, int num_quant_bits);
void write(const char* file_name) const;
void write(std::ostream &os) const;
int classes() { return classes_; }
int depth() { return depth_; }
void discardFloatPosteriors() { freePosteriors(1); }
inline void applyQuantization(int num_quant_bits)
{ makePosteriors2(num_quant_bits); }
private:
int classes_;
int depth_;
int num_leaves_;
std::vector<RTreeNode> nodes_;
float **posteriors_; // 16-bytes aligned posteriors
uchar **posteriors2_; // 16-bytes aligned posteriors
std::vector<int> leaf_counts_;
void createNodes(int num_nodes, cv::RNG &rng);
void allocPosteriorsAligned(int num_leaves, int num_classes);
void freePosteriors(int which);
// which: 1=posteriors_, 2=posteriors2_, 3=both
void init(int classes, int depth, cv::RNG &rng);
void addExample(int class_id, uchar* patch_data);
void finalize(size_t reduced_num_dim, int num_quant_bits);
int getIndex(uchar* patch_data) const;
inline float* getPosteriorByIndex(int index);
inline uchar* getPosteriorByIndex2(int index);
inline const float* getPosteriorByIndex(int index) const;
void convertPosteriorsToChar();
void makePosteriors2(int num_quant_bits);
void compressLeaves(size_t reduced_num_dim);
void estimateQuantPercForPosteriors(float perc[2]);
};
\end{lstlisting}
\cvCppFunc{RandomizedTree::train}
Trains a randomized tree using an input set of keypoints.
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\begin{description}
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the image keypoints used for training}
\cvarg{rng} {Random number generator used for training}
\cvarg{make\_patch} {Patch generator used for training}
\cvarg{depth} {Maximum tree depth}
%\cvarg{views} {}
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
\cvCppFunc{RandomizedTree::read}
Reads a pre-saved randomized tree from a file or stream.
\cvdefCpp{void read(const char* file\_name, int num\_quant\_bits)}
\cvdefCpp{void read(std::istream \&is, int num\_quant\_bits)}
\begin{description}
\cvarg{file\_name}{Name of the file containing randomized tree data}
\cvarg{is}{Input stream associated with the file containing randomized tree data}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
\cvCppFunc{RandomizedTree::write}
Writes the current randomized tree to a file or stream.
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
\end{description}
\cvCppFunc{RandomizedTree::applyQuantization}
Applies quantization to the current randomized tree
\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
\cvclass{RTreeNode}
The class contains the base structure for \texttt{RandomizedTree}.
\begin{lstlisting}
struct RTreeNode
{
short offset1, offset2;
RTreeNode() {}
RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
: offset1(y1*PATCH_SIZE + x1),
offset2(y2*PATCH_SIZE + x2)
{}
//! Left child on 0, right child on 1
inline bool operator() (uchar* patch_data) const
{
return patch_data[offset1] > patch_data[offset2];
}
};
\end{lstlisting}
\cvclass{RTreeClassifier}
The class \texttt{RTreeClassifier} represents the Calonder descriptor, which was originally introduced by Michael Calonder.
\begin{lstlisting}
class CV_EXPORTS RTreeClassifier
{
public:
static const int DEFAULT_TREES = 48;
static const size_t DEFAULT_NUM_QUANT_BITS = 4;
RTreeClassifier();
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng,
int num_trees = RTreeClassifier::DEFAULT_TREES,
int depth = DEFAULT_DEPTH,
int views = DEFAULT_VIEWS,
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
bool print_status = true);
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng,
PatchGenerator &make_patch,
int num_trees = RTreeClassifier::DEFAULT_TREES,
int depth = DEFAULT_DEPTH,
int views = DEFAULT_VIEWS,
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
bool print_status = true);
// sig must point to a memory block of at least
//classes()*sizeof(float|uchar) bytes
void getSignature(IplImage *patch, uchar *sig);
void getSignature(IplImage *patch, float *sig);
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
int sig_len=176);
static inline uchar* safeSignatureAlloc(int num_sig=1,
int sig_len=176);
inline int classes() { return classes_; }
inline int original_num_classes()
{ return original_num_classes_; }
void setQuantization(int num_quant_bits);
void discardFloatPosteriors();
void read(const char* file_name);
void read(std::istream &is);
void write(const char* file_name) const;
void write(std::ostream &os) const;
std::vector<RandomizedTree> trees_;
private:
int classes_;
int num_quant_bits_;
uchar **posteriors_;
ushort *ptemp_;
int original_num_classes_;
bool keep_floats_;
};
\end{lstlisting}
\cvCppFunc{RTreeClassifier::train}
Trains a randomized tree classifier using an input set of keypoints.
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
PatchGenerator \&make\_patch,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\begin{description}
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the image keypoints used for training}
\cvarg{rng} {Random number generator used for training}
\cvarg{make\_patch} {Patch generator used for training}
\cvarg{num\_trees} {Number of randomized trees used in the RTreeClassifier}
\cvarg{depth} {Maximum tree depth}
%\cvarg{views} {}
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\cvarg{print\_status} {Print the current status of training on the console}
\end{description}
\cvCppFunc{RTreeClassifier::getSignature}
Returns a signature for an image patch.
\cvdefCpp{
void getSignature(IplImage *patch, uchar *sig)
}
\cvdefCpp{
void getSignature(IplImage *patch, float *sig)
}
\begin{description}
\cvarg{patch} {Image patch to calculate signature for}
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim)}}
\end{description}
\cvCppFunc{RTreeClassifier::getSparseSignature}
The function is similar to \texttt{getSignature}, but it uses a threshold to remove all signature elements below that threshold, so that the signature is compressed.
\cvdefCpp{
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
}
\begin{description}
\cvarg{patch} {Image patch to calculate signature for}
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim)}}
\cvarg{thresh} {The threshold that is used for compressing the signature}
\end{description}
\cvCppFunc{RTreeClassifier::countNonZeroElements}
The function returns the number of non-zero elements in the input array.
\cvdefCpp{
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
}
\begin{description}
\cvarg{vec}{Input vector containing float elements}
\cvarg{n}{Input vector size}
\cvarg{tol} {The threshold used for counting elements. All elements less than \texttt{tol} are treated as zero elements}
\end{description}
\cvCppFunc{RTreeClassifier::read}
Reads a pre-saved RTreeClassifier from a file or stream.
\cvdefCpp{void read(const char* file\_name)}
\cvdefCpp{void read(std::istream \&is)}
\begin{description}
\cvarg{file\_name}{Name of the file containing randomized tree data}
\cvarg{is}{Input stream associated with the file containing randomized tree data}
\end{description}
\cvCppFunc{RTreeClassifier::write}
Writes the current RTreeClassifier to a file or stream.
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
\end{description}
\cvCppFunc{RTreeClassifier::setQuantization}
Applies quantization to the current randomized tree
\cvdefCpp{void setQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
Below is an example of using \texttt{RTreeClassifier} for feature matching. Features are extracted from test and train images with SURF. The output arrays \texttt{best\_corr} and \texttt{best\_corr\_idx} keep the best probabilities and the corresponding train feature indices for every feature of the test image.
% ===== Example. Using RTreeClassifier for features matching =====
\begin{lstlisting}
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
CvSURFParams params = cvSURFParams(500, 1);
cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
storage, params );
cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
storage, params );
cv::RTreeClassifier detector;
int patch_width = cv::PATCH_SIZE;
int patch_height = cv::PATCH_SIZE;
vector<cv::BaseKeypoint> base_set;
int i=0;
CvSURFPoint* point;
for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
{
point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
base_set.push_back(
cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
}
//Detector training
cv::RNG rng( cvGetTickCount() );
cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
-CV_PI/3,CV_PI/3);
printf("RTree Classifier training...\n");
detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
(int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
printf("Done\n");
float* signature = new float[detector.original_num_classes()];
float* best_corr;
int* best_corr_idx;
if (imageKeypoints->total > 0)
{
best_corr = new float[imageKeypoints->total];
best_corr_idx = new int[imageKeypoints->total];
}
for(i=0; i < imageKeypoints->total; i++)
{
point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
int part_idx = -1;
float prob = 0.0f;
CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
(int)(point->pt.y) - patch_height/2,
patch_width, patch_height);
cvSetImageROI(test_image, roi);
roi = cvGetImageROI(test_image);
if(roi.width != patch_width || roi.height != patch_height)
{
best_corr_idx[i] = part_idx;
best_corr[i] = prob;
}
else
{
cvSetImageROI(test_image, roi);
IplImage* roi_image =
cvCreateImage(cvSize(roi.width, roi.height),
test_image->depth, test_image->nChannels);
cvCopy(test_image,roi_image);
detector.getSignature(roi_image, signature);
for (int j = 0; j< detector.original_num_classes();j++)
{
if (prob < signature[j])
{
part_idx = j;
prob = signature[j];
}
}
best_corr_idx[i] = part_idx;
best_corr[i] = prob;
if (roi_image)
cvReleaseImage(&roi_image);
}
cvResetImageROI(test_image);
}
\end{lstlisting}
\fi

@ -0,0 +1,200 @@
\ifCpp
\section{Object Categorization}
This section describes some approaches based on local 2D features that are used
for object categorization.
\cvclass{BOWTrainer}
Abstract base class for training a ''bag of visual words'' vocabulary from a set of descriptors.
See, e.g., ''Visual Categorization with Bags of Keypoints'' by Gabriella Csurka, Christopher R. Dance,
Lixin Fan, Jutta Willamowski, Cedric Bray, 2004.
\begin{lstlisting}
class BOWTrainer
{
public:
BOWTrainer(){}
virtual ~BOWTrainer(){}
void add( const Mat& descriptors );
const vector<Mat>& getDescriptors() const;
int descripotorsCount() const;
virtual void clear();
virtual Mat cluster() const = 0;
virtual Mat cluster( const Mat& descriptors ) const = 0;
protected:
...
};
\end{lstlisting}
\cvCppFunc{BOWTrainer::add}
Adds descriptors to the training set. The training set is clustered using the \texttt{cluster}
method to construct the vocabulary.
\cvdefCpp{
void BOWTrainer::add( const Mat\& descriptors );
}
\begin{description}
\cvarg{descriptors}{Descriptors to add to the training set. Each row of the \texttt{descriptors}
matrix is one descriptor.}
\end{description}
\cvCppFunc{BOWTrainer::getDescriptors}
Returns the training set of descriptors.
\cvdefCpp{
const vector<Mat>\& BOWTrainer::getDescriptors() const;
}
\cvCppFunc{BOWTrainer::descripotorsCount}
Returns the count of all descriptors stored in the training set.
\cvdefCpp{
int BOWTrainer::descripotorsCount() const;
}
\cvCppFunc{BOWTrainer::cluster}
Clusters train descriptors. The vocabulary consists of the cluster centers, so this method
returns the vocabulary. In the first variant of the method, the train descriptors stored in the
object are clustered; in the second variant, the input descriptors are clustered.
\cvdefCpp{
Mat BOWTrainer::cluster() const;
}
\cvdefCpp{
Mat BOWTrainer::cluster( const Mat\& descriptors ) const;
}
\begin{description}
\cvarg{descriptors}{Descriptors to cluster. Each row of the \texttt{descriptors}
matrix is one descriptor. The descriptors are not added
to the inner train descriptor set.}
\end{description}
\cvclass{BOWKMeansTrainer}
\cvCppCross{kmeans}-based class to train a visual vocabulary using the ''bag of visual words'' approach.
\begin{lstlisting}
class BOWKMeansTrainer : public BOWTrainer
{
public:
BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(),
int attempts=3, int flags=KMEANS_PP_CENTERS );
virtual ~BOWKMeansTrainer(){}
// Returns trained vocabulary (i.e. cluster centers).
virtual Mat cluster() const;
virtual Mat cluster( const Mat& descriptors ) const;
protected:
...
};
\end{lstlisting}
For an explanation of the constructor parameters, see the \cvCppCross{kmeans} function
arguments.
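A minimal training sketch (assuming the OpenCV 2.x C++ API; \texttt{trainImages} is a hypothetical \texttt{vector<Mat>} of grayscale training images):
\begin{lstlisting}
BOWKMeansTrainer bowTrainer(100);          // vocabulary of 100 visual words
SurfFeatureDetector detector(400);
SurfDescriptorExtractor extractor;
for( size_t i = 0; i < trainImages.size(); i++ )
{
    vector<KeyPoint> keypoints;
    Mat descriptors;
    detector.detect(trainImages[i], keypoints);
    extractor.compute(trainImages[i], keypoints, descriptors);
    bowTrainer.add(descriptors);
}
Mat vocabulary = bowTrainer.cluster();     // one row per visual word (cluster center)
\end{lstlisting}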
\cvclass{BOWImgDescriptorExtractor}
Class to compute an image descriptor using the ''bag of visual words''. In brief,
such a computation consists of the following steps:
1. Compute descriptors for the given image and its keypoints set, \\
2. Find the nearest visual words from the vocabulary for each keypoint descriptor, \\
3. Compute the image descriptor as a normalized histogram of vocabulary words encountered in the image, i.e.
the \texttt{i}-th bin of the histogram is the frequency of the \texttt{i}-th word of the vocabulary in the given image.
\begin{lstlisting}
class BOWImgDescriptorExtractor
{
public:
BOWImgDescriptorExtractor( const Ptr<DescriptorExtractor>& dextractor,
const Ptr<DescriptorMatcher>& dmatcher );
virtual ~BOWImgDescriptorExtractor(){}
void setVocabulary( const Mat& vocabulary );
const Mat& getVocabulary() const;
void compute( const Mat& image, vector<KeyPoint>& keypoints,
Mat& imgDescriptor,
vector<vector<int> >* pointIdxsOfClusters=0,
Mat* descriptors=0 );
int descriptorSize() const;
int descriptorType() const;
protected:
...
};
\end{lstlisting}
\cvCppFunc{BOWImgDescriptorExtractor::BOWImgDescriptorExtractor}
Constructor.
\cvdefCpp{
BOWImgDescriptorExtractor::BOWImgDescriptorExtractor(
\par const Ptr<DescriptorExtractor>\& dextractor,
\par const Ptr<DescriptorMatcher>\& dmatcher );
}
\begin{description}
\cvarg{dextractor}{Descriptor extractor that will be used to compute descriptors
                   for the input image and its keypoints.}
\cvarg{dmatcher}{Descriptor matcher that will be used to find the nearest word of the trained vocabulary for
                 each keypoint descriptor of the image.}
\end{description}
\cvCppFunc{BOWImgDescriptorExtractor::setVocabulary}
Method to set visual vocabulary.
\cvdefCpp{
void BOWImgDescriptorExtractor::setVocabulary( const Mat\& vocabulary );
}
\begin{description}
\cvarg{vocabulary}{Vocabulary (can be trained using an inheritor of \cvCppCross{BOWTrainer}).
                   Each row of the vocabulary is one visual word (cluster center).}
\end{description}
\cvCppFunc{BOWImgDescriptorExtractor::getVocabulary}
Returns the vocabulary that was set.
\cvdefCpp{
const Mat\& BOWImgDescriptorExtractor::getVocabulary() const;
}
\cvCppFunc{BOWImgDescriptorExtractor::compute}
Compute image descriptor using set visual vocabulary.
\cvdefCpp{
void BOWImgDescriptorExtractor::compute( const Mat\& image,
\par vector<KeyPoint>\& keypoints, Mat\& imgDescriptor,
\par vector<vector<int> >* pointIdxsOfClusters=0,
\par Mat* descriptors=0 );
}
\begin{description}
\cvarg{image}{The image for which the descriptor is computed.}
\cvarg{keypoints}{Keypoints detected in the input image.}
\cvarg{imgDescriptor}{Output computed image descriptor.}
\cvarg{pointIdxsOfClusters}{Indices of keypoints that belong to each cluster, i.e.
                            \texttt{pointIdxsOfClusters[i]} contains the indices of the keypoints that belong
                            to the \texttt{i}-th cluster (vocabulary word). Returned if it is not 0.}
\cvarg{descriptors}{Descriptors of the image keypoints (returned if it is not 0).}
\end{description}
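A minimal usage sketch, assuming a \texttt{vocabulary} matrix trained as described above and an input
\texttt{image}; the SURF detector/extractor and the brute-force matcher are illustrative choices.
\begin{lstlisting}
cv::Ptr<cv::DescriptorExtractor> extractor(new cv::SurfDescriptorExtractor);
cv::Ptr<cv::DescriptorMatcher> matcher(new cv::BruteForceMatcher<cv::L2<float> >);
cv::BOWImgDescriptorExtractor bowExtractor(extractor, matcher);
bowExtractor.setVocabulary(vocabulary); // rows of vocabulary are visual words

cv::SurfFeatureDetector detector(400.);
std::vector<cv::KeyPoint> keypoints;
detector.detect(image, keypoints);

cv::Mat imgDescriptor; // normalized histogram of visual words, length = vocabulary.rows
bowExtractor.compute(image, keypoints, imgDescriptor);
\end{lstlisting}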
\cvCppFunc{BOWImgDescriptorExtractor::descriptorSize}
Returns the image descriptor size if the vocabulary was set, and 0 otherwise.
\cvdefCpp{
int BOWImgDescriptorExtractor::descriptorSize() const;
}
\cvCppFunc{BOWImgDescriptorExtractor::descriptorType}
Returns image descriptor type.
\cvdefCpp{
int BOWImgDescriptorExtractor::descriptorType() const;
}
\fi

@ -1,402 +0,0 @@
\section{Object detection and descriptors}
\ifCpp
\cvclass{RandomizedTree}
The class contains the base structure for \texttt{RTreeClassifier}.
\begin{lstlisting}
class CV_EXPORTS RandomizedTree
{
public:
friend class RTreeClassifier;
RandomizedTree();
~RandomizedTree();
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng, int depth, int views,
size_t reduced_num_dim, int num_quant_bits);
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng, PatchGenerator &make_patch, int depth,
int views, size_t reduced_num_dim, int num_quant_bits);
// following two funcs are EXPERIMENTAL
//(do not use unless you know exactly what you do)
static void quantizeVector(float *vec, int dim, int N, float bnds[2],
int clamp_mode=0);
static void quantizeVector(float *src, int dim, int N, float bnds[2],
uchar *dst);
// patch_data must be a 32x32 array (no row padding)
float* getPosterior(uchar* patch_data);
const float* getPosterior(uchar* patch_data) const;
uchar* getPosterior2(uchar* patch_data);
void read(const char* file_name, int num_quant_bits);
void read(std::istream &is, int num_quant_bits);
void write(const char* file_name) const;
void write(std::ostream &os) const;
int classes() { return classes_; }
int depth() { return depth_; }
void discardFloatPosteriors() { freePosteriors(1); }
inline void applyQuantization(int num_quant_bits)
{ makePosteriors2(num_quant_bits); }
private:
int classes_;
int depth_;
int num_leaves_;
std::vector<RTreeNode> nodes_;
float **posteriors_; // 16-bytes aligned posteriors
uchar **posteriors2_; // 16-bytes aligned posteriors
std::vector<int> leaf_counts_;
void createNodes(int num_nodes, cv::RNG &rng);
void allocPosteriorsAligned(int num_leaves, int num_classes);
void freePosteriors(int which);
// which: 1=posteriors_, 2=posteriors2_, 3=both
void init(int classes, int depth, cv::RNG &rng);
void addExample(int class_id, uchar* patch_data);
void finalize(size_t reduced_num_dim, int num_quant_bits);
int getIndex(uchar* patch_data) const;
inline float* getPosteriorByIndex(int index);
inline uchar* getPosteriorByIndex2(int index);
inline const float* getPosteriorByIndex(int index) const;
void convertPosteriorsToChar();
void makePosteriors2(int num_quant_bits);
void compressLeaves(size_t reduced_num_dim);
void estimateQuantPercForPosteriors(float perc[2]);
};
\end{lstlisting}
\cvCppFunc{RandomizedTree::train}
Trains a randomized tree using an input set of keypoints.
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\begin{description}
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
\cvarg{rng} {Random number generator used for training}
\cvarg{make\_patch} {Patch generator used for training}
\cvarg{depth} {Maximum tree depth}
%\cvarg{views} {}
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
\cvCppFunc{RandomizedTree::read}
Reads a pre-saved randomized tree from a file or stream.
\cvdefCpp{read(const char* file\_name, int num\_quant\_bits)}
\cvdefCpp{read(std::istream \&is, int num\_quant\_bits)}
\begin{description}
\cvarg{file\_name}{Name of the file containing the randomized tree data}
\cvarg{is}{Input stream associated with the file containing the randomized tree data}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
\cvCppFunc{RandomizedTree::write}
Writes the current randomized tree to a file or stream.
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
\end{description}
\cvCppFunc{RandomizedTree::applyQuantization}
Applies quantization to the current randomized tree
\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
\cvstruct{RTreeNode}
The structure represents a single node used by \texttt{RandomizedTree}.
\begin{lstlisting}
struct RTreeNode
{
short offset1, offset2;
RTreeNode() {}
RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
: offset1(y1*PATCH_SIZE + x1),
offset2(y2*PATCH_SIZE + x2)
{}
//! Left child on 0, right child on 1
inline bool operator() (uchar* patch_data) const
{
return patch_data[offset1] > patch_data[offset2];
}
};
\end{lstlisting}
\cvclass{RTreeClassifier}
The class implements \texttt{RTreeClassifier}. It represents the Calonder descriptor, which was originally introduced by Michael Calonder.
\begin{lstlisting}
class CV_EXPORTS RTreeClassifier
{
public:
static const int DEFAULT_TREES = 48;
static const size_t DEFAULT_NUM_QUANT_BITS = 4;
RTreeClassifier();
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng,
int num_trees = RTreeClassifier::DEFAULT_TREES,
int depth = DEFAULT_DEPTH,
int views = DEFAULT_VIEWS,
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
bool print_status = true);
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng,
PatchGenerator &make_patch,
int num_trees = RTreeClassifier::DEFAULT_TREES,
int depth = DEFAULT_DEPTH,
int views = DEFAULT_VIEWS,
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
bool print_status = true);
// sig must point to a memory block of at least
//classes()*sizeof(float|uchar) bytes
void getSignature(IplImage *patch, uchar *sig);
void getSignature(IplImage *patch, float *sig);
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
int sig_len=176);
static inline uchar* safeSignatureAlloc(int num_sig=1,
int sig_len=176);
inline int classes() { return classes_; }
inline int original_num_classes()
{ return original_num_classes_; }
void setQuantization(int num_quant_bits);
void discardFloatPosteriors();
void read(const char* file_name);
void read(std::istream &is);
void write(const char* file_name) const;
void write(std::ostream &os) const;
std::vector<RandomizedTree> trees_;
private:
int classes_;
int num_quant_bits_;
uchar **posteriors_;
ushort *ptemp_;
int original_num_classes_;
bool keep_floats_;
};
\end{lstlisting}
\cvCppFunc{RTreeClassifier::train}
Trains a randomized tree classifier using an input set of keypoints.
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
PatchGenerator \&make\_patch,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\begin{description}
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
\cvarg{rng} {Random number generator used for training}
\cvarg{make\_patch} {Patch generator used for training}
\cvarg{num\_trees} {Number of randomized trees used in the RTreeClassifier}
\cvarg{depth} {Maximum tree depth}
%\cvarg{views} {}
\cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\cvarg{print\_status} {If true, print the current status of training on the console}
\end{description}
\cvCppFunc{RTreeClassifier::getSignature}
Returns the signature for an image patch.
\cvdefCpp{
void getSignature(IplImage *patch, uchar *sig)
}
\cvdefCpp{
void getSignature(IplImage *patch, float *sig)
}
\begin{description}
\cvarg{patch} {Image patch to calculate signature for}
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
\end{description}
\cvCppFunc{RTreeClassifier::getSparseSignature}
The function is similar to \texttt{getSignature}, but it removes all signature elements below the given threshold, so the signature is compressed.
\cvdefCpp{
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
}
\begin{description}
\cvarg{patch} {Image patch to calculate signature for}
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
\cvarg{thresh} {The threshold that is used for compressing the signature}
\end{description}
\cvCppFunc{RTreeClassifier::countNonZeroElements}
The function returns the number of non-zero elements in the input array.
\cvdefCpp{
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
}
\begin{description}
\cvarg{vec}{Input vector containing float elements}
\cvarg{n}{Input vector size}
\cvarg{tol} {The tolerance used when counting elements. All elements less than \texttt{tol} are treated as zero elements}
\end{description}
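A small sketch combining \texttt{getSparseSignature} and \texttt{countNonZeroElements} (the trained
\texttt{detector}, the \texttt{IplImage*} \texttt{patch} and the 0.1 threshold are assumptions; the
signature buffer is sized as in the matching example below):
\begin{lstlisting}
float* sparse_sig = new float[detector.original_num_classes()];
// zero out all signature elements below 0.1 to obtain a compressed signature
detector.getSparseSignature(patch, sparse_sig, 0.1f);
int nonzero = cv::RTreeClassifier::countNonZeroElements(sparse_sig,
    detector.original_num_classes());
printf("sparse signature: %d non-zero elements\n", nonzero);
delete[] sparse_sig;
\end{lstlisting}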
\cvCppFunc{RTreeClassifier::read}
Reads a pre-saved RTreeClassifier from a file or stream.
\cvdefCpp{read(const char* file\_name)}
\cvdefCpp{read(std::istream \&is)}
\begin{description}
\cvarg{file\_name}{Name of the file containing the randomized tree data}
\cvarg{is}{Input stream associated with the file containing the randomized tree data}
\end{description}
\cvCppFunc{RTreeClassifier::write}
Writes the current RTreeClassifier to a file or stream.
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Name of the file where the classifier data will be stored}
\cvarg{os}{Output stream associated with the file where the classifier data will be stored}
\end{description}
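A short sketch of saving a trained classifier and loading it back later (the file name is arbitrary and
\texttt{detector} is a trained \texttt{RTreeClassifier}, such as the one in the matching example below):
\begin{lstlisting}
// save the trained classifier to disk
detector.write("rtree_classifier.dat");

// ... later, e.g. in another process, restore it
cv::RTreeClassifier loaded_detector;
loaded_detector.read("rtree_classifier.dat");
\end{lstlisting}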
\cvCppFunc{RTreeClassifier::setQuantization}
Applies quantization to the trees of the current RTreeClassifier.
\cvdefCpp{void setQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits} {Number of bits used for quantization}
\end{description}
Below is an example of using \texttt{RTreeClassifier} for feature matching. Features are extracted from both the test and train images with SURF. The output arrays \texttt{best\_corr} and \texttt{best\_corr\_idx} keep, for every feature of the test image, the best probability and the index of the corresponding train feature.
% ===== Example. Using RTreeClassifier for features matching =====
\begin{lstlisting}
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
CvSURFParams params = cvSURFParams(500, 1);
cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
storage, params );
cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
storage, params );
cv::RTreeClassifier detector;
int patch_width = cv::PATCH_SIZE;
int patch_height = cv::PATCH_SIZE;
vector<cv::BaseKeypoint> base_set;
int i=0;
CvSURFPoint* point;
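// n_points is assumed to be defined earlier; if positive it limits the number
// of train keypoints used, otherwise all object keypoints are used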
for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
{
point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
base_set.push_back(
cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
}
//Detector training
cv::RNG rng( cvGetTickCount() );
cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
-CV_PI/3,CV_PI/3);
printf("RTree Classifier training...\n");
detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
(int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
printf("Done\n");
float* signature = new float[detector.original_num_classes()];
float* best_corr;
int* best_corr_idx;
if (imageKeypoints->total > 0)
{
best_corr = new float[imageKeypoints->total];
best_corr_idx = new int[imageKeypoints->total];
}
for(i=0; i < imageKeypoints->total; i++)
{
point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
int part_idx = -1;
float prob = 0.0f;
CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
(int)(point->pt.y) - patch_height/2,
patch_width, patch_height);
cvSetImageROI(test_image, roi);
roi = cvGetImageROI(test_image);
if(roi.width != patch_width || roi.height != patch_height)
{
best_corr_idx[i] = part_idx;
best_corr[i] = prob;
}
else
{
cvSetImageROI(test_image, roi);
IplImage* roi_image =
cvCreateImage(cvSize(roi.width, roi.height),
test_image->depth, test_image->nChannels);
cvCopy(test_image,roi_image);
detector.getSignature(roi_image, signature);
for (int j = 0; j< detector.original_num_classes();j++)
{
if (prob < signature[j])
{
part_idx = j;
prob = signature[j];
}
}
best_corr_idx[i] = part_idx;
best_corr[i] = prob;
if (roi_image)
cvReleaseImage(&roi_image);
}
cvResetImageROI(test_image);
}
\end{lstlisting}
\fi

@ -1,667 +0,0 @@
\section{Object Recognition}
\subsection{Bag of Visual Words Matching}
The functions and classes described in this section allow OpenCV's
2D feature descriptors to be used in a bag-of-words framework, first described in \cite{sivic_zisserman_2003}.
\ifCpp
\cvclass{BasicBOWTrainer}
Class used for training visual vocabularies using the bag of words approach.
\begin{lstlisting}
class BasicBOWTrainer : public BOWTrainer
{
public:
BasicBOWTrainer(const int ClusterCenters);
//generate vocabulary - input should contain one row per descriptor
void compute(const Mat& descriptors, Mat& vocabulary);
void saveVocabulary(const std::string filename, const Mat& vocabulary);
};
\end{lstlisting}
The class must be initialized using \texttt{BasicBOWTrainer(clusterCenterCount)}, where clusterCenterCount specifies how many visual words to learn during the training stage.
\cvCppFunc{BasicBOWTrainer::compute}
Computes a code-book of visual words or \emph{vocabulary} given a set of input descriptor vectors.
\cvdefCpp{void compute(const Mat\& descriptors, Mat\& vocabulary);}
\begin{description}
\cvarg{descriptors}{ Matrix of type CV\_32F containing the features (descriptors) to cluster to generate the code book. The size of the matrix is num\_features x feature\_dimensionality.}
\cvarg{vocabulary}{ Matrix of type CV\_32F which is filled with the code book visual words trained from the input descriptor set. The size of the matrix is cluster\_center\_count x feature\_dimensionality.}
\end{description}
\cvCppFunc{BasicBOWTrainer::saveVocabulary}
Saves a trained vocabulary to file for use later e.g. by the BOWGenerator class.
\cvdefCpp{void saveVocabulary(const std::string filename, const Mat\& vocabulary);}
\begin{description}
\cvarg{filename}{ Filename to save the vocabulary to.}
\cvarg{vocabulary}{ Matrix of type CV\_32F as returned from BasicBOWTrainer::compute.}
\end{description}
\cvclass{BOWGenerator}
Class used for generating image descriptors or `bag-of-visual-words' vectors for a given image given a set of keypoints and a vocabulary of visual words.
\begin{lstlisting}
template<class dExtractor>
class BOWGenerator: public ImagedescGenerator
{
public:
/* constructors */
BOWGenerator(const Mat& vocabulary);
BOWGenerator(const std::string vocabulary);
/* 'Bag of visual word' descriptor computation */
void compute(KeyPointCollection& keypoints, Mat& image_descs);
void compute(const Mat& image, std::vector<KeyPoint>& points,
Mat& image_desc);
void compute(const Mat& image, std::vector<KeyPoint>& points,
Mat& image_desc, std::vector<std::vector<KeyPoint> >& keypoint_data);
void compute(KeyPointCollection& keypoints, Mat& image_descs,
std::vector<std::vector<std::vector<KeyPoint> > >& keypoint_data);
};
\end{lstlisting}
The class must first be initialized with a vocabulary of visual words trained using the BasicBOWTrainer class. Such a vocabulary can be specified directly by calling the \texttt{BOWGenerator(const Mat\& vocabulary)} with a pre-computed vocabulary in the form of an OpenCV matrix. Alternatively, the \texttt{BOWGenerator(const std::string vocabulary)} constructor can be used which loads in a visual vocabulary previously saved to file using \texttt{BasicBOWTrainer::saveVocabulary}.
This is a template class, and it must also be initialized with a class type parameter derived from the features\_2d::DescriptorExtractor abstract base class. A simple example of usage in conjunction with the BasicBOWTrainer class and using SURF descriptors might be as follows:
\begin{lstlisting}
cv::Mat all_descriptors, voc_vocab;
//-- load descriptors from training images into all_descriptors matrix --
//Train a vocabulary of visual words using BasicBOWTrainer class
cv::BasicBOWTrainer bow_trainer(5000);
bow_trainer.compute(all_descriptors, voc_vocab);
//Initialize BOWGenerator using the trained visual vocabulary and
// specify that SURF visual features should be used when extracting
// feature descriptors
cv::BOWGenerator<cv::SurfDescriptorExtractor> bow_gen(voc_vocab);
\end{lstlisting}
Once the class has been properly initialized with a visual vocabulary and extractor type, the \texttt{BOWGenerator::compute} member function can be used to compute image descriptors using the vocabulary.
\cvCppFunc{BOWGenerator::compute}
Computes the `bag-of-visual-words' vector for a set of keypoints using the currently loaded visual vocabulary. There are several different ways in which this can be called. The most basic way is to compute the image descriptor for a single image using:
\cvdefCpp{void compute(const Mat\& image, std::vector<KeyPoint>\& points,
Mat\& image\_desc);}
\begin{description}
\cvarg{image}{ Source image for which to compute the image descriptor.}
\cvarg{points}{ A vector of keypoints extracted from the image using a class derived from features\_2d::FeatureDetector.}
\cvarg{image\_desc}{ A vector of type CV\_32F in which the bag of words vector for \texttt{image} is returned. The vector is of the same length as the size of the visual vocabulary in use.}
\end{description}
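Continuing the earlier example, a minimal sketch of computing the bag-of-words vector of a single image
with the \texttt{bow\_gen} object constructed above (the SURF detector and the input \texttt{image} are
assumptions):
\begin{lstlisting}
cv::SurfFeatureDetector detector(400.);
std::vector<cv::KeyPoint> keypoints;
detector.detect(image, keypoints);

cv::Mat image_desc; // length equals the size of the visual vocabulary
bow_gen.compute(image, keypoints, image_desc);
\end{lstlisting}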
If information about the specific keypoint to which a given visual word occurrence in the returned image descriptor relates is required (e.g. to incorporate a spatial verification stage when matching bag of words vectors), the following overloaded version of the function can be used:
\cvdefCpp{void compute(const Mat\& image, std::vector<KeyPoint>\& points,
Mat\& image\_desc, std::vector<std::vector<KeyPoint> >\& keypoint\_data);}
A two-dimensional vector of keypoints is returned in \texttt{keypoint\_data}, which can be used to establish which keypoint in the \texttt{points} input array each visual word occurrence in \texttt{image\_desc} relates to. Keypoints can be indexed in the form:
\[
\texttt{keypoint\_data}[\texttt{visual\_word\_index}][\texttt{occurrence\_index}]
\]
Finally, bag of words vectors can be returned for multiple images at the same time by passing image and keypoint data to the function using a KeyPointCollection structure. There are two versions which correspond to the overloaded function calls for single images above:
\cvdefCpp{void compute(KeyPointCollection\& keypoints, Mat\& image\_descs);
void compute(KeyPointCollection\& keypoints, Mat\& image\_descs, std::vector<std::vector<std::vector<KeyPoint> > >\& keypoint\_data);}
In the case of the version which returns keypoint data, since the keypoints from multiple images have now been used, \texttt{keypoint\_data} is indexed in the form:
\[
\texttt{keypoint\_data}[\texttt{image\_index}][\texttt{visual\_word\_index}][\texttt{occurrence\_index}]
\]
\subsection{PASCAL VOC Datasets}
This section documents OpenCV's interface to the PASCAL Visual Object Classes Challenge datasets\footnote{http://pascallin.ecs.soton.ac.uk/challenges/VOC/}. This can be used to load in data from all VOC datasets from VOC2007 up to and including the most recent (VOC 2010) and evaluate the performance of a given approach to object recognition in a standardized manner. The VOC2005 and VOC2006 datasets are currently unsupported due to differences in the way these older datasets store ground truth data.
The interface conforms to the guidelines provided by the PASCAL VOC development kit\footnote{http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2010/index.html\#devkit} and can be used to evaluate (and output standard VOC results files) for both the classification and detection tasks as well as output standard classification performance metrics such as precision/recall/ap for a given object classification/query result.
\cvclass{VocData}
Class used to encapsulate all input/output operations to the PASCAL VOC dataset and compute standard performance metrics for a given object classification/query result.
\begin{lstlisting}
class VocData : public ObdData
{
public:
/* constructors */
VocData(std::string rootDir, bool useTestDataset,
VocDataset dataset = CV_VOC2010);
/* functions for returning classification/object data for multiple
images given an object class */
void getClassImages(const std::string& obj_class,
const ObdDatasetType dataset, vector<ObdImage>& images,
vector<bool>& object_present);
void getClassObjects(const std::string& obj_class,
const ObdDatasetType dataset, vector<ObdImage>& images,
vector<vector<ObdObject> >& objects);
void getClassObjects(const std::string& obj_class,
const ObdDatasetType dataset, vector<ObdImage>& images,
vector<vector<ObdObject> >& objects,
vector<vector<VocObjectData> >& object_data,
vector<VocGT>& ground_truth);
/* functions for returning object data for a single image
given an image id */
ObdImage getObjects(const std::string& id, vector<ObdObject>& objects);
ObdImage getObjects(const std::string& id, vector<ObdObject>& objects,
vector<VocObjectData>& object_data);
ObdImage getObjects(const std::string& obj_class, const std::string id,
vector<ObdObject>& objects, vector<VocObjectData>& object_data,
VocGT& ground_truth);
/* functions for returning the ground truth (present/absent) for
groups of images */
void getClassifierGroundTruth(const std::string& obj_class,
const vector<ObdImage>& images, vector<bool>& ground_truth);
void getClassifierGroundTruth(const std::string& obj_class,
const vector<std::string>& images, vector<bool>& ground_truth);
void getDetectorGroundTruth(const std::string& obj_class,
const ObdDatasetType dataset, const vector<ObdImage>& images,
const vector<vector<Rect> >& bounding_boxes,
const vector<vector<float> >& scores,
vector<vector<bool> >& ground_truth,
vector<vector<bool> >& detection_difficult,
bool ignore_difficult = true);
/* functions for writing VOC-compatible results files */
void writeClassifierResultsFile(const std::string& obj_class,
const ObdDatasetType dataset, const vector<ObdImage>& images,
const vector<float>& scores, const int competition = 1,
const bool overwrite_ifexists = false);
void writeDetectorResultsFile(const std::string& obj_class,
const ObdDatasetType dataset, const vector<ObdImage>& images,
const vector<vector<float> >& scores,
const vector<vector<Rect> >& bounding_boxes,
const int competition = 3,
const bool overwrite_ifexists = false);
/* functions for calculating metrics from a set of
classification/detection results */
string getResultsFilename(const std::string& obj_class,
const VocTask task, const ObdDatasetType dataset,
const int competition = -1, const int number = -1);
void calcClassifierPrecRecall(const std::string& obj_class,
const vector<ObdImage>& images, const vector<float>& scores,
vector<float>& precision, vector<float>& recall, float& ap);
void calcClassifierPrecRecall(const std::string& obj_class,
const vector<ObdImage>& images, const vector<float>& scores,
vector<float>& precision, vector<float>& recall, float& ap,
vector<size_t>& ranking);
void calcClassifierPrecRecall(const std::string& input_file,
vector<float>& precision, vector<float>& recall, float& ap,
bool outputRankingFile = false);
void calcDetectorPrecRecall(const std::string& obj_class,
const ObdDatasetType dataset, const vector<ObdImage>& images,
const vector<vector<float> >& scores,
const vector<vector<Rect> >& bounding_boxes,
vector<float>& precision, vector<float>& recall, float& ap,
bool ignore_difficult = true);
void calcDetectorPrecRecall(const std::string& input_file,
vector<float>& precision, vector<float>& recall, float& ap,
bool ignore_difficult = true);
/* functions for calculating confusion matrices */
void calcClassifierConfMatRow(const std::string& obj_class,
const vector<ObdImage>& images, const vector<float>& scores,
const VocConfCond cond, const float threshold,
vector<string>& output_headers, vector<float>& output_values);
void calcDetectorConfMatRow(const std::string& obj_class,
const ObdDatasetType dataset, const vector<ObdImage>& images,
const vector<vector<float> >& scores,
const vector<vector<Rect> >& bounding_boxes, const VocConfCond cond,
const float threshold, vector<string>& output_headers,
vector<float>& output_values, bool ignore_difficult = true);
/* functions for outputting gnuplot output files */
void savePrecRecallToGnuplot(const std::string output_file,
const vector<float>& precision, const vector<float>& recall,
const float ap, const std::string title = std::string(),
const VocPlotType plot_type = CV_VOC_PLOT_SCREEN);
/* functions for reading in result/ground truth files */
void readClassifierGroundTruth(const std::string& obj_class,
const ObdDatasetType dataset, vector<ObdObject>& images,
vector<bool>& object_present);
void readClassifierResultsFile(const std:: string& input_file,
vector<ObdImage>& images, vector<float>& scores);
void readDetectorResultsFile(const std::string& input_file,
vector<ObdImage>& images, vector<vector<float> >& scores,
vector<vector<Rect> >& bounding_boxes);
/* functions for getting dataset info */
std::vector<std::string> getObjectClasses();
std::string getResultsDirectory();
};
\end{lstlisting}
The first step in using the class is to initialize it with the desired VOC dataset and the path to the root directory where the VOC ground truth data is stored. Below is the description of the class constructor.
\cvCppFunc{VocData::VocData}
\cvdefCpp{VocData(std::string rootDir, bool useTestDataset, VocDataset dataset = CV\_VOC2010)}
\begin{description}
\cvarg{rootDir}{ The path to the directory which contains the ground truth data for the VOC dataset to load. For example, in the case of the VOC2010 dataset, this would be set to the location of the `VOC2010' directory. The VOC datasets can be downloaded from the PASCAL VOC website\footnote{http://pascallin.ecs.soton.ac.uk/challenges/VOC/}.}
\cvarg{useTestDataset}{ Determines whether the VOC test dataset is also available in the VOC dataset folder. This in general needs to be obtained separately from the VOC training/validation set and is the dataset used to evaluate performance in the final challenge. If the VOC test dataset is available, the combination of the VOC training and validation datasets is used as the class `training' dataset when retrieving ground truth data using the interface and the VOC test dataset is used as the class `test' dataset. If the VOC test dataset is not available, the VOC training dataset is used as the class `training' dataset and the VOC validation dataset is used as the class `test' dataset.}
\cvarg{dataset}{ Specifies the VOC dataset to use. Must correspond to the ground truth data available at the location specified by \texttt{rootDir}. Can be one of the following values: \texttt{\{CV\_VOC2007, CV\_VOC2008, CV\_VOC2009, CV\_VOC2010\}}.}
\end{description}
\cvCppFunc{VocData::getClassImages}
Return the classification ground truth data for all images of a given VOC object class.
\cvdefCpp{void getClassImages(const std::string\& obj\_class, const ObdDatasetType dataset, vector<ObdImage>\& images, vector<bool>\& object\_present)}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to retrieve ground truth data.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract images from the training or test set.}
\cvarg{images}{ Used to return an array of \texttt{ObdImage} containing info of all images extracted from the ground truth file for the given object class.}
\cvarg{object\_present}{ An array of bools specifying whether the object specified by \texttt{obj\_class} is present in each image or not.}
\end{description}
This function is primarily useful for the classification task, where it is only required to know whether a given object is present in an image, and not each object instance's position etc. For the detection task \texttt{getClassObjects} is more suitable.
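For illustration, a minimal sketch that retrieves the classification ground truth of the training set for
the class `car' (the dataset root path is an assumption):
\begin{lstlisting}
// assumes the VOC2010 data is available under /home/user/VOC/
VocData voc_data("/home/user/VOC/", false);
std::vector<ObdImage> images;
std::vector<bool> object_present;
voc_data.getClassImages("car", cv::CV_OBD_TRAIN, images, object_present);
// object_present[i] tells whether an instance of "car" appears in images[i]
\end{lstlisting}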
\cvCppFunc{VocData::getClassObjects}
Return the object data for all images of a given VOC object class. This function returns extended object information in addition to the absent/present classification data returned by \texttt{getClassImages}.
\cvdefCpp{void getClassObjects(const std::string\& obj\_class, const ObdDatasetType dataset,
vector<ObdImage>\& images, vector<vector<ObdObject> >\& objects)}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to retrieve ground truth data.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract images from the training or test set.}
\cvarg{images}{ Used to return an array of \texttt{ObdImage} containing info of all images extracted from the ground truth file for the given object class.}
\cvarg{objects}{ A 2D vector returning the extended object info (bounding box etc.) for each object instance in each image. The first dimension indexes the image, and the second the objects within that image. See \texttt{ObdObject} for more details.}
\end{description}
There is a further overloaded version of the function which returns extended information in addition to the basic object bounding box data encapsulated in the array of \texttt{ObdObject}'s:
\cvdefCpp{void getClassObjects(const std::string\& obj\_class, const ObdDatasetType dataset,
vector<ObdImage>\& images, vector<vector<ObdObject> >\& objects,
vector<vector<VocObjectData> >\& object\_data, vector<VocGT>\& ground\_truth)}
\begin{description}
\cvarg{object\_data}{ A 2D vector returning VOC-specific extended object info (marked difficult etc.). See \texttt{VocObjectData} for more details.}
\cvarg{ground\_truth}{ Returns whether there are any difficult/non-difficult instances of the current object class within each image. If there are non-difficult instances, the value corresponding to any image is set to \texttt{CV\_VOC\_GT\_PRESENT}. If there are only difficult instances it is set to \texttt{CV\_VOC\_GT\_DIFFICULT}. Otherwise the object is not present, and it is set to \texttt{CV\_VOC\_GT\_NONE}.}
\end{description}
\cvCppFunc{VocData::getObjects}
Return ground truth data for the objects present in an image with a given VOC image code. This is used to retrieve the ground truth data for a specific image from the VOC dataset given its identifier in the format \texttt{YYYY\_XXXXXX} where \texttt{YYYY} specifies the year of the VOC dataset the image was originally from (e.g. 2010 in the case of the VOC 2010 dataset) and \texttt{XXXXXX} is a unique identifying code\footnote{The VOC2007 dataset lacks the year portion of the code}.
\cvdefCpp{ObdImage getObjects(const std::string\& id, vector<ObdObject>\& objects)}
\begin{description}
\cvarg{id}{ VOC unique identifier of the image for which ground truth data should be retrieved (string code in form YYYY\_XXXXXX where YYYY is the year)}
\cvarg{objects}{ Returns the extended object info (bounding box etc.) for each object in the image. See \texttt{ObdObject} for more details.}
\end{description}
The function returns an instance of \texttt{ObdImage} containing the path of the image in the filesystem with the given code. There are also two extended versions of this function which return additional information:
\cvdefCpp{ObdImage getObjects(const std::string\& id, vector<ObdObject>\& objects, vector<VocObjectData>\& object\_data)}
\begin{description}
\cvarg{object\_data}{ Returns VOC-specific extended object info (marked difficult etc.) for the objects in the image. See \texttt{VocObjectData} for more details.}
\end{description}
\cvdefCpp{ObdImage getObjects(const std::string\& obj\_class, const std::string id, vector<ObdObject>\& objects,
vector<VocObjectData>\& object\_data, VocGT\& ground\_truth)}
\begin{description}
\cvarg{ground\_truth}{ Returns whether there are any difficult/non-difficult instances of the object class specified by \texttt{obj\_class} within the image. If there are non-difficult instances, the value corresponding to any image is set to \texttt{CV\_VOC\_GT\_PRESENT}. If there are only difficult instances it is set to \texttt{CV\_VOC\_GT\_DIFFICULT}. Otherwise the object is not present, and it is set to \texttt{CV\_VOC\_GT\_NONE}.}
\end{description}
\cvCppFunc{VocData::getClassifierGroundTruth}
Return ground truth classification data for the presence/absence of a given object class in an arbitrary array of images.
\cvdefCpp{void getClassifierGroundTruth(const std::string\& obj\_class, const vector<ObdImage>\& images,
vector<bool>\& ground\_truth);}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to retrieve ground truth data.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which ground truth data will be returned.}
\cvarg{ground\_truth}{ An output array indicating the presence/absence of \texttt{obj\_class} within each image.}
\end{description}
There is also an overloaded version which accepts a vector of image code strings instead of a vector of ObdImage:
\cvdefCpp{void getClassifierGroundTruth(const std::string\& obj\_class, const vector<std::string>\& images,
vector<bool>\& ground\_truth);}
\cvCppFunc{VocData::getDetectorGroundTruth}
Return ground truth detection data for the accuracy of an array of object detections.
\cvdefCpp{void getDetectorGroundTruth(const std::string\& obj\_class, const ObdDatasetType dataset, const vector<ObdImage>\& images, const vector<vector<Rect> >\& bounding\_boxes, const vector<vector<float> >\& scores, vector<vector<bool> >\& ground\_truth, vector<vector<bool> >\& detection\_difficult, bool ignore\_difficult = true);}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class represented by the detections in \texttt{bounding\_boxes}.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract ground truth for the training or test set.}
\cvarg{images}{ An input array of image code strings relating to the images in which objects have been detected.}
\cvarg{bounding\_boxes}{ A 2D input array of detection bounding boxes. The first dimension relates to the image in which the object was detected, and the second dimension relates to the index of the detected object.}
\cvarg{scores}{ An input array containing the pre-calculated match score for each detection. This is used because, in the case of multiple detections of the same object (see below), the detection with the highest score is assigned as a true positive and all others are marked as false positives.}
\cvarg{ground\_truth}{ A 2D output array of booleans which is set to \texttt{true} for every successful detection and \texttt{false} otherwise.}
\cvarg{detection\_difficult}{ A 2D output array indicating whether the detection fired on an object marked as `difficult'. This allows it to be ignored if necessary (the VOC documentation specifies objects marked as difficult have no effects on the results and are effectively ignored).}
\cvarg{ignore\_difficult}{ Determines whether objects marked as 'difficult' should be ignored for the purposes of evaluation or not (default true - as specified in the voc documentation, in this case objects marked as difficult have no effects on the results, and even accurate detections of difficult objects are marked as \texttt{false}.)}
\end{description}
Note that as specified in the VOC development kit documentation, multiple detections of the same object in an image are considered FALSE detections, e.g. 5 detections of a single object are counted as one true positive (the detection with the highest score, as per the implementation in the VOC development kit) with the remaining 4 detections being marked as false positives. This is generally not the desired behaviour, and as such it is the responsibility of the participant's system to filter such multiple detections from its output.
\cvCppFunc{VocData::writeClassifierResultsFile}
Write VOC-compliant classifier results file to the current dataset results directory (at the location defined by the VOC documentation).
\cvdefCpp{void writeClassifierResultsFile(const std::string\& obj\_class, const ObdDatasetType dataset, const vector<ObdImage>\& images, const vector<float>\& scores, const int competition = 1, const bool overwrite\_ifexists = false)}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to write a results file.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract images from the training or test set.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which data will be saved to the result file.}
\cvarg{scores}{ A corresponding input array of confidence scores for the presence of the specified object class in each image of the \texttt{images} array.}
\cvarg{competition}{ If specified, defines which competition the results are for (see VOC development kit documentation -- default 1).}
\cvarg{overwrite\_ifexists}{ Specifies whether the classifier results file should be overwritten if it exists. By default, this is false and instead a new file with a numbered postfix will be created.}
\end{description}
Note that if the dataset results directory does not exist, the function call will fail. Therefore, it is important to make sure that this directory is created beforehand. Details as to its location can be found in the VOC documentation, but in general it is a sub-directory named `results' within the dataset root directory.
\cvCppFunc{VocData::writeDetectorResultsFile}
Write VOC-compliant detector results file to the current dataset results directory (at the location defined by the VOC documentation).
\cvdefCpp{void writeDetectorResultsFile(const std::string\& obj\_class, const ObdDatasetType dataset, const vector<ObdImage>\& images, const vector<vector<float> >\& scores, const vector<vector<Rect> >\& bounding\_boxes, const int competition = 3, const bool overwrite\_ifexists = false)}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to write a results file.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract images from the training or test set.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which data will be saved to the result file.}
\cvarg{scores}{ A corresponding input array of confidence scores for the presence of the specified object class in each object detection within each image of the \texttt{images} array (the first array dimension corresponds to a given image, and the second dimension corresponds to a given object detection).}
\cvarg{bounding\_boxes}{ A corresponding input array of bounding boxes for the presence of the specified object class in each object detection within each image of the \texttt{images} array.}
\cvarg{competition}{ If specified, defines which competition the results are for (see VOC development kit documentation -- default 3).}
\cvarg{overwrite\_ifexists}{ Specifies whether the classifier results file should be overwritten if it exists. By default, this is false and instead a new file with a numbered postfix will be created.}
\end{description}
Note that as with \texttt{writeClassifierResultsFile}, if the dataset results directory does not exist, the function call will fail. Therefore, it is important to make sure that this directory is created beforehand. Details as to its location can be found in the VOC documentation, but in general it is a sub-directory named `results' within the dataset root directory.
\cvCppFunc{VocData::getResultsFilename}
Used to construct the filename of a VOC-standard classification/detection results file from the object class and active dataset (see the VOC development kit documentation for more details). By default \texttt{writeClassifierResultsFile} and \texttt{writeDetectorResultsFile} both save a file in this format to the current dataset results directory (again, at the location defined by the VOC documentation), and this function can be used to reconstruct this filename so that the saved results can be loaded again in order to, for example, calculate the precision-recall for the result set. An example of this usage might be as follows:
\begin{lstlisting}
VocData voc_data("/home/user/VOC/",false);
voc_data.writeClassifierResultsFile("chair", cv::CV_OBD_TEST, images,
confidences);
/* -- later read in results written by writeClassifierResultsFile and
calculate precision-recall for the result set */
std::vector<float> precision, recall;
float ap;
const std::string result_file =
voc_data.getResultsFilename("chair", cv::CV_VOC_TASK_CLASSIFICATION,
cv::CV_OBD_TEST);
voc_data.calcClassifierPrecRecall(result_file, precision, recall, ap);
\end{lstlisting}
\cvdefCpp{std::string getResultsFilename(const std::string obj\_class, const VocTask task,
const ObdDatasetType dataset, const int competition, const int number)}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to construct a filename.}
\cvarg{task}{ Specifies whether to generate a filename for the classification (\texttt{CV\_VOC\_TASK\_CLASSIFICATION}) or detection (\texttt{CV\_VOC\_TASK\_DETECTION}) task.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract images from the training or test set.}
\cvarg{competition}{ If specified, defines which competition the results are for (see VOC development kit documentation -- default -1 sets competition number 1 for the classification task or competition number 3 for the detection task).}
\cvarg{number}{If specified and above 0, defines which of a number of duplicate results file produced for a given set of settings should be used (this number will be added as a postfix to the filename. Default -1)}
\end{description}
\cvCppFunc{VocData::calcClassifierPrecRecall}
Used to calculate precision, recall and average precision (AP) over a given set of classification results. The most straightforward way to use this function is to provide the filename of a VOC standard classification results file:
\cvdefCpp{void calcClassifierPrecRecall(const std::string input\_file, vector<float>\& precision,
vector<float>\& recall, float\& ap, bool outputRankingFile = false);}
\begin{description}
\cvarg{input\_file}{ The VOC standard classification results file from which to read data and calculate precision/recall. If a full path is not specified, it is assumed that this file is in the current dataset results directory. The filename itself can be constructed using \texttt{getResultsFilename}.}
\cvarg{precision}{ Returns a vector containing the precision calculated at each datapoint of a p-r curve generated from the result set.}
\cvarg{recall}{ Returns a vector containing the recall calculated at each datapoint of a p-r curve generated from the result set.}
\cvarg{ap}{ Returns the AP (average precision) metric calculated from the result set. This is equivalent to the area under the precision-recall curve.}
\cvarg{outputRankingFile}{ If true, also outputs a plain-text file in the same directory as the input file containing the ranking order (with scores) of the images contained in \texttt{input\_file}. This filename will be named `scoregt\_$\langle$ class$\rangle$ \_name.txt'.}
\end{description}
There is also a version of the function which can be used to calculate precision and recall from a set of input arrays instead of a VOC results file:
\cvdefCpp{void calcClassifierPrecRecall(const std::string\& obj\_class, const vector<ObdImage>\& images,
const vector<float>\& scores, vector<float>\& precision, vector<float>\& recall, float\& ap)}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to calculate precision/recall metrics.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which precision/recall will be calculated.}
\cvarg{scores}{ An input vector containing the similarity score for each input image (higher is more similar).}
\end{description}
There is no need for the input arrays (images and scores) to be sorted in any way. However, internally both are sorted in order of descending score. This ordering may be useful for constructing an ordered ranking list of results, and so there is another version of the function which returns this sorting order:
\cvdefCpp{void calcClassifierPrecRecall(const std::string\& obj\_class, const vector<ObdImage>\& images,
const vector<float>\& scores, vector<float>\& precision, vector<float>\& recall, float\& ap, vector<size\_t>\& ranking)}
\begin{description}
\cvarg{ranking}{ An output vector containing indices which can subsequently be used to retrieve elements of \texttt{images} and \texttt{scores} in descending order of similarity score. For example, to access the first sorted item in the ranked list in the \texttt{images} array use:
\[
\texttt{images}[\texttt{ranking}[0]]
\]}
\end{description}
Note that to calculate the average precision (AP), instead of taking the area beneath the precision-recall curve as-is, a monotonically decreasing version of the curve is generated, with the precision $p_o$ at a given recall $r_o$ given by the maximum precision achieved at any recall $r \ge r_o$. Furthermore, for datasets prior to VOC2010, this curve is then sampled at discrete points $r = 0.0, 0.1, 0.2, \cdots , 0.9, 1.0$ when calculating the bounded area.
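A brief sketch of the array-based variant with the ranking output (\texttt{voc\_data}, \texttt{images} and
\texttt{scores} are assumed to come from an earlier classification run, as in the example above):
\begin{lstlisting}
std::vector<float> precision, recall;
float ap;
std::vector<size_t> ranking;
voc_data.calcClassifierPrecRecall("chair", images, scores,
    precision, recall, ap, ranking);
// images[ranking[0]] is the highest-scoring image for the "chair" query
\end{lstlisting}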
\cvCppFunc{VocData::calcDetectorPrecRecall}
Used to calculate precision, recall and average precision (AP) over a given set of detection results. The most straightforward way to use this function is to provide the filename of a VOC standard detection results file:
\cvdefCpp{void calcDetectorPrecRecall(const std::string\& input\_file, vector<float>\& precision,
vector<float>\& recall, float\& ap, bool ignore\_difficult = true);}
\begin{description}
\cvarg{input\_file}{ The VOC standard detection results file from which to read data and calculate precision/recall. If a full path is not specified, it is assumed that this file is in the current dataset results directory. The filename itself can be constructed using \texttt{getResultsFilename}.}
\cvarg{precision}{ Returns a vector containing the precision calculated at each datapoint of a p-r curve generated from the result set.}
\cvarg{recall}{ Returns a vector containing the recall calculated at each datapoint of a p-r curve generated from the result set.}
\cvarg{ap}{ Returns the AP (average precision) metric calculated from the result set. This is equivalent to the area under the precision-recall curve.}
\cvarg{ignore\_difficult}{ Determines whether objects marked as `difficult' should be ignored for the purposes of evaluation or not (default true - as specified in the voc documentation, in this case objects marked as difficult have no effects on the results.)}
\end{description}
There is also a version of the function which can be used to calculate precision and recall from a set of input arrays instead of a VOC results file:
\cvdefCpp{void calcDetectorPrecRecall(const std::string\& obj\_class, const ObdDatasetType dataset, const vector<ObdImage>\& images, const vector<vector<float> >\& scores, const vector<vector<Rect> >\& bounding\_boxes, vector<float>\& precision, vector<float>\& recall, float\& ap, bool ignore\_difficult = true);}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to calculate precision/recall metrics.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract ground truth for the training or test set.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which precision/recall will be calculated.}
\cvarg{scores}{ A 2D input vector containing the similarity score for each detected object (higher is more similar -- the first dimension indexes the image within which the object was detected, and the second dimension indexes the collection of detected objects within each image).}
\cvarg{bounding\_boxes}{ A 2D input vector containing the predicted boundary box for each detected object.}
\end{description}
In both cases, the validity of a detection in the results set is calculated internally using \texttt{getDetectorGroundTruth} and the overlap criterion specified in the VOC documentation is used to determine whether a particular detection is accurate or not.
\cvCppFunc{VocData::calcClassifierConfMatRow}
Used to calculate the row of a confusion matrix given a set of classifier results for a VOC object class.
\cvdefCpp{void calcClassifierConfMatRow(const std::string\& obj\_class,
const vector<ObdImage>\& images, const vector<float>\& scores, const VocConfCond cond,
const float threshold, vector<string>\& output\_headers, vector<float>\& output\_values);}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to calculate the confusion matrix row.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which the confusion matrix row will be calculated.}
\cvarg{scores}{ An input vector containing the similarity score for each input image (higher is more similar).}
\cvarg{cond}{ The condition to use when determining the number of images which should be taken into account when calculating the confusion matrix row. If set to \texttt{CV\_VOC\_CCOND\_RECALL} all images up to a proportion recall specified by \texttt{threshold} are considered. If set to \texttt{CV\_VOC\_CCOND\_SCORETHRESH} all images with a score above the value specified by \texttt{threshold} are considered.}
\cvarg{threshold}{ The threshold to use when determining the number of images which should be taken into account when calculating the confusion matrix row. Used in conjunction with \texttt{cond}.}
\cvarg{output\_headers}{ An output vector of object class headers for the confusion matrix row.}
\cvarg{output\_values}{ An output vector of values for the confusion matrix row corresponding to the classes defined in \texttt{output\_headers}. This is normalized such that $\sum \texttt{output\_values} = 1$.}
\end{description}
For the \texttt{cond} parameter \texttt{CV\_VOC\_CCOND\_SCORETHRESH} is particularly useful when the scores for each image are the output of a classifier, with +1 defining positives, -1 defining negatives and 0 being the class boundary. In this case, to account for only cases in which the object class was detected, use \texttt{CV\_VOC\_CCOND\_SCORETHRESH} and set \texttt{threshold} to 0. Alternatively, when setting \texttt{cond} to \texttt{CV\_VOC\_CCOND\_RECALL} the confusion matrix row at 50\% recall could be calculated by setting \texttt{threshold} to 0.5, for example.
The methodology used by the classifier version of this function is that true positives have a single unit added to the column corresponding to \texttt{obj\_class} in the confusion matrix row, whereas false positives have a single unit distributed in proportion between all the columns in the confusion matrix row corresponding to the objects present in the image.
A full confusion matrix can be constructed by calling this function repeatedly with classification results for each one of the classes of the current dataset (retrieved using \texttt{getObjectClasses}). The individual rows calculated in this way can then be concatenated into a single confusion matrix, as sketched below. This can be useful for inspecting the performance of a given approach to classification more thoroughly, allowing frequently confused object classes to be identified and the classification algorithm to be optimised.
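A sketch of building a full confusion matrix in this way (it assumes that, for every class index
\texttt{c}, per-class classification results \texttt{images[c]} and \texttt{scores[c]} have been produced
beforehand, and that \texttt{voc\_data} is an initialized \texttt{VocData} object):
\begin{lstlisting}
std::vector<std::string> classes = voc_data.getObjectClasses();
std::vector<std::vector<float> > conf_mat;
std::vector<std::string> headers;
for (size_t c = 0; c < classes.size(); c++)
{
    std::vector<float> row;
    voc_data.calcClassifierConfMatRow(classes[c], images[c], scores[c],
        cv::CV_VOC_CCOND_SCORETHRESH, 0.f, headers, row);
    conf_mat.push_back(row); // one normalized row per class; columns follow 'headers'
}
\end{lstlisting}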
\cvCppFunc{VocData::calcDetectorConfMatRow}
Used to calculate the row of a confusion matrix given a set of detection results for a VOC object class.
\cvdefCpp{virtual void calcDetectorConfMatRow(const std::string\& obj\_class, ObdDatasetType dataset, const vector<ObdImage>\& images, const vector<vector<float> >\& scores, const vector<vector<Rect> >\& bounding\_boxes, const VocConfCond cond, const float threshold, vector<string>\& output\_headers, vector<float>\& output\_values, bool ignore\_difficult = true);}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to calculate the confusion matrix row.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract ground truth for the training or test set.}
\cvarg{images}{ An input array of \texttt{ObdImage} containing the images for which the confusion matrix row will be calculated.}
\cvarg{scores}{ A 2D input vector containing the similarity score for each object detected in each input image (higher is more similar).}
\cvarg{bounding\_boxes}{ A 2D input vector containing the predicted boundary box for each detected object.}
\cvarg{cond}{ The condition to use when determining the number of images which should be taken into account when calculating the confusion matrix row. If set to \texttt{CV\_VOC\_CCOND\_RECALL} all images up to a proportion recall specified by \texttt{threshold} are considered. If set to \texttt{CV\_VOC\_CCOND\_SCORETHRESH} all images with a score above the value specified by \texttt{threshold} are considered.}
\cvarg{threshold}{ The threshold to use when determining the number of images which should be taken into account when calculating the confusion matrix row. Used in conjunction with \texttt{cond}.}
\cvarg{output\_headers}{ An output vector of object class headers for the confusion matrix row. In addition to the object classes present in the currently active dataset, an extra class labelled `background' is appended to the end of this array; object detections are assigned to it when their bounding boxes do not overlap any object in their parent image (as defined by the dataset ground truth) and are thus `background'.}
\cvarg{output\_values}{ An output vector of values for the confusion matrix row corresponding to the classes defined in \texttt{output\_headers}. The row is normalized such that $\sum output\_values = 1$.}
\cvarg{ignore\_difficult}{ Determines whether objects marked as `difficult' should be ignored for the purposes of evaluation (default true). As specified in the VOC development kit documentation, objects marked as difficult then have no effect on the results.}
\end{description}
The methodology used by the detection version of this function is as follows: each object detection is assigned to the closest matching object in the ground truth, as determined by the overlap score defined in the VOC development kit documentation. If a detection passes the overlap condition for the class defined by \texttt{obj\_class}, and thus counts as a true positive, but its overlap score is higher for a class \emph{other} than \texttt{obj\_class}, the detection is assigned not to \texttt{obj\_class} but to that other object class. Furthermore, unlike the ground truth returned by \texttt{getDetectorGroundTruth} and used in the \texttt{calcDetectorPrecRecall} function, multiple detections \emph{are not} accounted for explicitly. This means that if three detections overlap the same object in the ground truth, they will all be assigned to that object's class (whereas when using \texttt{getDetectorGroundTruth} only the first such detection would be counted as a valid detection). This is another reason to follow the guidelines in the VOC development kit documentation for the detection task and to filter multiple detections from the input arrays before calling this function.
A full confusion matrix can be constructed by calling this function repeatedly, once with the detection results for each of the classes of the current dataset (retrieved using \texttt{getObjectClasses}), as shown in the sketch below. The individual rows calculated in this way can then be concatenated into a single confusion matrix. This can be useful for inspecting the performance of a given approach to detection more thoroughly, allowing frequently confused object classes to be identified and the detection algorithm to be optimised.
For further notes about the \texttt{cond} and \texttt{threshold} parameters, see the documentation for the \texttt{calcClassifierConfMatRow} function.
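The sketch below shows how a full detector confusion matrix might be assembled in this way. It is illustrative only: it assumes a \texttt{VocData} instance \texttt{voc\_data} has already been constructed for the current dataset, and that the caller supplies a map from each object class to the path of its detection results file (such paths could, for instance, be built with \texttt{getResultsFilename}); the per-class results are loaded with \texttt{readDetectorResultsFile}, documented below.
\begin{lstlisting}
// Sketch only: build a full detector confusion matrix, one row per class.
// Assumes a constructed VocData instance and a caller-supplied map from
// each object class to its detection results file.
void buildDetectorConfusionMatrix( VocData& voc_data,
        const std::map<std::string, std::string>& results_files,
        std::vector<std::string>& headers,
        std::vector<std::vector<float> >& conf_mat )
{
    std::vector<std::string> classes = voc_data.getObjectClasses();
    conf_mat.clear();
    for( size_t i = 0; i < classes.size(); i++ )
    {
        std::vector<ObdImage> images;
        std::vector<std::vector<float> > scores;
        std::vector<std::vector<cv::Rect> > bounding_boxes;
        // (assumes every class has an entry in results_files)
        voc_data.readDetectorResultsFile( results_files.find( classes[i] )->second,
                                          images, scores, bounding_boxes );
        std::vector<std::string> row_headers;
        std::vector<float> row_values;
        // consider only detections with a score above 0
        voc_data.calcDetectorConfMatRow( classes[i], CV_OBD_TEST, images, scores,
                                         bounding_boxes, CV_VOC_CCOND_SCORETHRESH,
                                         0.f, row_headers, row_values );
        headers = row_headers;          // identical for every row
        conf_mat.push_back( row_values );
    }
}
\end{lstlisting}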
\cvCppFunc{VocData::savePrecRecallToGnuplot}
Used to output a set of precision-recall results (generated using \texttt{calcClassifierPrecRecall} or \texttt{calcDetectorPrecRecall}) to a GNUPlot\footnote{http://www.gnuplot.info/}-compatible data file. In conjunction with GNUPlot, this can be used to produce a precision-recall plot and either display it on screen or save it to a PDF file by passing the data file to GNUPlot as a parameter. For example:
\begin{lstlisting}
>> gnuplot "datafile.dat"
\end{lstlisting}
\cvdefCpp{void savePrecRecallToGnuplot(const std::string\& output\_file, const vector<float>\& precision,
const vector<float>\& recall, const float ap, const std::string title, const VocPlotType plot\_type)}
\begin{description}
\cvarg{output\_file}{ The filename under which to save the GNUPlot data file. If a full path is not specified, it is assumed that this file should be saved to the current dataset results directory.}
\cvarg{precision}{ An input vector of precision values as returned from \texttt{calcClassifierPrecRecall} or \texttt{calcDetectorPrecRecall}.}
\cvarg{recall}{ An input vector of recall values as returned from \texttt{calcClassifierPrecRecall} or \texttt{calcDetectorPrecRecall}.}
\cvarg{ap}{ The AP (average precision) as returned from \texttt{calcClassifierPrecRecall} or \texttt{calcDetectorPrecRecall}.}
\cvarg{title}{ Title to use for the plot (if not specified, just the AP is printed as the title). This also specifies the filename of the output file when printing to PDF.}
\cvarg{plot\_type}{ Specifies whether to instruct GNUPlot to save to a PDF file (\texttt{CV\_VOC\_PLOT\_PDF}) or directly to screen (\texttt{CV\_VOC\_PLOT\_SCREEN}) in the datafile.}
\end{description}
Note that no plot file is produced nor is any plot displayed on the screen when this function is called. In order to do this, pass the generated datafile to GNUPlot via the command line as described above.
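A brief usage sketch follows. It assumes a constructed \texttt{VocData} instance \texttt{voc\_data} and that \texttt{precision}, \texttt{recall} and \texttt{ap} have already been computed (e.g. by \texttt{calcClassifierPrecRecall}); the filename and title are illustrative.
\begin{lstlisting}
// write a GNUPlot data file which, when run through gnuplot, renders the
// precision-recall curve directly to a PDF file
voc_data.savePrecRecallToGnuplot( "car_classifier_pr.dat", precision, recall,
                                  ap, "car (classifier)", CV_VOC_PLOT_PDF );
// then, from a shell:  gnuplot "car_classifier_pr.dat"
\end{lstlisting}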
\cvCppFunc{VocData::readClassifierGroundTruth}
Utility function which extracts data from the classification ground truth file for a given object class and dataset into a set of output vectors.
\cvdefCpp{void readClassifierGroundTruth(const std::string\& obj\_class,
const ObdDatasetType dataset, vector<ObdImage>\& images, vector<bool>\& object\_present);}
\begin{description}
\cvarg{obj\_class}{ The VOC object class identifier string for the object class for which to retrieve the classifier ground truth.}
\cvarg{dataset}{ Either \texttt{CV\_OBD\_TRAIN} or \texttt{CV\_OBD\_TEST}. Specifies whether to extract ground truth for the training or test set.}
\cvarg{images}{ An output array of \texttt{ObdImage} containing the images extracted from the ground truth file.}
\cvarg{object\_present}{ An output array of bools specifying whether the object specified by \texttt{obj\_class} is present in each image extracted from the ground truth file or not.}
\end{description}
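For example, the number of positive test images for a class could be counted as follows (a sketch, assuming a constructed \texttt{VocData} instance \texttt{voc\_data}; the class name is illustrative):
\begin{lstlisting}
std::vector<ObdImage> images;
std::vector<bool> object_present;
voc_data.readClassifierGroundTruth( "car", CV_OBD_TEST, images, object_present );
size_t positives = 0;
for( size_t i = 0; i < object_present.size(); i++ )
    if( object_present[i] )
        positives++;   // images containing at least one `car' object
\end{lstlisting}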
\cvCppFunc{VocData::readClassifierResultsFile}
Utility function which extracts data from a given classifier results file into a set of output vectors.
\cvdefCpp{void readClassifierResultsFile(const std::string\& input\_file,
vector<ObdImage>\& images, vector<float>\& scores);}
\begin{description}
\cvarg{input\_file}{ The VOC standard classification results file from which to read data. If a full path is not specified, it is assumed that this file is in the current dataset results directory. The filename itself can be constructed using \texttt{getResultsFilename}.}
\cvarg{images}{ An output array of \texttt{ObdImage} containing the images extracted from the results file.}
\cvarg{scores}{ An output array containing the similarity scores of each image extracted from the results file.}
\end{description}
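For example, the most confidently classified image in a results file could be found as follows (a sketch, assuming a constructed \texttt{VocData} instance \texttt{voc\_data}; \texttt{results\_file} is assumed to hold the path to a VOC classification results file, e.g. as built with \texttt{getResultsFilename}):
\begin{lstlisting}
std::vector<ObdImage> images;
std::vector<float> scores;
voc_data.readClassifierResultsFile( results_file, images, scores );
if( !scores.empty() )
{
    size_t best = 0;
    for( size_t i = 1; i < scores.size(); i++ )
        if( scores[i] > scores[best] )
            best = i;
    // images[best].id identifies the highest-scoring image
}
\end{lstlisting}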
\cvCppFunc{VocData::readDetectorResultsFile}
Utility function which extracts data from a given detector results file into a set of output vectors.
\cvdefCpp{void readDetectorResultsFile(const std::string\& input\_file,
vector<ObdImage>\& images, vector<vector<float> >\& scores,
vector<vector<Rect> >\& bounding\_boxes);}
\begin{description}
\cvarg{input\_file}{ The VOC standard detection results file from which to read data and calculate precision/recall. If a full path is not specified, it is assumed that this file is in the current dataset results directory. The filename itself can be constructed using \texttt{getResultsFilename}.}
\cvarg{images}{ An output array of \texttt{ObdImage} containing the images extracted from the results file.}
\cvarg{scores}{ A 2D output array containing the similarity scores of each object extracted from the results file.}
\cvarg{bounding\_boxes}{ A 2D output array containing the bounding boxes of each object extracted from the results file.}
\end{description}
\cvCppFunc{VocData::getObjectClasses}
\cvdefCpp{std::vector<std::string> getObjectClasses()}
Returns an array of the valid object class identifier strings for the current dataset.
\cvCppFunc{VocData::getResultsDirectory}
\cvdefCpp{std::string getResultsDirectory()}
Returns the path to the results directory of the current dataset.
\cvclass{ObdImage}
Used to store information related to a single image within a dataset.
\begin{lstlisting}
class ObdImage
{
public:
ObdImage(std::string p_id, std::string p_path):
id(p_id), path(p_path) {};
/* unique identifier code, e.g. for VOC in the form YYYY_XXXXXX */
std::string id;
/* path to the image in the filesystem */
std::string path;
};
\end{lstlisting}
\cvclass{ObdObject}
Used to store bounding box information about object instances within a parent image.
\begin{lstlisting}
class ObdObject
{
public:
/* object class of the defined object */
std::string object_class;
/* bounding box coordinates of the object in the parent image */
Rect boundingBox;
};
\end{lstlisting}
\cvclass{VocObjectData}
Used to store VOC-specific datafields related to object instances as described in the VOC development kit documentation.
\begin{lstlisting}
class VocObjectData
{
public:
/* determines whether the object is marked difficult or not */
bool difficult;
/* determines whether the object is mainly occluded by other
objects or not */
bool occluded;
/* determines whether the object is truncated by the edge of the
image frame or not */
bool truncated;
/* the pose of the object. Can be one of: CV_VOC_POSE_UNSPECIFIED,
CV_VOC_POSE_FRONTAL, CV_VOC_POSE_REAR, CV_VOC_POSE_LEFT,
CV_VOC_POSE_RIGHT */
VocPose pose;
};
\end{lstlisting}
\subsection{Extended Visual Features}
\cvclass{DenseFeatureDetector}
Class used for extracting features densely from an image.
\begin{lstlisting}
class DenseFeatureDetector : public FeatureDetector
{
public:
DenseFeatureDetector(const float feature_scale, const int bound,
const int sampling_step = 6, const int scale_levels = 1,
const float scale_mul = 0.1,
const bool step_varies_with_scale = true,
const bool bound_varies_with_scale = false);
virtual void detectImpl(const cv::Mat& image, const cv::Mat& mask,
std::vector<cv::KeyPoint>& keypoints) const;
int getStep();
void setStep(const int step);
};
\end{lstlisting}
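A brief usage sketch (assuming \texttt{using namespace cv} and \texttt{std}, as in the OpenCV samples; the image filename and the \texttt{feature\_scale} and \texttt{bound} values are illustrative, and detection itself uses the \texttt{detect} method inherited from \texttt{FeatureDetector}):
\begin{lstlisting}
Mat img = imread( "scene.png", 0 );        // load as greyscale
DenseFeatureDetector detector( 1.f, 5 );   // feature_scale = 1, bound = 5 pixels
vector<KeyPoint> keypoints;
detector.detect( img, keypoints );         // keypoints sampled on a regular grid
\end{lstlisting}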
\cvclass{ColorSurfDescriptorExtractor}
Class used for extracting color SURF features (calculated in the Opponent color space) from an image.
\begin{lstlisting}
class ColorSurfDescriptorExtractor : public SurfDescriptorExtractor
{
public:
ColorSurfDescriptorExtractor(int nOctaves=4,
int nOctaveLayers=2, bool extended=false);
virtual void compute( const Mat& image,
vector<KeyPoint>& keypoints, Mat& descriptors) const;
};
\end{lstlisting}
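A brief usage sketch (again assuming \texttt{using namespace cv} and \texttt{std}; the colour image filename is illustrative, and the keypoints could come from any \texttt{FeatureDetector}):
\begin{lstlisting}
Mat color_img = imread( "scene.png", 1 );  // a 3-channel colour image is required
vector<KeyPoint> keypoints;
DenseFeatureDetector detector( 1.f, 5 );
detector.detect( color_img, keypoints );
ColorSurfDescriptorExtractor extractor;    // defaults: 4 octaves, 2 layers, basic SURF
Mat descriptors;
extractor.compute( color_img, keypoints, descriptors );
// descriptors now holds one row per remaining keypoint
\end{lstlisting}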
\fi

@ -25,9 +25,9 @@
\chapter{features2d. Feature Detection and Descriptor Extraction}
\renewcommand{\curModule}{features2d}
\input{features2d_feature_detection}
\input{features2d_object_recognition}
\input{features2d_object_detection}
\input{features2d_detection_description}
\input{features2d_common_detection_description}
\input{features2d_object_categorization}
\chapter{flann. Clustering and Search in Multi-Dimensional Spaces}
\renewcommand{\curModule}{flann}
