\section{Feature detection and description}

\ifCpp

\cvCppFunc{FAST}
Detects corners using the FAST algorithm by E. Rosten (``Machine learning for high-speed corner detection'', 2006).

\cvdefCpp{
void FAST( const Mat\& image, vector<KeyPoint>\& keypoints,
           int threshold, bool nonmaxSupression=true );
}

\begin{description}
\cvarg{image}{The image. Keypoints (corners) will be detected on this.}
\cvarg{keypoints}{Keypoints detected on the image.}
\cvarg{threshold}{Threshold on the difference in intensity between the center pixel and the pixels on a circle around it. See the description of the algorithm.}
\cvarg{nonmaxSupression}{If it is true, non-maximum suppression is applied to the detected corners (keypoints).}
\end{description}
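
A minimal usage sketch (the image file name and the threshold value are illustrative, not recommended defaults):

\begin{lstlisting}
#include "cv.h"
#include "highgui.h"
#include <vector>
using namespace cv;
using namespace std;

int main()
{
    // load the image as 8-bit grayscale
    Mat img = imread("building.jpg", 0);
    if( img.empty() )
        return -1;

    vector<KeyPoint> corners;
    // a threshold of 40 is illustrative; tune it to the image contrast
    FAST( img, corners, 40, true );
    // corners[i].pt holds the position of the i-th detected corner
    return 0;
}
\end{lstlisting}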
\fi

\ifCPy
\ifPy

\cvclass{CvSURFPoint}
A SURF keypoint, represented as a tuple \texttt{((x, y), laplacian, size, dir, hessian)}.

\begin{description}
\cvarg{x}{x-coordinate of the feature within the image}
\cvarg{y}{y-coordinate of the feature within the image}
\cvarg{laplacian}{-1, 0 or +1. The sign of the laplacian at the point. It can be used to speed up feature comparison, since features with laplacians of different signs cannot match}
\cvarg{size}{size of the feature}
\cvarg{dir}{orientation of the feature: 0..360 degrees}
\cvarg{hessian}{value of the hessian (can be used to approximately estimate the feature strength; see also params.hessianThreshold)}
\end{description}
\fi

\cvCPyFunc{ExtractSURF}
Extracts Speeded Up Robust Features from an image.

\cvdefC{
void cvExtractSURF( \par const CvArr* image,\par const CvArr* mask,\par CvSeq** keypoints,\par CvSeq** descriptors,\par CvMemStorage* storage,\par CvSURFParams params );
}
\cvdefPy{ExtractSURF(image,mask,storage,params)-> (keypoints,descriptors)}

\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{mask}{The optional input 8-bit mask. The features are only found in the areas that contain more than 50\% non-zero mask pixels}
\ifC
\cvarg{keypoints}{The output parameter; double pointer to the sequence of keypoints. The sequence of CvSURFPoint structures is as follows:}
\begin{lstlisting}
typedef struct CvSURFPoint
{
    CvPoint2D32f pt; // position of the feature within the image
    int laplacian;   // -1, 0 or +1. sign of the laplacian at the point.
                     // can be used to speed up feature comparison
                     // (normally features with laplacians of different
                     // signs cannot match)
    int size;        // size of the feature
    float dir;       // orientation of the feature: 0..360 degrees
    float hessian;   // value of the hessian (can be used to
                     // approximately estimate the feature strength;
                     // see also params.hessianThreshold)
}
CvSURFPoint;
\end{lstlisting}
\cvarg{descriptors}{The optional output parameter; double pointer to the sequence of descriptors. Depending on the params.extended value, each element of the sequence will be either a 64-element or a 128-element floating-point (\texttt{CV\_32F}) vector. If the parameter is NULL, the descriptors are not computed}
\else
\cvarg{keypoints}{sequence of keypoints.}
\cvarg{descriptors}{sequence of descriptors. Each SURF descriptor is a list of floats, of length 64 or 128.}
\fi
\cvarg{storage}{Memory storage where the keypoints and descriptors will be stored}
\ifC
\cvarg{params}{Various algorithm parameters put to the structure CvSURFParams:}
\begin{lstlisting}
typedef struct CvSURFParams
{
    int extended; // 0 means basic descriptors (64 elements each),
                  // 1 means extended descriptors (128 elements each)
    double hessianThreshold; // only features with keypoint.hessian
                  // larger than that are extracted.
                  // A good default value is ~300-500 (it can depend
                  // on the average local contrast and sharpness of
                  // the image).
                  // The user can further filter out some features
                  // based on their hessian values and other
                  // characteristics.
    int nOctaves; // the number of octaves to be used for extraction.
                  // With each next octave the feature size is doubled
                  // (3 by default)
    int nOctaveLayers; // The number of layers within each octave
                  // (4 by default)
}
CvSURFParams;

CvSURFParams cvSURFParams(double hessianThreshold, int extended=0);
// returns default parameters
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(extended, hessianThreshold, nOctaves, nOctaveLayers)}:
\begin{description}
\cvarg{extended}{0 means basic descriptors (64 elements each), 1 means extended descriptors (128 elements each)}
\cvarg{hessianThreshold}{only features with a hessian larger than this value are extracted. A good default value is ~300-500 (it can depend on the average local contrast and sharpness of the image). The user can further filter out some features based on their hessian values and other characteristics.}
\cvarg{nOctaves}{the number of octaves to be used for extraction. With each next octave the feature size is doubled (3 by default)}
\cvarg{nOctaveLayers}{The number of layers within each octave (4 by default)}
\end{description}}
\fi
\end{description}

The function cvExtractSURF finds robust features in the image, as
described in \cite{Bay06}. For each feature it returns its location, size,
orientation and, optionally, the descriptor, basic or extended. The function
can be used for object tracking and localization, image stitching, etc.

\ifC
See the \texttt{find\_obj.cpp} demo in the OpenCV samples directory.
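
A minimal sketch of the basic call (the file name and the hessian threshold are illustrative):

\begin{lstlisting}
IplImage* img = cvLoadImage( "building.jpg", CV_LOAD_IMAGE_GRAYSCALE );
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq *keypoints = 0, *descriptors = 0;
/* hessianThreshold=500 is illustrative; extended=0 requests
   basic 64-element descriptors */
cvExtractSURF( img, 0, &keypoints, &descriptors,
               storage, cvSURFParams(500, 0) );
printf( "%d SURF keypoints extracted\n", keypoints->total );
\end{lstlisting}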
\else
To extract strong SURF features from an image:

\begin{lstlisting}
>>> import cv
>>> im = cv.LoadImageM("building.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
>>> (keypoints, descriptors) = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 30000, 3, 1))
>>> print len(keypoints), len(descriptors)
6 6
>>> for ((x, y), laplacian, size, dir, hessian) in keypoints:
...     print "x=%d y=%d laplacian=%d size=%d dir=%f hessian=%f" % (x, y, laplacian, size, dir, hessian)
x=30 y=27 laplacian=-1 size=31 dir=69.778503 hessian=36979.789062
x=296 y=197 laplacian=1 size=33 dir=111.081039 hessian=31514.349609
x=296 y=266 laplacian=1 size=32 dir=107.092300 hessian=31477.908203
x=254 y=284 laplacian=1 size=31 dir=279.137360 hessian=34169.800781
x=498 y=525 laplacian=-1 size=33 dir=278.006592 hessian=31002.759766
x=777 y=281 laplacian=1 size=70 dir=167.940964 hessian=35538.363281
\end{lstlisting}

\fi

\cvCPyFunc{GetStarKeypoints}
Retrieves keypoints using the StarDetector algorithm.

\cvdefC{
CvSeq* cvGetStarKeypoints( \par const CvArr* image,\par CvMemStorage* storage,\par CvStarDetectorParams params=cvStarDetectorParams() );
}
\cvdefPy{GetStarKeypoints(image,storage,params)-> keypoints}

\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{storage}{Memory storage where the keypoints will be stored}
\ifC
\cvarg{params}{Various algorithm parameters given to the structure CvStarDetectorParams:}
\begin{lstlisting}
typedef struct CvStarDetectorParams
{
    int maxSize; // maximal size of the features detected. The following
                 // values of the parameter are supported:
                 // 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
    int responseThreshold; // threshold for the approximated laplacian,
                           // used to eliminate weak features
    int lineThresholdProjected; // another threshold for the laplacian to
                                // eliminate edges
    int lineThresholdBinarized; // yet another threshold for the feature
                                // scale to eliminate edges
    int suppressNonmaxSize; // linear size of a pixel neighborhood
                            // for non-maxima suppression
}
CvStarDetectorParams;
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(maxSize, responseThreshold, lineThresholdProjected, lineThresholdBinarized, suppressNonmaxSize)}:
\begin{description}
\cvarg{maxSize}{maximal size of the features detected. The following values of the parameter are supported: 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128}
\cvarg{responseThreshold}{threshold for the approximated laplacian, used to eliminate weak features}
\cvarg{lineThresholdProjected}{another threshold for the laplacian to eliminate edges}
\cvarg{lineThresholdBinarized}{yet another threshold for the feature scale to eliminate edges}
\cvarg{suppressNonmaxSize}{linear size of a pixel neighborhood for non-maxima suppression}
\end{description}
}
\fi
\end{description}

The function GetStarKeypoints extracts keypoints that are local
scale-space extrema. The scale-space is constructed by computing
approximate values of laplacians with different sigmas at each
pixel. Instead of using pyramids, a popular approach to save computing
time, all of the laplacians are computed at each pixel of the original
high-resolution image. But each approximate laplacian value is computed
in O(1) time regardless of the sigma, thanks to the use of integral
images. The algorithm is based on the paper \cite{Agrawal08}, but instead
of a square, hexagon or octagon it uses an 8-end star shape, hence the name,
consisting of overlapping upright and tilted squares.

\ifC
Each computed feature is represented by the following structure:

\begin{lstlisting}
typedef struct CvStarKeypoint
{
    CvPoint pt;     // coordinates of the feature
    int size;       // feature size, see CvStarDetectorParams::maxSize
    float response; // the approximated laplacian value at that point
}
CvStarKeypoint;

inline CvStarKeypoint cvStarKeypoint(CvPoint pt, int size, float response);
\end{lstlisting}
\else
Each keypoint is represented by a tuple \texttt{((x, y), size, response)}:
\begin{description}
\cvarg{x, y}{Screen coordinates of the keypoint}
\cvarg{size}{feature size, up to \texttt{maxSize}}
\cvarg{response}{approximated laplacian value for the keypoint}
\end{description}
\fi

\ifC
Below is a small usage sample:

\begin{lstlisting}
#include "cv.h"
#include "highgui.h"

int main(int argc, char** argv)
{
    const char* filename = argc > 1 ? argv[1] : "lena.jpg";
    IplImage* img = cvLoadImage( filename, 0 ), *cimg;
    CvMemStorage* storage = cvCreateMemStorage(0);
    CvSeq* keypoints = 0;
    int i;

    if( !img )
        return 0;
    cvNamedWindow( "image", 1 );
    cvShowImage( "image", img );
    cvNamedWindow( "features", 1 );
    cimg = cvCreateImage( cvGetSize(img), 8, 3 );
    cvCvtColor( img, cimg, CV_GRAY2BGR );

    keypoints = cvGetStarKeypoints( img, storage, cvStarDetectorParams(45) );

    for( i = 0; i < (keypoints ? keypoints->total : 0); i++ )
    {
        CvStarKeypoint kpt = *(CvStarKeypoint*)cvGetSeqElem(keypoints, i);
        int r = kpt.size/2;
        cvCircle( cimg, kpt.pt, r, CV_RGB(0,255,0));
        cvLine( cimg, cvPoint(kpt.pt.x + r, kpt.pt.y + r),
                cvPoint(kpt.pt.x - r, kpt.pt.y - r), CV_RGB(0,255,0));
        cvLine( cimg, cvPoint(kpt.pt.x - r, kpt.pt.y + r),
                cvPoint(kpt.pt.x + r, kpt.pt.y - r), CV_RGB(0,255,0));
    }
    cvShowImage( "features", cimg );
    cvWaitKey();
    return 0;
}
\end{lstlisting}
\fi

\fi
\ifCpp

\cvclass{MSER}
Maximally Stable Extremal Region extractor.

\begin{lstlisting}
class MSER : public CvMSERParams
{
public:
    // default constructor
    MSER();
    // constructor that initializes all the algorithm parameters
    MSER( int _delta, int _min_area, int _max_area,
          float _max_variation, float _min_diversity,
          int _max_evolution, double _area_threshold,
          double _min_margin, int _edge_blur_size );
    // runs the extractor on the specified image; returns the MSERs,
    // each encoded as a contour (vector<Point>, see findContours)
    // the optional mask marks the area where MSERs are searched for
    void operator()( const Mat& image, vector<vector<Point> >& msers,
                     const Mat& mask ) const;
};
\end{lstlisting}

The class encapsulates all the parameters of the MSER extraction algorithm (see \url{http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions}).
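
A minimal usage sketch; passing an empty matrix as the mask is assumed here to mean ``search the whole image'', and the file name is illustrative:

\begin{lstlisting}
Mat img = imread("building.jpg", 0); // 8-bit grayscale
MSER mser; // default parameters
vector<vector<Point> > regions;
mser( img, regions, Mat() ); // empty mask: search the whole image
// regions[i] is the contour of the i-th maximally stable region
\end{lstlisting}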

\cvclass{StarDetector}
Implements the Star keypoint detector.

\begin{lstlisting}
class StarDetector : CvStarDetectorParams
{
public:
    // default constructor
    StarDetector();
    // the full constructor initializes all the algorithm parameters:
    // maxSize - maximum size of the features. The following
    //      values of the parameter are supported:
    //      4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
    // responseThreshold - threshold for the approximated laplacian,
    //      used to eliminate weak features. The larger it is,
    //      the fewer features will be retrieved
    // lineThresholdProjected - another threshold for the laplacian to
    //      eliminate edges
    // lineThresholdBinarized - yet another threshold for the feature
    //      size to eliminate edges.
    //      The larger the two thresholds, the more points you get.
    StarDetector(int maxSize, int responseThreshold,
                 int lineThresholdProjected,
                 int lineThresholdBinarized,
                 int suppressNonmaxSize);

    // finds keypoints in an image
    void operator()(const Mat& image, vector<KeyPoint>& keypoints) const;
};
\end{lstlisting}

The class implements a modified version of the CenSurE keypoint detector described in \cite{Agrawal08}.
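
A minimal usage sketch with the default parameters (the file name is illustrative):

\begin{lstlisting}
Mat img = imread("building.jpg", 0); // 8-bit grayscale
StarDetector detector; // default parameters
vector<KeyPoint> keypoints;
detector( img, keypoints );
\end{lstlisting}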

\cvclass{SIFT}
Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform (SIFT) approach.

\begin{lstlisting}
class CV_EXPORTS SIFT
{
public:
    struct CommonParams
    {
        static const int DEFAULT_NOCTAVES = 4;
        static const int DEFAULT_NOCTAVE_LAYERS = 3;
        static const int DEFAULT_FIRST_OCTAVE = -1;
        enum{ FIRST_ANGLE = 0, AVERAGE_ANGLE = 1 };

        CommonParams();
        CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave,
                      int _angleMode );
        int nOctaves, nOctaveLayers, firstOctave;
        int angleMode;
    };

    struct DetectorParams
    {
        static double GET_DEFAULT_THRESHOLD()
        { return 0.04 / SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS / 2.0; }
        static double GET_DEFAULT_EDGE_THRESHOLD() { return 10.0; }

        DetectorParams();
        DetectorParams( double _threshold, double _edgeThreshold );
        double threshold, edgeThreshold;
    };

    struct DescriptorParams
    {
        static double GET_DEFAULT_MAGNIFICATION() { return 3.0; }
        static const bool DEFAULT_IS_NORMALIZE = true;
        static const int DESCRIPTOR_SIZE = 128;

        DescriptorParams();
        DescriptorParams( double _magnification, bool _isNormalize,
                          bool _recalculateAngles );
        double magnification;
        bool isNormalize;
        bool recalculateAngles;
    };

    SIFT();
    //! sift-detector constructor
    SIFT( double _threshold, double _edgeThreshold,
          int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
          int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
          int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
          int _angleMode=CommonParams::FIRST_ANGLE );
    //! sift-descriptor constructor
    SIFT( double _magnification, bool _isNormalize=true,
          bool _recalculateAngles = true,
          int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
          int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
          int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
          int _angleMode=CommonParams::FIRST_ANGLE );
    SIFT( const CommonParams& _commParams,
          const DetectorParams& _detectorParams = DetectorParams(),
          const DescriptorParams& _descriptorParams = DescriptorParams() );

    //! returns the descriptor size in floats (128)
    int descriptorSize() const { return DescriptorParams::DESCRIPTOR_SIZE; }
    //! finds the keypoints using the SIFT algorithm
    void operator()(const Mat& img, const Mat& mask,
                    vector<KeyPoint>& keypoints) const;
    //! finds the keypoints and computes descriptors for them using the SIFT
    //! algorithm. Optionally it can compute descriptors for the
    //! user-provided keypoints
    void operator()(const Mat& img, const Mat& mask,
                    vector<KeyPoint>& keypoints,
                    Mat& descriptors,
                    bool useProvidedKeypoints=false) const;

    CommonParams getCommonParams () const { return commParams; }
    DetectorParams getDetectorParams () const { return detectorParams; }
    DescriptorParams getDescriptorParams () const { return descriptorParams; }
protected:
    ...
};
\end{lstlisting}
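
A minimal detection-and-description sketch; an empty matrix is passed as the mask (assumed here to mean ``use the whole image''), and the file name is illustrative:

\begin{lstlisting}
Mat img = imread("building.jpg", 0); // 8-bit grayscale
SIFT sift; // default detector and descriptor parameters
vector<KeyPoint> keypoints;
Mat descriptors;
// detect keypoints and compute a 128-element descriptor for each
sift( img, Mat(), keypoints, descriptors );
\end{lstlisting}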

\cvclass{SURF}\label{cv.class.SURF}
Class for extracting Speeded Up Robust Features from an image.

\begin{lstlisting}
class SURF : public CvSURFParams
{
public:
    // default constructor
    SURF();
    // constructor that initializes all the algorithm parameters
    SURF(double _hessianThreshold, int _nOctaves=4,
         int _nOctaveLayers=2, bool _extended=false);
    // returns the number of elements in each descriptor (64 or 128)
    int descriptorSize() const;
    // detects keypoints using the fast multi-scale Hessian detector
    void operator()(const Mat& img, const Mat& mask,
                    vector<KeyPoint>& keypoints) const;
    // detects keypoints and computes the SURF descriptors for them;
    // the output vector "descriptors" stores elements of the descriptors
    // and has size equal to descriptorSize()*keypoints.size(), as each
    // descriptor occupies descriptorSize() elements of this vector
    void operator()(const Mat& img, const Mat& mask,
                    vector<KeyPoint>& keypoints,
                    vector<float>& descriptors,
                    bool useProvidedKeypoints=false) const;
};
\end{lstlisting}

The class \texttt{SURF} implements the Speeded Up Robust Features descriptor \cite{Bay06}.
There is a fast multi-scale Hessian keypoint detector that can be used to find the keypoints
(which is the default option), but the descriptors can also be computed for user-specified keypoints.
The class can be used for object tracking and localization, image stitching, etc. See the
\texttt{find\_obj.cpp} demo in the OpenCV samples directory.
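
A minimal detection-and-description sketch (the hessian threshold of 500 and the file name are illustrative; an empty matrix is passed as the mask to search the whole image):

\begin{lstlisting}
Mat img = imread("building.jpg", 0); // 8-bit grayscale
SURF surf( 500 ); // hessianThreshold=500; other parameters take defaults
vector<KeyPoint> keypoints;
vector<float> descriptors;
surf( img, Mat(), keypoints, descriptors );
// descriptors stores keypoints.size() consecutive blocks of
// surf.descriptorSize() floats each
\end{lstlisting}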

\cvclass{RandomizedTree}
The class contains the base structure for \texttt{RTreeClassifier}.

\begin{lstlisting}
class CV_EXPORTS RandomizedTree
{
public:
    friend class RTreeClassifier;

    RandomizedTree();
    ~RandomizedTree();

    void train(std::vector<BaseKeypoint> const& base_set,
               cv::RNG &rng, int depth, int views,
               size_t reduced_num_dim, int num_quant_bits);
    void train(std::vector<BaseKeypoint> const& base_set,
               cv::RNG &rng, PatchGenerator &make_patch, int depth,
               int views, size_t reduced_num_dim, int num_quant_bits);

    // the following two functions are EXPERIMENTAL
    // (do not use unless you know exactly what you are doing)
    static void quantizeVector(float *vec, int dim, int N, float bnds[2],
                               int clamp_mode=0);
    static void quantizeVector(float *src, int dim, int N, float bnds[2],
                               uchar *dst);

    // patch_data must be a 32x32 array (no row padding)
    float* getPosterior(uchar* patch_data);
    const float* getPosterior(uchar* patch_data) const;
    uchar* getPosterior2(uchar* patch_data);

    void read(const char* file_name, int num_quant_bits);
    void read(std::istream &is, int num_quant_bits);
    void write(const char* file_name) const;
    void write(std::ostream &os) const;

    int classes() { return classes_; }
    int depth() { return depth_; }

    void discardFloatPosteriors() { freePosteriors(1); }

    inline void applyQuantization(int num_quant_bits)
    { makePosteriors2(num_quant_bits); }

private:
    int classes_;
    int depth_;
    int num_leaves_;
    std::vector<RTreeNode> nodes_;
    float **posteriors_;  // 16-byte aligned posteriors
    uchar **posteriors2_; // 16-byte aligned posteriors
    std::vector<int> leaf_counts_;

    void createNodes(int num_nodes, cv::RNG &rng);
    void allocPosteriorsAligned(int num_leaves, int num_classes);
    void freePosteriors(int which);
    // which: 1=posteriors_, 2=posteriors2_, 3=both
    void init(int classes, int depth, cv::RNG &rng);
    void addExample(int class_id, uchar* patch_data);
    void finalize(size_t reduced_num_dim, int num_quant_bits);
    int getIndex(uchar* patch_data) const;
    inline float* getPosteriorByIndex(int index);
    inline uchar* getPosteriorByIndex2(int index);
    inline const float* getPosteriorByIndex(int index) const;
    void convertPosteriorsToChar();
    void makePosteriors2(int num_quant_bits);
    void compressLeaves(size_t reduced_num_dim);
    void estimateQuantPercForPosteriors(float perc[2]);
};
\end{lstlisting}

\cvCppFunc{RandomizedTree::train}
Trains a randomized tree using an input set of keypoints.

\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
int depth, int views, size\_t reduced\_num\_dim, int num\_quant\_bits);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\begin{description}
\cvarg{base\_set}{Vector of \texttt{BaseKeypoint} entries. Contains the image keypoints used for training}
\cvarg{rng}{Random number generator used for training}
\cvarg{make\_patch}{Patch generator used for training}
\cvarg{depth}{Maximum tree depth}
%\cvarg{views}{}
\cvarg{reduced\_num\_dim}{Number of dimensions used in the compressed signature}
\cvarg{num\_quant\_bits}{Number of bits used for quantization}
\end{description}
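
A minimal sketch, assuming \texttt{base\_set} has already been filled with \texttt{BaseKeypoint} entries taken from a training image (see the \texttt{RTreeClassifier} example at the end of this section); the \texttt{views} and \texttt{num\_quant\_bits} values are illustrative, not recommended defaults:

\begin{lstlisting}
cv::RNG rng( cvGetTickCount() );
cv::RandomizedTree tree;
// views=2000 and num_quant_bits=4 are illustrative values;
// reduced_num_dim is set to the number of training keypoints,
// as in the RTreeClassifier example below
tree.train( base_set, rng, cv::DEFAULT_DEPTH, 2000,
            base_set.size(), 4 );
\end{lstlisting}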

\cvCppFunc{RandomizedTree::read}
Reads a pre-saved randomized tree from a file or stream.
\cvdefCpp{void read(const char* file\_name, int num\_quant\_bits)}
\cvdefCpp{void read(std::istream \&is, int num\_quant\_bits)}
\begin{description}
\cvarg{file\_name}{Filename of the file that contains randomized tree data}
\cvarg{is}{Input stream associated with the file that contains randomized tree data}
\cvarg{num\_quant\_bits}{Number of bits used for quantization}
\end{description}

\cvCppFunc{RandomizedTree::write}
Writes the current randomized tree to a file or stream.
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Filename of the file where randomized tree data will be stored}
\cvarg{os}{Output stream associated with the file where randomized tree data will be stored}
\end{description}
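
For example, a trained tree can be saved and restored like this (the file name and the number of quantization bits are illustrative):

\begin{lstlisting}
tree.write( "tree.dat" );

cv::RandomizedTree tree2;
tree2.read( "tree.dat", 4 /* num_quant_bits */ );
\end{lstlisting}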

\cvCppFunc{RandomizedTree::applyQuantization}
Applies quantization to the current randomized tree.
\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits}{Number of bits used for quantization}
\end{description}

\cvclass{RTreeNode}
The structure represents a single node of \texttt{RandomizedTree}.

\begin{lstlisting}
struct RTreeNode
{
    short offset1, offset2;

    RTreeNode() {}
    RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
      : offset1(y1*PATCH_SIZE + x1),
        offset2(y2*PATCH_SIZE + x2)
    {}

    //! Left child on 0, right child on 1
    inline bool operator() (uchar* patch_data) const
    {
        return patch_data[offset1] > patch_data[offset2];
    }
};
\end{lstlisting}

\cvclass{RTreeClassifier}
The class implements the Calonder descriptor, originally introduced by Michael Calonder.

\begin{lstlisting}
class CV_EXPORTS RTreeClassifier
{
public:
    static const int DEFAULT_TREES = 48;
    static const size_t DEFAULT_NUM_QUANT_BITS = 4;

    RTreeClassifier();

    void train(std::vector<BaseKeypoint> const& base_set,
               cv::RNG &rng,
               int num_trees = RTreeClassifier::DEFAULT_TREES,
               int depth = DEFAULT_DEPTH,
               int views = DEFAULT_VIEWS,
               size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
               int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
               bool print_status = true);
    void train(std::vector<BaseKeypoint> const& base_set,
               cv::RNG &rng,
               PatchGenerator &make_patch,
               int num_trees = RTreeClassifier::DEFAULT_TREES,
               int depth = DEFAULT_DEPTH,
               int views = DEFAULT_VIEWS,
               size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
               int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
               bool print_status = true);

    // sig must point to a memory block of at least
    // classes()*sizeof(float|uchar) bytes
    void getSignature(IplImage *patch, uchar *sig);
    void getSignature(IplImage *patch, float *sig);
    void getSparseSignature(IplImage *patch, float *sig,
                            float thresh);

    static int countNonZeroElements(float *vec, int n, double tol=1e-10);
    static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
                                          int sig_len=176);
    static inline uchar* safeSignatureAlloc(int num_sig=1,
                                            int sig_len=176);

    inline int classes() { return classes_; }
    inline int original_num_classes()
    { return original_num_classes_; }

    void setQuantization(int num_quant_bits);
    void discardFloatPosteriors();

    void read(const char* file_name);
    void read(std::istream &is);
    void write(const char* file_name) const;
    void write(std::ostream &os) const;

    std::vector<RandomizedTree> trees_;

private:
    int classes_;
    int num_quant_bits_;
    uchar **posteriors_;
    ushort *ptemp_;
    int original_num_classes_;
    bool keep_floats_;
};
\end{lstlisting}

\cvCppFunc{RTreeClassifier::train}
Trains a randomized tree classifier using an input set of keypoints.
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
PatchGenerator \&make\_patch,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\begin{description}
\cvarg{base\_set}{Vector of \texttt{BaseKeypoint} entries. Contains the image keypoints used for training}
\cvarg{rng}{Random number generator used for training}
\cvarg{make\_patch}{Patch generator used for training}
\cvarg{num\_trees}{Number of randomized trees used in the RTreeClassifier}
\cvarg{depth}{Maximum tree depth}
%\cvarg{views}{}
\cvarg{reduced\_num\_dim}{Number of dimensions used in the compressed signature}
\cvarg{num\_quant\_bits}{Number of bits used for quantization}
\cvarg{print\_status}{Whether to print the current training status to the console}
\end{description}

\cvCppFunc{RTreeClassifier::getSignature}
Returns a signature for an image patch.
\cvdefCpp{
void getSignature(IplImage *patch, uchar *sig)
}
\cvdefCpp{
void getSignature(IplImage *patch, float *sig)
}
\begin{description}
\cvarg{patch}{Image patch to calculate the signature for}
\cvarg{sig}{Output signature (array dimension is \texttt{reduced\_num\_dim})}
\end{description}
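
A minimal sketch, assuming \texttt{detector} is a trained \texttt{RTreeClassifier} and \texttt{patch} is an \texttt{IplImage} of size \texttt{PATCH\_SIZE} x \texttt{PATCH\_SIZE} (see the full example at the end of this section):

\begin{lstlisting}
// sig must hold at least classes() elements
float* sig = new float[detector.classes()];
detector.getSignature( patch, sig );
// sig[i] is the response for the i-th class (base keypoint)
delete[] sig;
\end{lstlisting}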

\cvCppFunc{RTreeClassifier::getSparseSignature}
The function is similar to \texttt{getSignature}, but removes all signature elements below the given threshold, so that the signature is compressed.
\cvdefCpp{
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
}
\begin{description}
\cvarg{patch}{Image patch to calculate the signature for}
\cvarg{sig}{Output signature (array dimension is \texttt{reduced\_num\_dim})}
\cvarg{thresh}{The threshold that is used for compressing the signature}
\end{description}

\cvCppFunc{RTreeClassifier::countNonZeroElements}
The function returns the number of non-zero elements in the input array.
\cvdefCpp{
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
}
\begin{description}
\cvarg{vec}{Input vector containing float elements}
\cvarg{n}{Input vector size}
\cvarg{tol}{The threshold used for counting elements. All elements less than \texttt{tol} are treated as zeros}
\end{description}

\cvCppFunc{RTreeClassifier::read}
Reads a pre-saved RTreeClassifier from a file or stream.
\cvdefCpp{void read(const char* file\_name)}
\cvdefCpp{void read(std::istream \&is)}
\begin{description}
\cvarg{file\_name}{Filename of the file that contains randomized tree data}
\cvarg{is}{Input stream associated with the file that contains randomized tree data}
\end{description}

\cvCppFunc{RTreeClassifier::write}
Writes the current RTreeClassifier to a file or stream.
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Filename of the file where randomized tree data will be stored}
\cvarg{os}{Output stream associated with the file where randomized tree data will be stored}
\end{description}

\cvCppFunc{RTreeClassifier::setQuantization}
Applies quantization to the current randomized trees.
\cvdefCpp{void setQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits}{Number of bits used for quantization}
\end{description}

Below is an example of using \texttt{RTreeClassifier} for feature matching. Features are extracted from both a test and a train image with SURF. The output arrays \texttt{best\_corr} and \texttt{best\_corr\_idx} keep the best probabilities and the corresponding train-class indexes for every test feature.
% ===== Example. Using RTreeClassifier for features matching =====
\begin{lstlisting}
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
CvSURFParams params = cvSURFParams(500, 1);
cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
               storage, params );
cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
               storage, params );

cv::RTreeClassifier detector;
int patch_width = cv::PATCH_SIZE;
int patch_height = cv::PATCH_SIZE;
vector<cv::BaseKeypoint> base_set;
int i=0;
int n_points = -1; // <= 0 means: use all object keypoints for training
CvSURFPoint* point;
for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
{
    point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
    base_set.push_back(
        cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
}

// Detector training
cv::RNG rng( cvGetTickCount() );
cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
                       -CV_PI/3,CV_PI/3);

printf("RTree Classifier training...\n");
detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
               (int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
printf("Done\n");

float* signature = new float[detector.original_num_classes()];
float* best_corr = 0;
int* best_corr_idx = 0;
if (imageKeypoints->total > 0)
{
    best_corr = new float[imageKeypoints->total];
    best_corr_idx = new int[imageKeypoints->total];
}

for(i=0; i < imageKeypoints->total; i++)
{
    point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
    int part_idx = -1;
    float prob = 0.0f;

    CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
                        (int)(point->pt.y) - patch_height/2,
                        patch_width, patch_height);
    cvSetImageROI(test_image, roi);
    roi = cvGetImageROI(test_image);
    if(roi.width != patch_width || roi.height != patch_height)
    {
        best_corr_idx[i] = part_idx;
        best_corr[i] = prob;
    }
    else
    {
        cvSetImageROI(test_image, roi);
        IplImage* roi_image =
            cvCreateImage(cvSize(roi.width, roi.height),
                          test_image->depth, test_image->nChannels);
        cvCopy(test_image,roi_image);

        detector.getSignature(roi_image, signature);
        for (int j = 0; j< detector.original_num_classes();j++)
        {
            if (prob < signature[j])
            {
                part_idx = j;
                prob = signature[j];
            }
        }

        best_corr_idx[i] = part_idx;
        best_corr[i] = prob;

        if (roi_image)
            cvReleaseImage(&roi_image);
    }
    cvResetImageROI(test_image);
}
\end{lstlisting}

\fi