Open Source Computer Vision Library
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2161 lines
61 KiB

/*
 * Here:
 * 1.) SIFT implementation of Andrea Vedaldi
 * 2.) wrapper of Vedaldi's SIFT
 */
// 1.) Implementation of SIFT taken from the original code by Andrea Vedaldi (license below)
// Copyright (c) 2006 The Regents of the University of California
// All Rights Reserved.
// Created by Andrea Vedaldi (UCLA VisionLab)
// Permission to use, copy, modify, and distribute this software and its
// documentation for educational, research and non-profit purposes,
// without fee, and without a written agreement is hereby granted,
// provided that the above copyright notice, this paragraph and the
// following three paragraphs appear in all copies.
// This software program and documentation are copyrighted by The Regents
// of the University of California. The software program and
// documentation are supplied "as is", without any accompanying services
// from The Regents. The Regents does not warrant that the operation of
// the program will be uninterrupted or error-free. The end-user
// understands that the program was developed for research purposes and
// is advised not to rely exclusively on the program for any reason.
// This software embodies a method for which the following patent has
// been issued: "Method and apparatus for identifying scale invariant
// features in an image and use of same for locating an object in an
// image," David G. Lowe, US Patent 6,711,293 (March 23,
// 2004). Provisional application filed March 8, 1999. Asignee: The
// University of British Columbia.
#include "precomp.hpp"
#ifdef __arm__
#define ARM_NO_SIFT
#ifdef ANDROID
#undef ARM_NO_SIFT
#endif //ANDROID
#ifndef ARM_NO_SIFT
#include <iostream>
#include <limits>
#define log2(a) (log((a))/CV_LOG2)
#if defined _MSC_VER && _MSC_VER >= 1400
#pragma warning(disable: 4100 4244 4267 4305)
/*
 * from sift.hpp of original code
 */
/* Select the floating point type used for internal computations:
 * double only when fast math is requested on a non-Mac target,
 * float in every other configuration. */
#if defined (VL_USEFASTMATH)
#if defined (VL_MAC)
#define VL_FASTFLOAT float
#else
#define VL_FASTFLOAT double
#endif
#else
#define VL_FASTFLOAT float
#endif

/** @brief VisionLab namespace */
namespace VL {

/** @brief Pixel data type */
typedef float pixel_t ;

/** @brief Floating point data type
 **
 ** Although floats are precise enough for this application, on Intel
 ** based architecture using doubles for floating point computations
 ** turns out to be much faster.
 **/
typedef VL_FASTFLOAT float_t ;

/** @brief 32-bit floating data type */
typedef float float32_t ;

/** @brief 64-bit floating data type */
typedef double float64_t ;

/** @brief 32-bit integer data type */
typedef int int32_t ;

/** @brief 64-bit integer data type */
typedef long long int int64_t ;

/** @brief 32-bit unsigned integer data type
 **
 ** Declared unsigned to match its name and documentation (it was
 ** previously an alias of plain @c int).
 **/
typedef unsigned int uint32_t ;

/** @brief 8-bit unsigned integer data type */
typedef char unsigned uint8_t ;

/** @name Fast math
 **
 ** We provide approximate mathematical functions. These are usually
 ** rather faster than the corresponding standard library functions.
 **/
/*@{*/
float   fast_resqrt(float x) ;
double  fast_resqrt(double x) ;
float_t fast_expn(float_t x) ;
float_t fast_abs(float_t x) ;
float_t fast_mod_2pi(float_t x) ;
float_t fast_atan2(float_t y, float_t x) ;
float_t fast_sqrt(float_t x) ;
int32_t fast_floor(float_t x) ;
/*@}*/

/** @brief PGM buffer descriptor
 **
 ** The structure describes a gray scale image and it is used by the
 ** PGM input/output functions. The fields are self-explanatory.
 **/
struct PgmBuffer
{
  int width ;      ///< Image width
  int height ;     ///< Image height
  pixel_t* data ;  ///< Image data
} ;

/** @brief SIFT filter
 **
 ** This class is a filter computing the Scale Invariant Feature
 ** Transform (SIFT).
 **
 ** @remark The class owns raw buffers (@c temp, @c octaves, @c filter)
 ** that are released by the destructor and declares no copy
 ** operations, so instances must not be copied.
 **/
class Sift
{

public:

  /** @brief SIFT keypoint
   **
   ** A SIFT keypoint is characterized by a location x,y and a scale
   ** @c sigma. The scale is obtained from the level index @c s and
   ** the octave index @c o through a simple formula (see the PDF
   ** documentation).
   **
   ** In addition to the location, scale indexes and scale, we also
   ** store the integer location and level. The integer location is
   ** unnormalized, i.e. relative to the resolution of the octave
   ** containing the keypoint (octaves are downsampled).
   **/
  struct Keypoint
  {
    int o ;    ///< Keypoint octave index

    int ix ;   ///< Keypoint integer X coordinate (unnormalized)
    int iy ;   ///< Keypoint integer Y coordinate (unnormalized)
    int is ;   ///< Keypoint integer scale index

    float_t x  ;  ///< Keypoint fractional X coordinate
    float_t y  ;  ///< Keypoint fractional Y coordinate
    float_t s ;   ///< Keypoint fractional scale index

    float_t sigma ;  ///< Keypoint scale
  } ;

  typedef std::vector<Keypoint>     Keypoints ;          ///< Keypoint list datatype
  typedef Keypoints::iterator       KeypointsIter ;      ///< Keypoint list iter datatype
  typedef Keypoints::const_iterator KeypointsConstIter ; ///< Keypoint list const iter datatype

// _S is used as a parameter name below; drop any macro of that name.
#undef _S

  /** @brief Constructors and destructors */
  /*@{*/
  Sift(const pixel_t* _im_pt, int _width, int _height,
       float_t _sigman,
       float_t _sigma0,
       int _O, int _S,
       int _omin, int _smin, int _smax) ;
  ~Sift() ;
  /*@}*/

  void process(const pixel_t* _im_pt, int _width, int _height) ;

  /** @brief Querying the Gaussian scale space */
  /*@{*/
  VL::pixel_t* getOctave(int o) ;
  VL::pixel_t* getLevel(int o, int s) ;
  int          getWidth() const ;
  int          getHeight() const ;
  int          getOctaveWidth(int o) const ;
  int          getOctaveHeight(int o) const ;
  VL::float_t  getOctaveSamplingPeriod(int o) const ;
  VL::float_t  getScaleFromIndex(VL::float_t o, VL::float_t s) const ;
  Keypoint     getKeypoint(VL::float_t x, VL::float_t y, VL::float_t s) const ;
  /*@}*/

  /** @brief Descriptor parameters */
  /*@{*/
  bool getNormalizeDescriptor() const ;
  void setNormalizeDescriptor(bool) ;
  void setMagnification(VL::float_t) ;
  VL::float_t getMagnification() const ;
  /*@}*/

  /** @brief Detector and descriptor */
  /*@{*/
  void detectKeypoints(VL::float_t threshold, VL::float_t edgeThreshold) ;
  int computeKeypointOrientations(VL::float_t angles [4], Keypoint keypoint) ;
  void computeKeypointDescriptor(VL::float_t* descr_pt, Keypoint keypoint, VL::float_t angle) ;
  KeypointsIter keypointsBegin() ;
  KeypointsIter keypointsEnd() ;
  /*@}*/

private:
  // NOTE(review): the access specifiers were not present in the text this
  // block was restored from; buffer management and state are assumed to be
  // implementation details. Confirm no external caller needs them.
  void prepareBuffers() ;
  void freeBuffers() ;
  void smooth(VL::pixel_t       * dst,
              VL::pixel_t       * temp,
              VL::pixel_t const * src, int width, int height,
              VL::float_t s) ;

  void prepareGrad(int o) ;

  // scale space parameters
  VL::float_t sigman ;
  VL::float_t sigma0 ;
  VL::float_t sigmak ;

  int O ;
  int S ;
  int omin ;
  int smin ;
  int smax ;

  int width ;
  int height ;

  // descriptor parameters
  VL::float_t magnif ;
  bool        normalizeDescriptor ;

  // buffers
  VL::pixel_t*  temp ;
  int           tempReserved ;
  bool          tempIsGrad  ;
  int           tempOctave ;

  VL::pixel_t** octaves ;

  VL::pixel_t*  filter ;
  int           filterReserved ;

  Keypoints keypoints ;
} ;

} // namespace VL
/*
 * from sift.ipp of original code
 */
namespace VL
namespace Detail
extern int const expnTableSize ;
extern VL::float_t const expnTableMax ;
extern VL::float_t expnTable [] ;
/** @brief Get width of source image
** @result width.
Sift::getWidth() const
return width ;
/** @brief Get height of source image
** @result height.
Sift::getHeight() const
return height ;
/** @brief Get width of an octave
** @param o octave index.
** @result width of octave @a o.
Sift::getOctaveWidth(int o) const
assert( omin <= o && o < omin + O ) ;
return (o >= 0) ? (width >> o) : (width << -o) ;
/** @brief Get height of an octave
** @param o octave index.
** @result height of octave @a o.
Sift::getOctaveHeight(int o) const
assert( omin <= o && o < omin + O ) ;
return (o >= 0) ? (height >> o) : (height << -o) ;
/** @brief Get octave
** @param o octave index.
** @return pointer to octave @a o.
VL::pixel_t *
Sift::getOctave(int o)
assert( omin <= o && o < omin + O ) ;
return octaves[o-omin] ;
/** @brief Get level
** @param o octave index.
** @param s level index.
** @result pointer to level @c (o,s).
VL::pixel_t *
Sift::getLevel(int o, int s)
assert( omin <= o && o < omin + O ) ;
assert( smin <= s && s <= smax ) ;
return octaves[o - omin] +
getOctaveWidth(o)*getOctaveHeight(o) * (s-smin) ;
/** @brief Get octave sampling period
** @param o octave index.
** @result Octave sampling period (in pixels).
Sift::getOctaveSamplingPeriod(int o) const
return (o >= 0) ? (1 << o) : 1.0f / (1 << -o) ;
/** @brief Convert index into scale
** @param o octave index.
** @param s scale index.
** @return scale.
Sift::getScaleFromIndex(VL::float_t o, VL::float_t s) const
return sigma0 * powf( 2.0f, o + s / S ) ;
/** @brief Get keypoint list begin
** @return iterator to the beginning.
return keypoints.begin() ;
/** @brief Get keypoint list end
** @return iterator to the end.
return keypoints.end() ;
/** @brief Set normalize descriptor flag */
Sift::setNormalizeDescriptor(bool flag)
normalizeDescriptor = flag ;
/** @brief Get normalize descriptor flag */
Sift::getNormalizeDescriptor() const
return normalizeDescriptor ;
/** @brief Set descriptor magnification */
Sift::setMagnification(VL::float_t _magnif)
magnif = _magnif ;
/** @brief Get descriptor magnification */
Sift::getMagnification() const
return magnif ;
/** @brief Fast @ exp(-x)
** The argument must be in the range 0-25.0 (bigger arguments may be
** truncated to zero).
** @param x argument.
** @return @c exp(-x)
fast_expn(VL::float_t x)
assert(VL::float_t(0) <= x && x <= Detail::expnTableMax) ;
x *= Detail::expnTableSize / Detail::expnTableMax ;
VL::int32_t i = fast_floor(x) ;
VL::float_t r = x - i ;
VL::float_t a = VL::Detail::expnTable[i] ;
VL::float_t b = VL::Detail::expnTable[i+1] ;
return a + r * (b - a) ;
return exp(-x) ;
/** @brief Fast @c mod(x,2pi)
** The function quickly computes the value @c mod(x,2pi).
** @remark The computation is fast only for arguments @a x which are
** small in modulus.
** @remark For negative arguments, the semantic of the function is
** not equivalent to the standard library @c fmod function.
** @param x function argument.
** @return @c mod(x,2pi)
fast_mod_2pi(VL::float_t x)
while(x < VL::float_t(0) ) x += VL::float_t(2*CV_PI) ;
while(x > VL::float_t(2*CV_PI) ) x -= VL::float_t(2*CV_PI) ;
return x ;
return (x>=0) ? std::fmod(x, VL::float_t(2*CV_PI))
: 2*CV_PI + std::fmod(x, VL::float_t(2*CV_PI)) ;
/** @brief Fast @c (int) floor(x)
** @param x argument.
** @return @c float(x)
fast_floor(VL::float_t x)
return (x>=0)? int32_t(x) : std::floor(x) ;
// return int32_t( x - ((x>=0)?0:1) ) ;
return int32_t( std::floor(x) ) ;
/** @brief Fast @c abs(x)
** @param x argument.
** @return @c abs(x)
fast_abs(VL::float_t x)
return (x >= 0) ? x : -x ;
return std::fabs(x) ;
/** @brief Fast @c atan2
** @param x argument.
** @param y argument.
** @return Approximation of @c atan2(x).
fast_atan2(VL::float_t y, VL::float_t x)
The function f(r)=atan((1-r)/(1+r)) for r in [-1,1] is easier to
approximate than atan(z) for z in [0,inf]. To approximate f(r) to
the third degree we may solve the system
f(+1) = c0 + c1 + c2 + c3 = atan(0) = 0
f(-1) = c0 - c1 + c2 - c3 = atan(inf) = pi/2
f(0) = c0 = atan(1) = pi/4
which constrains the polynomial to go through the end points and
the middle point.
We still miss a constrain, which might be simply a constarint on
the derivative in 0. Instead we minimize the Linf error in the
range [0,1] by searching for an optimal value of the free
parameter. This turns out to correspond to the solution
c0=pi/4, c1=-0.9675, c2=0, c3=0.1821
which has maxerr = 0.0061 rad = 0.35 grad.
VL::float_t angle, r ;
VL::float_t const c3 = 0.1821 ;
VL::float_t const c1 = 0.9675 ;
VL::float_t abs_y = fast_abs(y) + VL::float_t(1e-10) ;
if (x >= 0) {
r = (x - abs_y) / (x + abs_y) ;
angle = VL::float_t(CV_PI/4.0) ;
} else {
r = (x + abs_y) / (abs_y - x) ;
angle = VL::float_t(3*CV_PI/4.0) ;
angle += (c3*r*r - c1) * r ;
return (y < 0) ? -angle : angle ;
return std::atan2(y,x) ;
/** @brief Fast @c resqrt
** @param x argument.
** @return Approximation to @c resqrt(x).
fast_resqrt(float x)
// Works if VL::float_t is 32 bit ...
union {
float x ;
VL::int32_t i ;
} u ;
float xhalf = float(0.5) * x ;
u.x = x ; // get bits for floating value
u.i = 0x5f3759df - (u.i>>1); // gives initial guess y0
//u.i = 0xdf59375f - (u.i>>1); // gives initial guess y0
u.x = u.x*(float(1.5) - xhalf*u.x*u.x); // Newton step (may repeat)
u.x = u.x*(float(1.5) - xhalf*u.x*u.x); // Newton step (may repeat)
return u.x ;
return float(1.0) / std::sqrt(x) ;
/** @brief Fast @c resqrt
** @param x argument.
** @return Approximation to @c resqrt(x).
fast_resqrt(double x)
// Works if double is 64 bit ...
union {
double x ;
VL::int64_t i ;
} u ;
double xhalf = double(0.5) * x ;
u.x = x ; // get bits for floating value
u.i = 0x5fe6ec85e7de30daLL - (u.i>>1); // gives initial guess y0
u.x = u.x*(double(1.5) - xhalf*u.x*u.x); // Newton step (may repeat)
u.x = u.x*(double(1.5) - xhalf*u.x*u.x); // Newton step (may repeat)
return u.x ;
return double(1.0) / std::sqrt(x) ;
/** @brief Fast @c sqrt
** @param x argument.
** @return Approximation to @c sqrt(x).
fast_sqrt(VL::float_t x)
return (x < 1e-8) ? 0 : x * fast_resqrt(x) ;
return std::sqrt(x) ;
/*
 * from sift.tpp of original code
 */
/** @brief Normalize a filter to unit sum
 **
 ** Divides each of the @c 2*W+1 coefficients of @a filter by their sum.
 ** The sum must not be zero.
 **
 ** @param filter filter coefficients, modified in place.
 ** @param W      filter half-width.
 **/
template<typename T>
void
normalize(T* filter, int W)
{
  T  acc  = 0 ;
  T* iter = filter ;
  T* end  = filter + 2*W+1 ;
  while(iter != end) acc += *iter++ ;

  iter = filter ;
  while(iter != end) *iter++ /= acc ;
}
/** @brief Convolve along columns with zero padding, writing the transpose
 **
 ** The source holds @a N columns of @a M contiguous samples each. Every
 ** column is filtered with the @c 2*W+1 taps of @a filter_pt, samples
 ** outside the column counting as zero. Output sample @c i of column
 ** @c j is stored at @c dst_pt[j + i*N].
 **
 ** @param dst_pt    output buffer, @c M*N elements.
 ** @param src_pt    input buffer, @c M*N elements.
 ** @param M         samples per column.
 ** @param N         number of columns.
 ** @param filter_pt filter taps.
 ** @param W         filter half-width.
 **/
template<typename T>
void
convolve(T*       dst_pt,
         const T* src_pt, int M, int N,
         const T* filter_pt, int W)
{
  typedef T const TC ;
  // convolve along columns, save transpose
  // image is M by N
  // buffer is N by M
  // filter is (2*W+1) by 1
  for(int j = 0 ; j < N ; ++j) {

    int i = 0 ;

    // top
    for(; i <= std::min(W-1, M-1) ; ++i) {
      TC* start = src_pt ;
      TC* stop  = src_pt    + std::min(i+W, M-1) + 1 ;
      TC* g     = filter_pt + W-i ;
      T   acc = 0.0 ;
      while(stop != start) acc += (*g++) * (*start++) ;
      *dst_pt = acc ;
      dst_pt += N ;
    }

    // middle
    // run this for W <= i <= M-1-W, only if M >= 2*W+1
    for(; i <= M-1-W ; ++i) {
      TC* start = src_pt    + i-W ;
      TC* stop  = src_pt    + i+W + 1 ;
      TC* g     = filter_pt ;
      T   acc = 0.0 ;
      while(stop != start) acc += (*g++) * (*start++) ;
      *dst_pt = acc ;
      dst_pt += N ;
    }

    // bottom
    // run this for M-W <= i <= M-1, only if M >= 2*W+1
    for(; i <= M-1 ; ++i) {
      TC* start = src_pt    + i-W ;
      TC* stop  = src_pt    + std::min(i+W, M-1) + 1 ;
      TC* g     = filter_pt ;
      T   acc = 0.0 ;
      while(stop != start) acc += (*g++) * (*start++) ;
      *dst_pt = acc ;
      dst_pt += N ;
    }

    // next column
    src_pt += M ;
    dst_pt -= M*N - 1 ;
  }
}
/** @brief Convolve along columns with border renormalization, writing the transpose
 **
 ** Same layout as convolve(): @a N columns of @a M contiguous samples,
 ** output sample @c i of column @c j stored at @c dst_pt[j + i*N]. Near
 ** the two ends of a column the partial sum is divided by the sum of
 ** the filter taps that actually overlap the data.
 **
 ** @remark Works with symmetric filters only.
 **
 ** @param dst_pt     output buffer, @c M*N elements.
 ** @param src_pt     input buffer, @c M*N elements.
 ** @param M          samples per column.
 ** @param N          number of columns.
 ** @param filter_pt  filter taps (@c 2*W+1, symmetric).
 ** @param W          filter half-width.
 ** @param scratch_pt work buffer of at least @c W+1 elements.
 **/
template<typename T>
void
nconvolve(T*       dst_pt,
          const T* src_pt, int M, int N,
          const T* filter_pt, int W,
          T*       scratch_pt )
{
  typedef T const TC ;

  // scratch_pt[i]: sum of the taps overlapping the data at distance i
  // from a border
  for(int i = 0 ; i <= W ; ++i) {
    T   acc  = 0.0 ;
    TC* iter = filter_pt + std::max(W-i,  0) ;
    TC* stop = filter_pt + std::min(M-1-i,W) + W + 1 ;
    while(iter != stop) acc += *iter++ ;
    scratch_pt [i] = acc ;
  }

  for(int j = 0 ; j < N ; ++j) {

    int i = 0 ;
    // top margin
    for(; i <= std::min(W, M-1) ; ++i) {
      TC* start = src_pt ;
      TC* stop  = src_pt    + std::min(i+W, M-1) + 1 ;
      TC* g     = filter_pt + W-i ;
      T   acc = 0.0 ;
      while(stop != start) acc += (*g++) * (*start++) ;
      *dst_pt = acc / scratch_pt [i] ;
      dst_pt += N ;
    }

    // middle
    for(; i <= M-1-W ; ++i) {
      TC* start = src_pt    + i-W ;
      TC* stop  = src_pt    + i+W + 1 ;
      TC* g     = filter_pt ;
      T   acc = 0.0 ;
      while(stop != start) acc += (*g++) * (*start++) ;
      *dst_pt = acc ;
      dst_pt += N ;
    }

    // bottom
    for(; i <= M-1 ; ++i) {
      TC* start = src_pt    + i-W ;
      TC* stop  = src_pt    + std::min(i+W, M-1) + 1 ;
      TC* g     = filter_pt ;
      T   acc = 0.0 ;
      while(stop != start) acc += (*g++) * (*start++) ;
      *dst_pt = acc / scratch_pt [M-1-i];
      dst_pt += N ;
    }

    // next column
    src_pt += M ;
    dst_pt -= M*N - 1 ;
  }
}
/** @brief Convolve along columns with edge replication, writing the transpose
 **
 ** The source holds @a N columns of @a M contiguous samples each. Every
 ** column is filtered with the @c 2*W+1 taps of @a filter_pt; samples
 ** beyond either end of the column take the value of the nearest valid
 ** sample. Output sample @c i of column @c j is stored at
 ** @c dst_pt[j + i*N].
 **
 ** @param dst_pt    output buffer, @c M*N elements.
 ** @param src_pt    input buffer, @c M*N elements.
 ** @param M         samples per column (at least 1).
 ** @param N         number of columns.
 ** @param filter_pt filter taps.
 ** @param W         filter half-width.
 **/
template<typename T>
void
econvolve(T*       dst_pt,
          const T* src_pt, int M, int N,
          const T* filter_pt, int W)
{
  typedef T const TC ;
  // convolve along columns, save transpose
  // image is M by N
  // buffer is N by M
  // filter is (2*W+1) by 1
  for(int j = 0 ; j < N ; ++j) {
    for(int i = 0 ; i < M ; ++i) {
      T   acc = 0.0 ;
      TC* g   = filter_pt ;

      // Window positions are tracked as indices, so no pointer outside
      // the current column is ever formed or dereferenced.
      int const first = std::max(0,   i-W) ;  // first valid sample in the window
      int const last  = std::min(M-1, i+W) ;  // last valid sample in the window
      int       k     = i-W ;

      // beginning: positions up to 'first' replicate src_pt[first]
      T const head = src_pt[first] ;
      for(; k <= first ; ++k) acc += (*g++) * head ;

      // middle
      for(; k < last ; ++k) acc += (*g++) * src_pt[k] ;

      // end: positions from 'last' on replicate src_pt[last]. Reading the
      // value through 'last' (rather than through the running position)
      // keeps it correct when the window holds a single valid sample.
      T const tail = src_pt[last] ;
      for(; k <= i+W ; ++k) acc += (*g++) * tail ;

      // save
      *dst_pt = acc ;
      dst_pt += N ;

      assert( g - filter_pt == 2*W+1 ) ;
    }
    // next column
    src_pt += M ;
    dst_pt -= M*N - 1 ;
  }
}
/*
 * from sift.cpp of original code
 */
extern "C" {
#if defined (VL_MAC)
// on startup, pre-compute expn(x) = exp(-x)
namespace VL {
namespace Detail {

int const         expnTableSize = 256 ;
VL::float_t const expnTableMax  = VL::float_t(25.0) ;
VL::float_t       expnTable [ expnTableSize + 1 ] ;

/** @brief Fills expnTable during static initialization
 **
 ** Entry @c k holds @c exp(-k/expnTableSize*expnTableMax), i.e. the
 ** table samples @c exp(-x) uniformly on @c [0,expnTableMax].
 **/
struct buildExpnTable
{
  buildExpnTable() {
    for(int k = 0 ; k < expnTableSize + 1 ; ++k) {
      expnTable[k] = exp( - VL::float_t(k) / expnTableSize * expnTableMax ) ;
    }
  }
} _buildExpnTable ;

} }
namespace VL {
// ===================================================================
// Low level image operations
// -------------------------------------------------------------------
namespace Detail {
/** @brief Copy an image
** @param dst output imgage buffer.
** @param src input image buffer.
** @param width input image width.
** @param height input image height.
copy(pixel_t* dst, pixel_t const* src, int width, int height)
memcpy(dst, src, sizeof(pixel_t)*width*height) ;
/** @brief Copy an image upsampling two times
** The destination buffer must be at least as big as two times the
** input buffer. Bilinear interpolation is used.
** @param dst output imgage buffer.
** @param src input image buffer.
** @param width input image width.
** @param height input image height.
(pixel_t* dst, pixel_t const* src, int width, int height)
for(int y = 0 ; y < height ; ++y) {
pixel_t b, a ;
b = a = *src++ ;
for(int x = 0 ; x < width-1 ; ++x) {
b = *src++ ;
*dst = a ; dst += height ;
*dst = 0.5*(a+b) ; dst += height ;
a = b ;
*dst = b ; dst += height ;
*dst = b ; dst += height ;
dst += 1 - width * 2 * height ;
/** @brief Copy and downasample an image
** The image is downsampled @a d times, i.e. reduced to @c 1/2^d of
** its original size. The parameters @a width and @a height are the
** size of the input image. The destination image is assumed to be @c
** floor(width/2^d) pixels wide and @c floor(height/2^d) pixels high.
** @param dst output imgage buffer.
** @param src input image buffer.
** @param width input image width.
** @param height input image height.
** @param d downsampling factor.
copyAndDownsample(pixel_t* dst, pixel_t const* src,
int width, int height, int d)
for(int y = 0 ; y < height ; y+=d) {
pixel_t const * srcrowp = src + y * width ;
for(int x = 0 ; x < width - (d-1) ; x+=d) {
*dst++ = *srcrowp ;
srcrowp += d ;
/** @brief Smooth an image
** The function convolves the image @a src by a Gaussian kernel of
** variance @a s and writes the result to @a dst. The function also
** needs a scratch buffer @a dst of the same size of @a src and @a
** dst.
** @param dst output image buffer.
** @param temp scratch image buffer.
** @param src input image buffer.
** @param width width of the buffers.
** @param height height of the buffers.
** @param s standard deviation of the Gaussian kernel.
(pixel_t* dst, pixel_t* temp,
pixel_t const* src, int width, int height,
VL::float_t s)
// make sure a buffer larege enough has been allocated
// to hold the filter
int W = int( ceil( VL::float_t(4.0) * s ) ) ;
if( ! filter ) {
filterReserved = 0 ;
if( filterReserved < W ) {
filterReserved = W ;
if( filter ) delete [] filter ;
filter = new pixel_t [ 2* filterReserved + 1 ] ;
// pre-compute filter
for(int j = 0 ; j < 2*W+1 ; ++j)
filter[j] = VL::pixel_t
(-0.5 * (j-W) * (j-W) / (s*s) ))) ;
// normalize to one
normalize(filter, W) ;
// convolve
econvolve(temp, src, width, height, filter, W) ;
econvolve(dst, temp, height, width, filter, W) ;
// ===================================================================
// Sift(), ~Sift()
// -------------------------------------------------------------------
/** @brief Initialize Gaussian scale space parameters
** @param _im_pt Source image data
** @param _width Soruce image width
** @param _height Soruce image height
** @param _sigman Nominal smoothing value of the input image.
** @param _sigma0 Base smoothing level.
** @param _O Number of octaves.
** @param _S Number of levels per octave.
** @param _omin First octave.
** @param _smin First level in each octave.
** @param _smax Last level in each octave.
Sift::Sift(const pixel_t* _im_pt, int _width, int _height,
VL::float_t _sigman,
VL::float_t _sigma0,
int _O, int _S,
int _omin, int _smin, int _smax)
: sigman( _sigman ),
sigma0( _sigma0 ),
O( _O ),
S( _S ),
omin( _omin ),
smin( _smin ),
smax( _smax ),
magnif( 3.0f ),
normalizeDescriptor( true ),
temp( NULL ),
octaves( NULL ),
filter( NULL )
process(_im_pt, _width, _height) ;
/** @brief Destroy SIFT filter.
freeBuffers() ;
/** Allocate buffers. Buffer sizes depend on the image size and the
** value of omin.
// compute buffer size
int w = (omin >= 0) ? (width >> omin) : (width << -omin) ;
int h = (omin >= 0) ? (height >> omin) : (height << -omin) ;
int size = w*h* std::max
((smax - smin), 2*((smax+1) - (smin-2) +1)) ;
if( temp && tempReserved == size ) return ;
freeBuffers() ;
// allocate
temp = new pixel_t [ size ] ;
tempReserved = size ;
tempIsGrad = false ;
tempOctave = 0 ;
octaves = new pixel_t* [ O ] ;
for(int o = 0 ; o < O ; ++o) {
octaves[o] = new pixel_t [ (smax - smin + 1) * w * h ] ;
w >>= 1 ;
h >>= 1 ;
/** @brief Free buffers.
** This function releases any buffer allocated by prepareBuffers().
** @sa prepareBuffers().
if( filter ) {
delete [] filter ;
filter = 0 ;
if( octaves ) {
for(int o = 0 ; o < O ; ++o) {
delete [] octaves[ o ] ;
delete [] octaves ;
octaves = 0 ;
if( temp ) {
delete [] temp ;
temp = 0 ;
// ===================================================================
// getKeypoint
// -------------------------------------------------------------------
/** @brief Get keypoint from position and scale
** The function returns a keypoint with a given position and
** scale. Note that the keypoint structure contains fields that make
** sense only in conjunction with a specific scale space. Therefore
** the keypoint structure should be re-calculated whenever the filter
** is applied to a new image, even if the parameters @a x, @a y and
** @a sigma do not change.
** @param x x coordinate of the center.
** @peram y y coordinate of the center.
** @param sigma scale.
** @return Corresponing keypoint.
Sift::getKeypoint(VL::float_t x, VL::float_t y, VL::float_t sigma) const
The formula linking the keypoint scale sigma to the octave and
scale index is
(1) sigma(o,s) = sigma0 2^(o+s/S)
for which
(2) o + s/S = log2 sigma/sigma0 == phi.
In addition to the scale index s (which can be fractional due to
scale interpolation) a keypoint has an integer scale index is too
(which is the index of the scale level where it was detected in
the DoG scale space). We have the constraints:
- o and is are integer
- is is in the range [smin+1, smax-2 ]
- o is in the range [omin, omin+O-1]
- is = rand(s) most of the times (but not always, due to the way s
is obtained by quadratic interpolation of the DoG scale space).
Depending on the values of smin and smax, often (2) has multiple
solutions is,o that satisfy all constraints. In this case we
choose the one with biggest index o (this saves a bit of
From (2) we have o = phi - s/S and we want to pick the biggest
possible index o in the feasible range. This corresponds to
selecting the smallest possible index s. We write s = is + ds
where in most cases |ds|<.5 (but in general |ds|<1). So we have
o = phi - s/S, s = is + ds , |ds| < .5 (or |ds| < 1).
Since is is in the range [smin+1,smax-2], s is in the range
[smin+.5,smax-1.5] (or [smin,smax-1]), the number o is an integer
in the range phi+[-smax+1.5,-smin-.5] (or
phi+[-smax+1,-smin]). Thus the maximum value of o is obtained for
o = floor(phi-smin-.5) (or o = floor(phi-smin)).
Finally o is clamped to make sure it is contained in the feasible
Given o we can derive is by writing (2) as
s = is + ds = S(phi - o).
We then take is = round(s) and clamp its value to be in the
feasible range.
int o,ix,iy,is ;
VL::float_t s,phi ;
phi = log2(sigma/sigma0) ;
o = fast_floor( phi - (VL::float_t(smin)+.5)/S ) ;
o = std::min(o, omin+O-1) ;
o = std::max(o, omin ) ;
s = S * (phi - o) ;
is = int(s + 0.5) ;
is = std::min(is, smax - 2) ;
is = std::max(is, smin + 1) ;
VL::float_t per = getOctaveSamplingPeriod(o) ;
ix = int(x / per + 0.5) ;
iy = int(y / per + 0.5) ;
Keypoint key ;
key.o = o ;
key.ix = ix ;
key.iy = iy ; = is ;
key.x = x ;
key.y = y ;
key.s = s ;
key.sigma = sigma ;
return key ;
// ===================================================================
// process()
// -------------------------------------------------------------------
/** @brief Compute Gaussian Scale Space
** The method computes the Gaussian scale space of the specified
** image. The scale space data is managed internally and can be
** accessed by means of getOctave() and getLevel().
** @remark Calling this method will delete the list of keypoints
** constructed by detectKeypoints().
** @param _im_pt pointer to image data.
** @param _width image width.
** @param _height image height .
process(const pixel_t* _im_pt, int _width, int _height)
using namespace Detail ;
width = _width ;
height = _height ;
prepareBuffers() ;
VL::float_t sigmak = powf(2.0f, 1.0 / S) ;
VL::float_t dsigma0 = sigma0 * sqrt (1.0f - 1.0f / (sigmak*sigmak) ) ;
// -----------------------------------------------------------------
// Make pyramid base
// -----------------------------------------------------------------
if( omin < 0 ) {
copyAndUpsampleRows(temp, _im_pt, width, height ) ;
copyAndUpsampleRows(octaves[0], temp, height, 2*width ) ;
for(int o = -1 ; o > omin ; --o) {
copyAndUpsampleRows(temp, octaves[0], width << -o, height << -o) ;
copyAndUpsampleRows(octaves[0], temp, height << -o, 2*(width << -o)) ; }
} else if( omin > 0 ) {
copyAndDownsample(octaves[0], _im_pt, width, height, 1 << omin) ;
} else {
copy(octaves[0], _im_pt, width, height) ;
VL::float_t sa = sigma0 * powf(sigmak, smin) ;
VL::float_t sb = sigman / powf(2.0f, omin) ; // review this
if( sa > sb ) {
VL::float_t sd = sqrt ( sa*sa - sb*sb ) ;
smooth( octaves[0], temp, octaves[0],
sd ) ;
// -----------------------------------------------------------------
// Make octaves
// -----------------------------------------------------------------
for(int o = omin ; o < omin+O ; ++o) {
// Prepare octave base
if( o > omin ) {
int sbest = std::min(smin + S, smax) ;
copyAndDownsample(getLevel(o, smin ),
getLevel(o-1, sbest),
getOctaveHeight(o-1), 2 ) ;
VL::float_t sa = sigma0 * powf(sigmak, smin ) ;
VL::float_t sb = sigma0 * powf(sigmak, sbest - S ) ;
if(sa > sb ) {
VL::float_t sd = sqrt ( sa*sa - sb*sb ) ;
smooth( getLevel(o,0), temp, getLevel(o,0),
getOctaveWidth(o), getOctaveHeight(o),
sd ) ;
// Make other levels
for(int s = smin+1 ; s <= smax ; ++s) {
VL::float_t sd = dsigma0 * powf(sigmak, s) ;
smooth( getLevel(o,s), temp, getLevel(o,s-1),
getOctaveWidth(o), getOctaveHeight(o),
sd ) ;
/** @brief Sift detector
** The function runs the SIFT detector on the stored Gaussian scale
** space (see process()). The detector consists in three steps
** - local maxima detection;
** - subpixel interpolation;
** - rejection of weak keypoints (@a threhsold);
** - rejection of keypoints on edge-like structures (@a edgeThreshold).
** As they are found, keypoints are added to an internal list. This
** list can be accessed by means of the member functions
** getKeypointsBegin() and getKeypointsEnd(). The list is ordered by
** octave, which is usefult to speed-up computeKeypointOrientations()
** and computeKeypointDescriptor().
Sift::detectKeypoints(VL::float_t threshold, VL::float_t edgeThreshold)
keypoints.clear() ;
int nValidatedKeypoints = 0 ;
// Process one octave per time
for(int o = omin ; o < omin + O ; ++o) {
int const xo = 1 ;
int const yo = getOctaveWidth(o) ;
int const so = getOctaveWidth(o) * getOctaveHeight(o) ;
int const ow = getOctaveWidth(o) ;
int const oh = getOctaveHeight(o) ;
VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
// -----------------------------------------------------------------
// Difference of Gaussians
// -----------------------------------------------------------------
pixel_t* dog = temp ;
tempIsGrad = false ;
pixel_t* pt = dog ;
for(int s = smin ; s <= smax-1 ; ++s) {
pixel_t* srca = getLevel(o, s ) ;
pixel_t* srcb = getLevel(o, s+1) ;
pixel_t* enda = srcb ;
while( srca != enda ) {
*pt++ = *srcb++ - *srca++ ;
// -----------------------------------------------------------------
// Find points of extremum
// -----------------------------------------------------------------
pixel_t* pt = dog + xo + yo + so ;
for(int s = smin+1 ; s <= smax-2 ; ++s) {
for(int y = 1 ; y < oh - 1 ; ++y) {
for(int x = 1 ; x < ow - 1 ; ++x) {
pixel_t v = *pt ;
// assert( (pt - x*xo - y*yo - (s-smin)*so) - dog == 0 ) ;
( v CMP ## = SGN 0.8 * threshold && \
v CMP *(pt + xo) && \
v CMP *(pt - xo) && \
v CMP *(pt + so) && \
v CMP *(pt - so) && \
v CMP *(pt + yo) && \
v CMP *(pt - yo) && \
v CMP *(pt + yo + xo) && \
v CMP *(pt + yo - xo) && \
v CMP *(pt - yo + xo) && \
v CMP *(pt - yo - xo) && \
v CMP *(pt + xo + so) && \
v CMP *(pt - xo + so) && \
v CMP *(pt + yo + so) && \
v CMP *(pt - yo + so) && \
v CMP *(pt + yo + xo + so) && \
v CMP *(pt + yo - xo + so) && \
v CMP *(pt - yo + xo + so) && \
v CMP *(pt - yo - xo + so) && \
v CMP *(pt + xo - so) && \
v CMP *(pt - xo - so) && \
v CMP *(pt + yo - so) && \
v CMP *(pt - yo - so) && \
v CMP *(pt + yo + xo - so) && \
v CMP *(pt + yo - xo - so) && \
v CMP *(pt - yo + xo - so) && \
v CMP *(pt - yo - xo - so) )
Keypoint k ;
k.ix = x ;
k.iy = y ; = s ;
keypoints.push_back(k) ;
pt += 1 ;
pt += 2 ;
pt += 2*yo ;
// -----------------------------------------------------------------
// Refine local maxima
// -----------------------------------------------------------------
{ // refine
KeypointsIter siter ;
KeypointsIter diter ;
for(diter = siter = keypointsBegin() + nValidatedKeypoints ;
siter != keypointsEnd() ;
++siter) {
int x = int( siter->ix ) ;
int y = int( siter->iy ) ;
int s = int( siter->is ) ;
VL::float_t Dx=0,Dy=0,Ds=0,Dxx=0,Dyy=0,Dss=0,Dxy=0,Dxs=0,Dys=0 ;
VL::float_t b [3] ;
pixel_t* pt ;
int dx = 0 ;
int dy = 0 ;
// must be exec. at least once
for(int iter = 0 ; iter < 5 ; ++iter) {
VL::float_t A[3*3] ;
x += dx ;
y += dy ;
pt = dog
+ xo * x
+ yo * y
+ so * (s - smin) ;
#define at(dx,dy,ds) (*( pt + (dx)*xo + (dy)*yo + (ds)*so))
#define Aat(i,j) (A[(i)+(j)*3])
/* Compute the gradient. */
Dx = 0.5 * (at(+1,0,0) - at(-1,0,0)) ;
Dy = 0.5 * (at(0,+1,0) - at(0,-1,0));
Ds = 0.5 * (at(0,0,+1) - at(0,0,-1)) ;
/* Compute the Hessian. */
Dxx = (at(+1,0,0) + at(-1,0,0) - 2.0 * at(0,0,0)) ;
Dyy = (at(0,+1,0) + at(0,-1,0) - 2.0 * at(0,0,0)) ;
Dss = (at(0,0,+1) + at(0,0,-1) - 2.0 * at(0,0,0)) ;
Dxy = 0.25 * ( at(+1,+1,0) + at(-1,-1,0) - at(-1,+1,0) - at(+1,-1,0) ) ;
Dxs = 0.25 * ( at(+1,0,+1) + at(-1,0,-1) - at(-1,0,+1) - at(+1,0,-1) ) ;
Dys = 0.25 * ( at(0,+1,+1) + at(0,-1,-1) - at(0,-1,+1) - at(0,+1,-1) ) ;
/* Solve linear system. */
Aat(0,0) = Dxx ;
Aat(1,1) = Dyy ;
Aat(2,2) = Dss ;
Aat(0,1) = Aat(1,0) = Dxy ;
Aat(0,2) = Aat(2,0) = Dxs ;
Aat(1,2) = Aat(2,1) = Dys ;
b[0] = - Dx ;
b[1] = - Dy ;
b[2] = - Ds ;
// Gauss elimination
for(int j = 0 ; j < 3 ; ++j) {
// look for leading pivot
VL::float_t maxa = 0 ;
VL::float_t maxabsa = 0 ;
int maxi = -1 ;
int i ;
for(i = j ; i < 3 ; ++i) {
VL::float_t a = Aat(i,j) ;
VL::float_t absa = fabsf( a ) ;
if ( absa > maxabsa ) {
maxa = a ;
maxabsa = absa ;
maxi = i ;
// singular?
if( maxabsa < 1e-10f ) {
b[0] = 0 ;
b[1] = 0 ;
b[2] = 0 ;
break ;
i = maxi ;
// swap j-th row with i-th row and
// normalize j-th row
for(int jj = j ; jj < 3 ; ++jj) {
std::swap( Aat(j,jj) , Aat(i,jj) ) ;
Aat(j,jj) /= maxa ;
std::swap( b[j], b[i] ) ;
b[j] /= maxa ;
// elimination
for(int ii = j+1 ; ii < 3 ; ++ii) {
VL::float_t x = Aat(ii,j) ;
for(int jj = j ; jj < 3 ; ++jj) {
Aat(ii,jj) -= x * Aat(j,jj) ;
b[ii] -= x * b[j] ;
// backward substitution
for(int i = 2 ; i > 0 ; --i) {
VL::float_t x = b[i] ;
for(int ii = i-1 ; ii >= 0 ; --ii) {
b[ii] -= x * Aat(ii,i) ;
/* If the translation of the keypoint is big, move the keypoint
* and re-iterate the computation. Otherwise we are all set.
dx= ((b[0] > 0.6 && x < ow-2) ? 1 : 0 )
+ ((b[0] < -0.6 && x > 1 ) ? -1 : 0 ) ;
dy= ((b[1] > 0.6 && y < oh-2) ? 1 : 0 )
+ ((b[1] < -0.6 && y > 1 ) ? -1 : 0 ) ;
<<at(0,0,0)+0.5 * (Dx * b[0] + Dy * b[1] + Ds * b[2])<<")"
<<" "<<std::flush ;
if( dx == 0 && dy == 0 ) break ;
/* std::cout<<std::endl ; */
// Accept-reject keypoint
VL::float_t val = at(0,0,0) + 0.5 * (Dx * b[0] + Dy * b[1] + Ds * b[2]) ;
VL::float_t score = (Dxx+Dyy)*(Dxx+Dyy) / (Dxx*Dyy - Dxy*Dxy) ;
VL::float_t xn = x + b[0] ;
VL::float_t yn = y + b[1] ;
VL::float_t sn = s + b[2] ;
if(fast_abs(val) > threshold &&
score < (edgeThreshold+1)*(edgeThreshold+1)/edgeThreshold &&
score >= 0 &&
fast_abs(b[0]) < 1.5 &&
fast_abs(b[1]) < 1.5 &&
fast_abs(b[2]) < 1.5 &&
xn >= 0 &&
xn <= ow-1 &&
yn >= 0 &&
yn <= oh-1 &&
sn >= smin &&
sn <= smax ) {
diter->o = o ;
diter->ix = x ;
diter->iy = y ;
diter->is = s ;
diter->x = xn * xperiod ;
diter->y = yn * xperiod ;
diter->s = sn ;
diter->sigma = getScaleFromIndex(o,sn) ;
++diter ;
} // next candidate keypoint
// prepare for next octave
keypoints.resize( diter - keypoints.begin() ) ;
nValidatedKeypoints = keypoints.size() ;
} // refine block
} // next octave
// ===================================================================
// computeKeypointOrientations()
// -------------------------------------------------------------------
/** @brief Compute modulus and phase of the gradient
** The function computes the modulus and the angle of the gradient of
** the specified octave @a o. The result is stored in a temporary
** internal buffer accessed by computeKeypointDescriptor() and
** computeKeypointOrientations().
** The SIFT detector provides keypoints with scale index s in the
** range @c smin+1 to @c smax-2. As such, the buffer contains only
** these levels.
** If called multiple times on the same octave, the function exits
** immediately.
** @param o octave of interest.
// Fills the buffer `temp` with the gradient of octave o: for every level s
// in [smin+1, smax-2] and every interior row it stores, per pixel, the
// gradient modulus followed by the gradient angle. Nothing is recomputed
// when the buffer is already flagged as holding the gradient of octave o.
Sift::prepareGrad(int o)
int const ow = getOctaveWidth(o) ;
int const oh = getOctaveHeight(o) ;
// element strides inside one octave: next column, next row, next level
int const xo = 1 ;
int const yo = ow ;
int const so = oh*ow ;
// recompute only if temp does not currently hold the gradient of octave o
if( ! tempIsGrad || tempOctave != o ) {
// compute dx/dy
for(int s = smin+1 ; s <= smax-2 ; ++s) {
// rows 0 and oh-1 are skipped so that src-yo and src+yo stay inside the level
for(int y = 1 ; y < oh-1 ; ++y ) {
// start at column 1 of row y
pixel_t* src = getLevel(o, s) + xo + yo*y ;
// NOTE(review): with this end pointer the loop also visits column ow-1,
// where src+xo addresses the first sample of the following row; confirm
// whether ow-2 was intended.
pixel_t* end = src + ow - 1 ;
// two output values per pixel; level smin+1 maps to the first slab of temp
pixel_t* grad = 2 * (xo + yo*y + (s - smin -1)*so) + temp ;
while(src != end) {
// central differences along x and y
VL::float_t Gx = 0.5 * ( *(src+xo) - *(src-xo) ) ;
VL::float_t Gy = 0.5 * ( *(src+yo) - *(src-yo) ) ;
VL::float_t m = fast_sqrt( Gx*Gx + Gy*Gy ) ;
// 2*pi is added before the modulo, presumably so that the stored angle
// is non-negative -- TODO confirm against fast_mod_2pi
VL::float_t t = fast_mod_2pi( fast_atan2(Gy, Gx) + VL::float_t(2*CV_PI) );
// interleaved layout: modulus first, then angle
*grad++ = pixel_t( m ) ;
*grad++ = pixel_t( t ) ;
++src ;
// remember what the buffer now contains
tempIsGrad = true ;
tempOctave = o ;
/** @brief Compute the orientation(s) of a keypoint
** The function computes the orientation of the specified keypoint.
** The function returns up to four different orientations, obtained
** as strong peaks of the histogram of gradient orientations (a
** keypoint can theoretically generate more than four orientations,
** but this is very unlikely).
** @remark The function needs to compute the gradient modulus and
** orientation of the Gaussian scale space octave to which the
** keypoint belongs. The result is cached, but discarded if different
** octaves are visited. Therefore it is much quicker to evaluate the
** keypoints in their natural octave order.
** The keypoint must lie within the scale space. In particular, the
** scale index must be in the range @c smin+1 to @c smax-2 (the range
** produced by the SIFT detector). If this is not the case, the
** computation is aborted, a "!" is written to std::cerr, and no
** orientations are returned.
** @param angles buffers to store the resulting angles.
** @param keypoint keypoint to process.
** @return number of orientations found.
// Builds a 36-bin histogram of gradient angles in a circular window around
// the keypoint (each sample weighted by gradient modulus and a window
// function of its distance), smooths the histogram, and writes up to four
// peak angles in radians to `angles`. Returns the number of angles written,
// or 0 when the keypoint fails the bounds check.
Sift::computeKeypointOrientations(VL::float_t angles [4], Keypoint keypoint)
int const nbins = 36 ;
VL::float_t const winFactor = 1.5 ;
VL::float_t hist [nbins] ;
// octave
int o = keypoint.o ;
VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
// offsets to move in the Gaussian scale space octave
// (xo is 2 because temp holds two values per pixel: modulus then angle)
const int ow = getOctaveWidth(o) ;
const int oh = getOctaveHeight(o) ;
const int xo = 2 ;
const int yo = xo * ow ;
const int so = yo * oh ;
// keypoint fractional geometry
// (image coordinates divided by the octave sampling period)
VL::float_t x = keypoint.x / xperiod ;
VL::float_t y = keypoint.y / xperiod ;
VL::float_t sigma = keypoint.sigma / xperiod ;
// shall we use keypoints.ix,iy,is here?
// xi, yi: nearest integer sample to the fractional position
int xi = ((int) (x+0.5)) ;
int yi = ((int) (y+0.5)) ;
// NOTE(review): the initializer of si is missing in this copy of the file;
// restore it from the upstream source before building.
int si = ;
// the window extends to 3 standard deviations of the weighting function
VL::float_t const sigmaw = winFactor * sigma ;
int W = (int) floor(3.0 * sigmaw) ;
// skip the keypoint if it is out of bounds
if(o < omin ||
o >=omin+O ||
xi < 0 ||
xi > ow-1 ||
yi < 0 ||
yi > oh-1 ||
si < smin+1 ||
si > smax-2 ) {
// a rejected keypoint is reported on stderr and yields no orientation
std::cerr<<"!"<<std::endl ;
return 0 ;
// make sure that the gradient buffer is filled with octave o
prepareGrad(o) ;
// clear the SIFT histogram
std::fill(hist, hist + nbins, 0) ;
// fill the SIFT histogram
// pt addresses the (modulus, angle) pair of the keypoint's nearest sample
pixel_t* pt = temp + xi * xo + yi * yo + (si - smin -1) * so ;
#undef at
#define at(dx,dy) (*(pt + (dx)*xo + (dy)*yo))
// the window is clipped so that only columns 1..ow-2 and rows 1..oh-2 are read
for(int ys = std::max(-W, 1-yi) ; ys <= std::min(+W, oh -2 -yi) ; ++ys) {
for(int xs = std::max(-W, 1-xi) ; xs <= std::min(+W, ow -2 -xi) ; ++xs) {
// offset of this sample from the fractional keypoint position
VL::float_t dx = xi + xs - x;
VL::float_t dy = yi + ys - y;
VL::float_t r2 = dx*dx + dy*dy ;
// limit to a circular window
if(r2 >= W*W+0.5) continue ;
// presumably fast_expn(v) evaluates exp(-v), making wgt a Gaussian weight
// -- TODO confirm
VL::float_t wgt = VL::fast_expn( r2 / (2*sigmaw*sigmaw) ) ;
VL::float_t mod = *(pt + xs*xo + ys*yo) ;
VL::float_t ang = *(pt + xs*xo + ys*yo + 1) ;
// int bin = (int) floor( nbins * ang / (2*CV_PI) ) ;
// NOTE(review): bin is not wrapped or clamped; if ang can equal 2*pi
// this indexes hist[nbins], one past the array -- confirm the range
// returned by fast_mod_2pi.
int bin = (int) floor( nbins * ang / (2*CV_PI) ) ;
hist[bin] += mod * wgt ;
// smooth the histogram
// NOTE(review): two alternative smoothing loops follow the #if below; the
// matching #else / #endif lines are not visible in this copy of the file.
#if defined VL_LOWE_STRICT
// Lowe's version apparently has a little issue with orientations
// around + or - pi, which we reproduce here for compatibility
for (int iter = 0; iter < 6; iter++) {
VL::float_t prev = hist[nbins/2] ;
for (int i = nbins/2-1; i >= -nbins/2 ; --i) {
int const j = (i + nbins) % nbins ;
int const jp = (i - 1 + nbins) % nbins ;
VL::float_t newh = (prev + hist[j] + hist[jp]) / 3.0;
prev = hist[j] ;
hist[j] = newh ;
// this is slightly more correct
// six passes of a circular 3-tap box filter; prev and first keep the
// unsmoothed neighbours that the in-place update would otherwise overwrite
for (int iter = 0; iter < 6; iter++) {
VL::float_t prev = hist[nbins-1] ;
VL::float_t first = hist[0] ;
int i ;
for (i = 0; i < nbins - 1; i++) {
VL::float_t newh = (prev + hist[i] + hist[(i+1) % nbins]) / 3.0;
prev = hist[i] ;
hist[i] = newh ;
// last bin: its right-hand neighbour is the original value of bin 0
hist[i] = (prev + hist[i] + first)/3.0 ;
// find the histogram maximum
VL::float_t maxh = * std::max_element(hist, hist + nbins) ;
// find peaks within 80% from max
int nangles = 0 ;
for(int i = 0 ; i < nbins ; ++i) {
VL::float_t h0 = hist [i] ;
VL::float_t hm = hist [(i-1+nbins) % nbins] ;
VL::float_t hp = hist [(i+1+nbins) % nbins] ;
// is this a peak?
if( h0 > 0.8*maxh && h0 > hm && h0 > hp ) {
// quadratic interpolation
// VL::float_t di = -0.5 * (hp - hm) / (hp+hm-2*h0) ;
// di is the offset, in bins, of the vertex of the parabola through
// (hm, h0, hp); the +0.5 below refers the angle to the bin centre
VL::float_t di = -0.5 * (hp - hm) / (hp+hm-2*h0) ;
VL::float_t th = 2*CV_PI * (i+di+0.5) / nbins ;
angles [ nangles++ ] = th ;
// the output buffer holds at most four angles
// NOTE(review): the enough_angles label targeted by this goto is not
// visible in this copy of the file.
if( nangles == 4 )
goto enough_angles ;
return nangles ;
// ===================================================================
// computeKeypointDescriptor()
// -------------------------------------------------------------------
namespace Detail {
/** Normalizes in norm L_2 a descriptor. */
normalize_histogram(VL::float_t* L_begin, VL::float_t* L_end)
VL::float_t* L_iter ;
VL::float_t norm = 0.0 ;
for(L_iter = L_begin; L_iter != L_end ; ++L_iter)
norm += (*L_iter) * (*L_iter) ;
norm = fast_sqrt(norm) ;
for(L_iter = L_begin; L_iter != L_end ; ++L_iter)
*L_iter /= (norm + std::numeric_limits<VL::float_t>::epsilon() ) ;
/** @brief SIFT descriptor
** The function computes the descriptor of the keypoint @a keypoint.
** The function fills the buffer @a descr_pt, which must be large
** enough. The function uses @a angle0 as the rotation of the keypoint.
** By calling the function multiple times, different orientations can
** be evaluated.
** @remark The function needs to compute the gradient modulus and
** orientation of the Gaussian scale space octave to which the
** keypoint belongs. The result is cached, but discarded if different
** octaves are visited. Therefore it is much quicker to evaluate the
** keypoints in their natural octave order.
** The function silently aborts the computation for keypoints outside
** the scale space boundaries. See also computeKeypointOrientations().
// Parameter list of the descriptor routine.
// NOTE(review): the line carrying the function name is not visible in this
// copy; the section banner and the call site further down suggest
// Sift::computeKeypointDescriptor -- confirm against upstream.
//   descr_pt  output buffer, receives NBO*NBP*NBP values
//   keypoint  keypoint to describe
//   angle0    keypoint orientation in radians (fed to sinf and cosf)
// If the keypoint fails the bounds check the routine returns before
// writing anything to descr_pt.
(VL::float_t* descr_pt,
Keypoint keypoint,
VL::float_t angle0)
/* The SIFT descriptor is a three dimensional histogram of the position
* and orientation of the gradient. There are NBP bins for each spatial
* dimension and NBO bins for the orientation dimension, for a total of
* NBP x NBP x NBO bins.
* The support of each spatial bin has an extension of SBP = 3sigma
* pixels, where sigma is the scale of the keypoint. Thus all the bins
* together have a support SBP x NBP pixels wide . Since weighting and
* interpolation of pixel is used, another half bin is needed at both
* ends of the extension. Therefore, we need a square window of SBP x
* (NBP + 1) pixels. Finally, since the patch can be arbitrarily rotated,
* we need to consider a window 2W += sqrt(2) x SBP x (NBP + 1) pixels
* wide.
// octave
int o = keypoint.o ;
VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
// offsets to move in Gaussian scale space octave
// (xo is 2 because temp holds two values per pixel: modulus then angle)
const int ow = getOctaveWidth(o) ;
const int oh = getOctaveHeight(o) ;
const int xo = 2 ;
const int yo = xo * ow ;
const int so = yo * oh ;
// keypoint fractional geometry
VL::float_t x = keypoint.x / xperiod;
VL::float_t y = keypoint.y / xperiod ;
VL::float_t sigma = keypoint.sigma / xperiod ;
// sine and cosine of the keypoint orientation, used to rotate sample offsets
VL::float_t st0 = sinf( angle0 ) ;
VL::float_t ct0 = cosf( angle0 ) ;
// shall we use keypoints.ix,iy,is here?
int xi = ((int) (x+0.5)) ;
int yi = ((int) (y+0.5)) ;
// NOTE(review): the initializer of si is missing in this copy of the file;
// restore it from the upstream source before building.
int si = ;
// const VL::float_t magnif = 3.0f ;
// magnif is not declared locally (the local definition above is commented
// out), so it presumably refers to a class member -- TODO confirm
const int NBO = 8 ;
const int NBP = 4 ;
const VL::float_t SBP = magnif * sigma ;
// half-width, in pixels, of the square window that is scanned
const int W = (int) floor (sqrt(2.0) * SBP * (NBP + 1) / 2.0 + 0.5) ;
/* Offsets to move in the descriptor. */
/* Use Lowe's convention. */
const int binto = 1 ;
const int binyo = NBO * NBP ;
const int binxo = NBO ;
// const int bino = NBO * NBP * NBP ;
int bin ;
// check bounds
if(o < omin ||
o >=omin+O ||
xi < 0 ||
xi > ow-1 ||
yi < 0 ||
yi > oh-1 ||
si < smin+1 ||
si > smax-2 )
return ;
// make sure gradient buffer is up-to-date
prepareGrad(o) ;
// start from an all-zero histogram
std::fill( descr_pt, descr_pt + NBO*NBP*NBP, 0 ) ;
/* Center the scale space and the descriptor on the current keypoint.
* Note that dpt is pointing to the bin of center (SBP/2,SBP/2,0).
pixel_t const * pt = temp + xi*xo + yi*yo + (si - smin - 1)*so ;
VL::float_t * dpt = descr_pt + (NBP/2) * binyo + (NBP/2) * binxo ;
#define atd(dbinx,dbiny,dbint) *(dpt + (dbint)*binto + (dbiny)*binyo + (dbinx)*binxo)
* Process pixels in the intersection of the image rectangle
* (1,1)-(M-1,N-1) and the keypoint bounding box.
for(int dyi = std::max(-W, 1-yi) ; dyi <= std::min(+W, oh-2-yi) ; ++dyi) {
for(int dxi = std::max(-W, 1-xi) ; dxi <= std::min(+W, ow-2-xi) ; ++dxi) {
// retrieve
// (gradient modulus and angle stored for this sample by prepareGrad)
VL::float_t mod = *( pt + dxi*xo + dyi*yo + 0 ) ;
VL::float_t angle = *( pt + dxi*xo + dyi*yo + 1 ) ;
VL::float_t theta = fast_mod_2pi(-angle + angle0) ; // lowe compatible ?
// fractional displacement
VL::float_t dx = xi + dxi - x;
VL::float_t dy = yi + dyi - y;
// get the displacement normalized w.r.t. the keypoint
// orientation and extension.
VL::float_t nx = ( ct0 * dx + st0 * dy) / SBP ;
VL::float_t ny = (-st0 * dx + ct0 * dy) / SBP ;
VL::float_t nt = NBO * theta / (2*CV_PI) ;
// Get the gaussian weight of the sample. The gaussian window
// has a standard deviation equal to NBP/2. Note that dx and dy
// are in the normalized frame, so that -NBP/2 <= dx <= NBP/2.
// (NBP/2 is an integer division; with NBP == 4 it yields 2)
VL::float_t const wsigma = NBP/2 ;
VL::float_t win = VL::fast_expn((nx*nx + ny*ny)/(2.0 * wsigma * wsigma)) ;
// The sample will be distributed in 8 adjacent bins.
// We start from the ``lower-left'' bin.
int binx = fast_floor( nx - 0.5 ) ;
int biny = fast_floor( ny - 0.5 ) ;
int bint = fast_floor( nt ) ;
// fractional position of the sample inside its starting bin
VL::float_t rbinx = nx - (binx+0.5) ;
VL::float_t rbiny = ny - (biny+0.5) ;
VL::float_t rbint = nt - bint ;
int dbinx ;
int dbiny ;
int dbint ;
// Distribute the current sample into the 8 adjacent bins
for(dbinx = 0 ; dbinx < 2 ; ++dbinx) {
for(dbiny = 0 ; dbiny < 2 ; ++dbiny) {
for(dbint = 0 ; dbint < 2 ; ++dbint) {
// only the spatial bin indices are range-checked; the orientation
// index wraps around through the modulo NBO further down
if( binx+dbinx >= -(NBP/2) &&
binx+dbinx < (NBP/2) &&
biny+dbiny >= -(NBP/2) &&
biny+dbiny < (NBP/2) ) {
// trilinear interpolation weight times window and gradient modulus
VL::float_t weight = win
* mod
* fast_abs (1 - dbinx - rbinx)
* fast_abs (1 - dbiny - rbiny)
* fast_abs (1 - dbint - rbint) ;
atd(binx+dbinx, biny+dbiny, (bint+dbint) % NBO) += weight ;
/* Standard SIFT descriptors are normalized, truncated and normalized again */
if( normalizeDescriptor ) {
/* Normalize the histogram to L2 unit length. */
Detail::normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
/* Truncate at 0.2. */
for(bin = 0; bin < NBO*NBP*NBP ; ++bin) {
if (descr_pt[bin] > 0.2) descr_pt[bin] = 0.2;
/* Normalize again. */
Detail::normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
// namespace VL
2.) wrapper of Vedaldi`s SIFT
using namespace cv;
// Default constructor of the parameter set used by both the detector and
// the descriptor operators.
// NOTE(review): its member-initializer list and body are not visible in
// this copy of the file.
SIFT::CommonParams::CommonParams() :
// Stores the number of octaves, the number of layers per octave, the index
// of the first octave and the angle mode in the like-named members.
SIFT::CommonParams::CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave, int _angleMode ) :
nOctaves(_nOctaves), nOctaveLayers(_nOctaveLayers),
firstOctave(_firstOctave), angleMode(_angleMode)
// Default constructor of the detector parameters.
// NOTE(review): its member-initializer list and body are not visible in
// this copy of the file.
SIFT::DetectorParams::DetectorParams() :
// Stores the two thresholds that the detector operator later passes to
// VL::Sift::detectKeypoints.
SIFT::DetectorParams::DetectorParams( double _threshold, double _edgeThreshold ) :
threshold(_threshold), edgeThreshold(_edgeThreshold)
// Default constructor of the descriptor parameters.
// NOTE(review): its member-initializer list and body are not visible in
// this copy of the file.
SIFT::DescriptorParams::DescriptorParams() :
// Stores the magnification and the normalization flag in the like-named
// members.
// NOTE(review): the initializer list ends on a trailing comma and no
// initializer for _recalculateAngles is visible; the remaining line appears
// to be missing from this copy.
SIFT::DescriptorParams::DescriptorParams( double _magnification, bool _isNormalize, bool _recalculateAngles ) :
magnification(_magnification), isNormalize(_isNormalize),
/** Detector-oriented constructor.
 *
 *  Builds the detector and common parameter sets from plain values and
 *  assigns them to the members. descriptorParams is not assigned here and
 *  keeps whatever its default construction provides.
 */
SIFT::SIFT( double _threshold, double _edgeThreshold, int _nOctaves,
            int _nOctaveLayers, int _firstOctave, int _angleMode )
{
    const DetectorParams detector(_threshold, _edgeThreshold);
    const CommonParams common(_nOctaves, _nOctaveLayers, _firstOctave, _angleMode);

    detectorParams = detector;
    commParams = common;
}
/** Descriptor-oriented constructor.
 *
 *  Builds the descriptor and common parameter sets from plain values and
 *  assigns them to the members. detectorParams is not assigned here and
 *  keeps whatever its default construction provides.
 */
SIFT::SIFT( double _magnification, bool _isNormalize, bool _recalculateAngles, int _nOctaves,
            int _nOctaveLayers, int _firstOctave, int _angleMode )
{
    const DescriptorParams descriptor(_magnification, _isNormalize, _recalculateAngles);
    const CommonParams common(_nOctaves, _nOctaveLayers, _firstOctave, _angleMode);

    descriptorParams = descriptor;
    commParams = common;
}
/** Constructor taking the three ready-made parameter sets; each one is
 *  copied into the corresponding member.
 */
SIFT::SIFT( const CommonParams& _commParams,
            const DetectorParams& _detectorParams,
            const DescriptorParams& _descriptorParams )
{
    this->commParams = _commParams;
    this->detectorParams = _detectorParams;
    this->descriptorParams = _descriptorParams;
}
/** Converts a VL keypoint into a cv::KeyPoint.
 *
 *  The KeyPoint size is sigma * default magnification * 4, where 4 is the
 *  number of spatial bins per side (NBP). Position and octave are copied
 *  from the VL keypoint, @a angle is passed through unchanged, and the
 *  response and class id arguments are 0. @a vlSift is not used.
 */
inline KeyPoint vlKeypointToOcv( const VL::Sift& vlSift, const VL::Sift::Keypoint& vlKeypoint, float angle )
{
    const int spatialBins = 4; // NBP
    const float patchSize = vlKeypoint.sigma*SIFT::DescriptorParams::GET_DEFAULT_MAGNIFICATION()*spatialBins;
    return KeyPoint( vlKeypoint.x, vlKeypoint.y, patchSize, angle, 0, vlKeypoint.o, 0 );
}
// Converts a cv::KeyPoint into a VL keypoint by asking vlSift to build one
// at the scale recovered from the KeyPoint size.
// The magnification parameter is not read in the visible body; the default
// magnification is used instead.
inline void ocvKeypointToVl( const VL::Sift& vlSift, const KeyPoint& ocvKeypoint,
VL::Sift::Keypoint& vlKeypoint, int magnification )
// inverse of the size formula used by vlKeypointToOcv: size / (magnification * NBP)
float sigma = ocvKeypoint.size/(SIFT::DescriptorParams::GET_DEFAULT_MAGNIFICATION()*4);// 4==NBP
// NOTE(review): the first two arguments of getKeypoint are missing in this
// copy of the file (presumably the keypoint x and y position) -- restore
// them from the upstream source before building.
vlKeypoint = vlSift.getKeypoint(,, sigma);
/** Reduces the orientations found for a keypoint to a single angle.
 *
 *  @param sift      SIFT engine used to compute the orientations.
 *  @param keypoint  keypoint to process.
 *  @param angleMode SIFT::CommonParams::FIRST_ANGLE returns the first
 *                   orientation; SIFT::CommonParams::AVERAGE_ANGLE returns
 *                   the arithmetic mean of all orientations found.
 *  @return the selected angle as produced by the engine, or -1 when no
 *          orientation was found or @a angleMode is not one of the two
 *          values above.
 *
 *  The average is accumulated from zero. Starting the sum from the -1
 *  "no angle" sentinel would bias the mean by -1/angleCount and could make
 *  it negative, which callers interpret as "no orientation".
 *
 *  Note: the mean is a plain arithmetic mean and does not account for the
 *  wrap-around of angles at 2*pi.
 */
float computeKeypointOrientations( VL::Sift& sift, const VL::Sift::Keypoint& keypoint, int angleMode )
{
    const float noAngle = -1;

    VL::float_t angles[4];
    const int angleCount = sift.computeKeypointOrientations(angles, keypoint);
    if( angleCount <= 0 )
        return noAngle;

    if( angleMode == SIFT::CommonParams::FIRST_ANGLE )
        return angles[0];

    if( angleMode == SIFT::CommonParams::AVERAGE_ANGLE )
    {
        float angleSum = 0;
        for( int i = 0; i < angleCount; i++ )
            angleSum += angles[i];
        return angleSum / angleCount;
    }

    return noAngle;
}
// detectors
// Detects SIFT keypoints in an 8-bit single-channel image and appends them
// to keypoints; the vector is not cleared in the visible code.
// Raises CV_StsBadArg through CV_Error when img is empty or not CV_8UC1.
// The mask argument is not read anywhere in the visible body.
void SIFT::operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const
if( img.empty() || img.type() != CV_8UC1 )
CV_Error( CV_StsBadArg, "img is empty or has incorrect type" );
// work on a float copy scaled from [0,255] to [0,1]
Mat fimg;
img.convertTo(fimg, CV_32FC1, 1.0/255.0);
const double sigman = .5 ;
const double sigma0 = 1.6 * powf(2.0f, 1.0f / commParams.nOctaveLayers) ;
// NOTE(review): the expression after the (float*) cast is missing in this
// copy of the file (presumably the pixel data pointer of fimg) -- restore
// it from the upstream source before building.
VL::Sift vlsift((float*), fimg.cols, fimg.rows,
sigman, sigma0, commParams.nOctaves, commParams.nOctaveLayers,
commParams.firstOctave, -1, commParams.nOctaveLayers+1);
vlsift.detectKeypoints(detectorParams.threshold, detectorParams.edgeThreshold);
// d (the number of detected keypoints) is not used in the visible body
int d = std::abs(int(vlsift.keypointsBegin()-vlsift.keypointsEnd()));
for( VL::Sift::KeypointsConstIter iter = vlsift.keypointsBegin(); iter != vlsift.keypointsEnd(); ++iter )
// a negative result means no orientation was found; such keypoints are dropped
float angleVal = computeKeypointOrientations( vlsift, *iter, commParams.angleMode );
if( angleVal >= 0 )
// the angle is converted from radians to degrees for cv::KeyPoint
keypoints.push_back( vlKeypointToOcv(vlsift, *iter, angleVal*180.0/CV_PI) );
// descriptors
// Computes one SIFT descriptor per keypoint, written as row pi of
// descriptors. Unless useProvidedKeypoints is set, the keypoints are first
// detected with the detector overload of this operator.
// Raises CV_StsBadArg through CV_Error when img is empty or not CV_8UC1.
// The mask argument is only forwarded to the detector overload.
void SIFT::operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
Mat& descriptors,
bool useProvidedKeypoints) const
if( img.empty() || img.type() != CV_8UC1 )
CV_Error( CV_StsBadArg, "img is empty or has incorrect type" );
// work on a float copy scaled from [0,255] to [0,1]
Mat fimg;
img.convertTo(fimg, CV_32FC1, 1.0/255.0);
const double sigman = .5 ;
const double sigma0 = 1.6 * powf(2.0f, 1.0f / commParams.nOctaveLayers) ;
// detection constructs its own VL::Sift from img; a second one is built
// below for the descriptors
if( !useProvidedKeypoints )
(*this)(img, mask, keypoints);
// NOTE(review): the expression after the (float*) cast is missing in this
// copy of the file (presumably the pixel data pointer of fimg) -- restore
// it from the upstream source before building.
VL::Sift vlsift((float*), fimg.cols, fimg.rows,
sigman, sigma0, commParams.nOctaves, commParams.nOctaveLayers,
commParams.firstOctave, -1, commParams.nOctaveLayers+1);
// one row per keypoint, DESCRIPTOR_SIZE columns of VL::float_t
descriptors.create( keypoints.size(), DescriptorParams::DESCRIPTOR_SIZE, DataType<VL::float_t>::type );
vector<KeyPoint>::const_iterator iter = keypoints.begin();
for( int pi = 0 ; iter != keypoints.end(); ++iter, pi++ )
VL::Sift::Keypoint vlkpt;
// NOTE(review): ocvKeypointToVl declares its last parameter as int while
// the constructor above takes the magnification as double; confirm the
// implicit conversion is intended.
ocvKeypointToVl( vlsift, *iter, vlkpt, descriptorParams.magnification );
// cv::KeyPoint angle is converted from degrees to radians
float angleVal = iter->angle*CV_PI/180.0;
if( descriptorParams.recalculateAngles )
// keep the provided angle when no orientation can be recomputed (negative result)
float recalcAngleVal = computeKeypointOrientations( vlsift, vlkpt, commParams.angleMode );
if( recalcAngleVal >= 0 )
angleVal = recalcAngleVal;
// NOTE(review): the descriptor routine returns before writing its output
// when the keypoint fails its bounds check, so that row keeps whatever
// create() left in it -- confirm whether rows should be zeroed first.
vlsift.computeKeypointDescriptor((VL::float_t*)descriptors.ptr(pi), vlkpt, angleVal);