Integration object detection using Latent SVM. Sample was added.

Valentina Kustikova 15 years ago
parent a22f74c362
commit fbfccffbaa
  1. 2
  2. 123
  3. 140
  4. 16
  5. 81
  6. 401
  7. 66
  8. 396
  9. 11
  10. 36
  11. 93
  12. 395
  13. 576
  14. 246
  15. 611
  16. 134
  17. 800
  18. 1462
  19. 2
  20. 244
  21. 103
  22. BIN
  23. BIN
  24. 49

@ -1 +1 @@
define_opencv_module(objdetect opencv_core opencv_imgproc)
define_opencv_module(objdetect opencv_core opencv_imgproc opencv_highgui)

@ -139,6 +139,129 @@ CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascad
CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
CvPoint pt, int start_stage CV_DEFAULT(0));
* Latent SVM Object Detection functions *
// DataType: STRUCT position
// Structure describes the position of the filter in the feature pyramid
// l - level in the feature pyramid
// (x, y) - coordinate in level l
typedef struct
unsigned int x;
unsigned int y;
unsigned int l;
} position;
// DataType: STRUCT filterObject
// Description of the filter, which corresponds to the part of the object
// V - ideal (penalty = 0) position of the partial filter
// from the root filter position (V_i in the paper)
// fineFunction - vector of penalty function coefficients (d_i in the paper)
// pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
// Rectangular map (sizeX x sizeY),
// every cell stores feature vector (dimension = p)
// H - matrix of feature vectors
// to set and get feature vectors (i,j)
// used formula H[(j * sizeX + i) * p + k], where
// k - component of feature vector in cell (i, j)
// xp - auxiliary parameter for internal use
// size of row in feature vectors
// (yp = (int) (p / xp); p = xp * yp)
// Filter (root or part) description: placement, deformation penalty and weights.
typedef struct{
position V; // ideal (penalty = 0) position of the part filter relative to the root filter (V_i in the paper)
float fineFunction[4]; // penalty function coefficients (d_i in the paper): pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
unsigned int sizeX; // number of cells of the rectangular map along X
unsigned int sizeY; // number of cells of the rectangular map along Y
unsigned int p; // dimension of the feature vector stored in every cell
unsigned int xp; // auxiliary parameter for internal use: size of a row in feature vectors (p = xp * yp)
float *H; // feature vectors; component k of cell (i, j) is H[(j * sizeX + i) * p + k]
} filterObject;
// data type: STRUCT CvLatentSvmDetector
// structure contains internal representation of trained Latent SVM detector
// num_filters - total number of filters (root plus part) in model
// num_components - number of components in model
// num_part_filters - array containing number of part filters for each component
// filters - root and part filters for all model components
// b - biases for all model components
// score_threshold - confidence level threshold
typedef struct CvLatentSvmDetector
int num_filters;
int num_components;
int* num_part_filters;
filterObject** filters;
float* b;
float score_threshold;
// data type: STRUCT CvObjectDetection
// structure contains the bounding box and confidence level for detected object
// rect - bounding box for a detected object
// score - confidence level
typedef struct CvObjectDetection
CvRect rect;
float score;
} CvObjectDetection;
//////////////// Object Detection using Latent SVM //////////////
// load trained detector from a file
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// filename - path to the file containing the parameters of
- trained Latent SVM detector
// trained Latent SVM detector in internal representation
CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);
// release memory allocated for CvLatentSvmDetector structure
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// detector - CvLatentSvmDetector structure to be released
CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// find rectangular regions in the given image that are likely
// to contain objects and corresponding confidence levels
// API
// CvSeq* cvLatentSvmDetectObjects(IplImage* image,
// CvLatentSvmDetector* detector,
// CvMemStorage* storage,
// float overlap_threshold = 0.5f);
// image - image to detect objects in
// detector - Latent SVM detector in internal representation
// storage - memory storage to store the resultant sequence
// of the object candidate rectangles
// overlap_threshold - threshold for the non-maximum suppression algorithm
= 0.5f [here will be the reference to original paper]
// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
CvLatentSvmDetector* detector,
CvMemStorage* storage,
float overlap_threshold CV_DEFAULT(0.5f));
#ifdef __cplusplus

@ -0,0 +1,140 @@
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
// Computation the point of intersection functions
// (parabolas on the variable y)
// a(y - q1) + b(q1 - y)(q1 - y) + f[q1]
// a(y - q2) + b(q2 - y)(q2 - y) + f[q2]
// API
// int GetPointOfIntersection(const F_type *f,
const F_type a, const F_type b,
int q1, int q2, F_type *point);
// f - function on the regular grid
// a - coefficient of the function
// b - coefficient of the function
// q1 - parameter of the function
// q2 - parameter of the function
// point - point of intersection
// Error status
int GetPointOfIntersection(const float *f,
const float a, const float b,
int q1, int q2, float *point);
// Decision of one dimensional problem generalized distance transform
// on the regular grid at all points
// min (a(y' - y) + b(y' - y)(y' - y) + f(y')) (on y')
// API
// int DistanceTransformOneDimensionalProblem(const F_type *f, const int n,
const F_type a, const F_type b,
F_type *distanceTransform,
int *points);
// f - function on the regular grid
// n - grid dimension
// a - coefficient of optimizable function
// b - coefficient of optimizable function
// distanceTransform - values of generalized distance transform
// points - arguments that corresponds to the optimal value of function
// Error status
int DistanceTransformOneDimensionalProblem(const float *f, const int n,
const float a, const float b,
float *distanceTransform,
int *points);
// Computation next cycle element
// API
// int GetNextCycleElement(int k, int n, int q);
// k - index of the previous cycle element
// n - number of matrix rows
// q - parameter that equal (number_of_rows * number_of_columns - 1)
// None
// Next cycle element
int GetNextCycleElement(int k, int n, int q);
// Transposition of cycle elements
// API
// void TransposeCycleElements(F_type *a, int *cycle, int cycle_len);
// a - initial matrix
// cycle - cycle
// cycle_len - cycle length
// a - matrix with transposed elements
// None
void TransposeCycleElements(float *a, int *cycle, int cycle_len);
// Getting transposed matrix
// API
// void Transpose(F_type *a, int n, int m);
// a - initial matrix
// n - number of rows
// m - number of columns
// a - transposed matrix
// None
void Transpose(float *a, int n, int m);
// Decision of two dimensional problem generalized distance transform
// on the regular grid at all points
// min{d2(y' - y) + d4(y' - y)(y' - y) +
min(d1(x' - x) + d3(x' - x)(x' - x) + f(x',y'))} (on x', y')
// API
// int DistanceTransformTwoDimensionalProblem(const F_type *f,
const int n, const int m,
const F_type coeff[4],
F_type *distanceTransform,
int *pointsX, int *pointsY);
// f - function on the regular grid
// n - number of rows
// m - number of columns
// coeff - coefficients of optimizable function
coeff[0] = d1, coeff[1] = d2,
coeff[2] = d3, coeff[3] = d4
// distanceTransform - values of generalized distance transform
// pointsX - arguments x' that correspond to the optimal value
// pointsY - arguments y' that correspond to the optimal value
// Error status
int DistanceTransformTwoDimensionalProblem(const float *f,
const int n, const int m,
const float coeff[4],
float *distanceTransform,
int *pointsX, int *pointsY);

@ -0,0 +1,16 @@
#ifndef SVM_ERROR
#define SVM_ERROR
#define LATENT_SVM_OK 0
#define FFT_OK 2
#define FFT_ERROR -8

@ -0,0 +1,81 @@
#ifndef _FFT_H
#define _FFT_H
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
#include <math.h>
// 1-dimensional FFT
// API
// int fft(float *x_in, float *x_out, int n, int shift);
// x_in - input signal
// n - number of elements for searching Fourier image
// shift - shift between input elements
// x_out - output signal (contains 2n elements in order
Re(x_in[0]), Im(x_in[0]), Re(x_in[1]), Im(x_in[1]) and etc.)
// Error status
int fft(float *x_in, float *x_out, int n, int shift);
// Inverse 1-dimensional FFT
// API
// int fftInverse(float *x_in, float *x_out, int n, int shift);
// x_in - Fourier image of 1d input signal(contains 2n elements
in order Re(x_in[0]), Im(x_in[0]),
Re(x_in[1]), Im(x_in[1]) and etc.)
// n - number of elements for searching counter FFT image
// shift - shift between input elements
// x_out - output signal (contains n elements)
// Error status
int fftInverse(float *x_in, float *x_out, int n, int shift);
// 2-dimensional FFT
// API
// int fft2d(float *x_in, float *x_out, int numRows, int numColls);
// x_in - input signal (matrix, launched by rows)
// numRows - number of rows
// numColls - number of columns
// x_out - output signal (contains (2 * numRows * numColls) elements
in order Re(x_in[0][0]), Im(x_in[0][0]),
Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// Error status
int fft2d(float *x_in, float *x_out, int numRows, int numColls);
// Inverse 2-dimensional FFT
// API
// int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls);
// x_in - Fourier image of matrix (contains (2 * numRows * numColls)
elements in order Re(x_in[0][0]), Im(x_in[0][0]),
Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// numRows - number of rows
// numColls - number of columns
// x_out - initial signal (matrix, launched by rows)
// Error status
int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls);

@ -0,0 +1,401 @@
/* Latent SVM prediction API */
#include <stdio.h>
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
#include "_routine.h"
// Building feature pyramid
// (pyramid constructed both contrast and non-contrast image)
// Getting feature pyramid
// API
// int getFeaturePyramid(IplImage * image,
const int lambda, const int k,
const int startX, const int startY,
const int W, const int H, featurePyramid **maps);
// image - image
// lambda - resize scale
// k - size of cells
// startX - X coordinate of the image rectangle to search
// startY - Y coordinate of the image rectangle to search
// W - width of the image rectangle to search
// H - height of the image rectangle to search
// maps - feature maps for all levels
// Error status
int getFeaturePyramid(IplImage * image,
const int lambda, const int k,
const int startX, const int startY,
const int W, const int H, featurePyramid **maps);
// Getting feature map for the selected subimage
// API
// int getFeatureMaps_dp(const IplImage * image, const int k, featureMap **map);
// image - selected subimage
// k - size of cells
// map - feature map
// Error status
int getFeatureMaps_dp(const IplImage * image, const int k, featureMap **map);
// Feature map Normalization and Truncation
// API
// int normalizationAndTruncationFeatureMaps(featureMap *map, const float alfa);
// map - feature map
// alfa - truncation threshold
// map - truncated and normalized feature map
// Error status
int normalizationAndTruncationFeatureMaps(featureMap *map, const float alfa);
// Feature map reduction
// In each cell we reduce dimension of the feature vector
// according to original paper special procedure
// API
// int PCAFeatureMaps(featureMap *map)
// map - feature map
// map - feature map
// Error status
int PCAFeatureMaps(featureMap *map);
// search object
// Transformation filter displacement from the block space
// to the space of pixels at the initial image
// API
// int convertPoints(int countLevel, int lambda,
int initialImageLevel,
CvPoint *points, int *levels,
CvPoint **partsDisplacement, int kPoints, int n,
int maxXBorder,
int maxYBorder);
// countLevel - the number of levels in the feature pyramid
// lambda - method parameter
// initialImageLevel - level of feature pyramid that contains feature map
for initial image
// points - the set of root filter positions (in the block space)
// levels - the set of levels
// partsDisplacement - displacement of part filters (in the block space)
// kPoints - number of root filter positions
// n - number of part filters
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// points - the set of root filter positions (in the space of pixels)
// partsDisplacement - displacement of part filters (in the space of pixels)
// Error status
int convertPoints(int countLevel, int lambda,
int initialImageLevel,
CvPoint *points, int *levels,
CvPoint **partsDisplacement, int kPoints, int n,
int maxXBorder,
int maxYBorder);
// Elimination of boxes that are outside the image boundaries
// API
// int clippingBoxes(int width, int height,
CvPoint *points, int kPoints);
// width - image width
// height - image height
// points - a set of points (coordinates of top left or
bottom right corners)
// kPoints - points number
// points - updated points (if coordinates less than zero then
set zero coordinate, if coordinates more than image
size then set coordinates equal image size)
// Error status
#ifdef __cplusplus
extern "C"
int clippingBoxes(int width, int height,
CvPoint *points, int kPoints);
// Creation feature pyramid with nullable border
// API
// featurePyramid* createFeaturePyramidWithBorder(const IplImage *image,
int maxXBorder, int maxYBorder);
// image - initial image
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// Feature pyramid with nullable border
#ifdef __cplusplus
extern "C"
featurePyramid* createFeaturePyramidWithBorder(IplImage *image,
int maxXBorder, int maxYBorder);
// Computation of the root filter displacement and values of score function
// API
// int searchObject(const featurePyramid *H, const filterObject **all_F, int n,
float b,
int maxXBorder,
int maxYBorder,
CvPoint **points, int **levels, int *kPoints, float *score,
CvPoint ***partsDisplacement);
// H - feature pyramid
// all_F - the set of filters (the first element is root filter,
other elements - part filters)
// n - the number of part filters
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// points - positions (x, y) of the upper-left corner
of root filter frame
// levels - levels that correspond to each position
// kPoints - number of positions
// score - value of the score function
// partsDisplacement - part filters displacement for each position
of the root filter
// Error status
int searchObject(const featurePyramid *H, const filterObject **all_F, int n,
float b,
int maxXBorder,
int maxYBorder,
CvPoint **points, int **levels, int *kPoints, float *score,
CvPoint ***partsDisplacement);
// Computation of the root filter displacement and values of score function
// API
// int searchObjectThreshold(const featurePyramid *H,
const filterObject **all_F, int n,
float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
CvPoint **points, int **levels, int *kPoints,
float **score, CvPoint ***partsDisplacement);
// H - feature pyramid
// all_F - the set of filters (the first element is root filter,
other elements - part filters)
// n - the number of part filters
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// scoreThreshold - score threshold
// points - positions (x, y) of the upper-left corner
of root filter frame
// levels - levels that correspond to each position
// kPoints - number of positions
// score - values of the score function
// partsDisplacement - part filters displacement for each position
of the root filter
// Error status
int searchObjectThreshold(const featurePyramid *H,
const filterObject **all_F, int n,
float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
CvPoint **points, int **levels, int *kPoints,
float **score, CvPoint ***partsDisplacement);
// Computation root filters displacement and values of score function
// API
// int searchObjectThresholdSomeComponents(const featurePyramid *H,
const filterObject **filters,
int kComponents, const int *kPartFilters,
const float *b, float scoreThreshold,
CvPoint **points, CvPoint **oppPoints,
float **score, int *kPoints);
// H - feature pyramid
// filters - filters (root filter then its part filters, etc.)
// kComponents - root filters number
// kPartFilters - array of part filters number for each component
// b - array of linear terms
// scoreThreshold - score threshold
// points - root filters displacement (top left corners)
// oppPoints - root filters displacement (bottom right corners)
// score - array of score values
// kPoints - number of boxes
// Error status
#ifdef __cplusplus
extern "C"
int searchObjectThresholdSomeComponents(const featurePyramid *H,
const filterObject **filters,
int kComponents, const int *kPartFilters,
const float *b, float scoreThreshold,
CvPoint **points, CvPoint **oppPoints,
float **score, int *kPoints);
// Compute opposite point for filter box
// API
// int getOppositePoint(CvPoint point,
int sizeX, int sizeY,
float step, int degree,
CvPoint *oppositePoint);
// point - coordinates of filter top left corner
(in the space of pixels)
// (sizeX, sizeY) - filter dimension in the block space
// step - scaling factor
// degree - degree of the scaling factor
// oppositePoint - coordinates of filter bottom corner
(in the space of pixels)
// Error status
int getOppositePoint(CvPoint point,
int sizeX, int sizeY,
float step, int degree,
CvPoint *oppositePoint);
// Drawing root filter boxes
// API
// int showRootFilterBoxes(IplImage *image,
const filterObject *filter,
CvPoint *points, int *levels, int kPoints,
CvScalar color, int thickness,
int line_type, int shift);
// image - initial image
// filter - root filter object
// points - a set of points
// levels - levels of feature pyramid
// kPoints - number of points
// color - line color for each box
// thickness - line thickness
// line_type - line type
// shift - shift
// window contained initial image and filter boxes
// Error status
int showRootFilterBoxes(IplImage *image,
const filterObject *filter,
CvPoint *points, int *levels, int kPoints,
CvScalar color, int thickness,
int line_type, int shift);
// Drawing part filter boxes
// API
// int showPartFilterBoxes(IplImage *image,
const filterObject **filters,
int n, CvPoint **partsDisplacement,
int *levels, int kPoints,
CvScalar color, int thickness,
int line_type, int shift);
// image - initial image
// filters - a set of part filters
// n - number of part filters
// partsDisplacement - a set of points
// levels - levels of feature pyramid
// kPoints - number of root filter positions
// color - line color for each box
// thickness - line thickness
// line_type - line type
// shift - shift
// window contained initial image and filter boxes
// Error status
int showPartFilterBoxes(IplImage *image,
const filterObject **filters,
int n, CvPoint **partsDisplacement,
int *levels, int kPoints,
CvScalar color, int thickness,
int line_type, int shift);
// Drawing boxes
// API
// int showBoxes(IplImage *img,
const CvPoint *points, const CvPoint *oppositePoints, int kPoints,
CvScalar color, int thickness, int line_type, int shift);
// img - initial image
// points - top left corner coordinates
// oppositePoints - right bottom corner coordinates
// kPoints - points number
// color - line color for each box
// thickness - line thickness
// line_type - line type
// shift - shift
// Error status
int showBoxes(IplImage *img,
const CvPoint *points, const CvPoint *oppositePoints, int kPoints,
CvScalar color, int thickness, int line_type, int shift);

@ -0,0 +1,66 @@
#include "precomp.hpp"
#include "_types.h"
#define MODEL 1
#define P 2
#define COMP 3
#define SCORE 4
#define RFILTER 100
#define PFILTERs 101
#define PFILTER 200
#define SIZEX 150
#define SIZEY 151
#define WEIGHTS 152
#define TAGV 300
#define Vx 350
#define Vy 351
#define TAGD 400
#define Dx 451
#define Dy 452
#define Dxx 453
#define Dyy 454
#define BTAG 500
#define STEP_END 1000
#define EP (STEP_END + P)
#define EVx (STEP_END + Vx)
#define EVy (STEP_END + Vy)
#define EDx (STEP_END + Dx)
#define EDy (STEP_END + Dy)
#define EDxx (STEP_END + Dxx)
#define EDyy (STEP_END + Dyy)
//extern "C" {
void LSVMparser(const char * filename, filterObject *** model, int *last, int *max, int **comp, float **b, int *count, float * score);
#ifdef __cplusplus
extern "C"
int loadModel(
// Input parameters
const char *modelPath,// - path to the model file
// Output parameters
filterObject ***filters,// - array of pointers to the filters of the components
int *kFilters, //- total number of filters in all models
int *kComponents, //- number of components
int **kPartFilters, //- array containing the number of part filters in each component
float **b, //- array of linear terms of the score function
float *scoreThreshold); //- score threshold

@ -0,0 +1,396 @@
/* Matching procedure API */
#include "_latentsvm.h"
#include "_error.h"
#include "_distancetransform.h"
#include "_fft.h"
#include "_routine.h"
//extern "C" {
// Function for convolution computation
// API
// int convolution(const filterObject *Fi, const featureMap *map, float *f);
// Fi - filter object
// map - feature map
// f - the convolution
// Error status
int convolution(const filterObject *Fi, const featureMap *map, float *f);
// Computation multiplication of FFT images
// API
// int fftImagesMulti(float *fftImage1, float *fftImage2, int numRows, int numColls,
float *multi);
// fftImage1 - first fft image
// fftImage2 - second fft image
// (numRows, numColls) - image dimensions
// multi - multiplication
// Error status
int fftImagesMulti(float *fftImage1, float *fftImage2, int numRows, int numColls,
float *multi);
// Turnover filter matrix for the single feature
// API
// int rot2PI(float *filter, int dimX, int dimY, float *rot2PIFilter,
int p, int shift);
// filter - filter weight matrix
// (dimX, dimY) - dimension of filter matrix
// p - number of features
// shift - number of feature (or channel)
// rot2PIFilter - rotated matrix
// Error status
int rot2PI(float *filter, int dimX, int dimY, float *rot2PIFilter,
int p, int shift);
// Addition nullable bars to the dimension of feature map (single feature)
// API
// int addNullableBars(float *rot2PIFilter, int dimX, int dimY,
float *newFilter, int newDimX, int newDimY);
// rot2PIFilter - filter matrix for the single feature that was rotated
// (dimX, dimY) - dimension rot2PIFilter
// (newDimX, newDimY)- dimension of feature map for the single feature
// newFilter - filter matrix with nullable bars
// Error status
int addNullableBars(float *rot2PIFilter, int dimX, int dimY,
float *newFilter, int newDimX, int newDimY);
// Computation FFT image for filter object
// API
// int getFFTImageFilterObject(const filterObject *filter,
int mapDimX, int mapDimY,
fftImage **image);
// filter - filter object
// (mapDimX, mapDimY)- dimension of feature map
// image - fft image
// Error status
int getFFTImageFilterObject(const filterObject *filter,
int mapDimX, int mapDimY,
fftImage **image);
// Computation FFT image for feature map
// API
// int getFFTImageFeatureMap(const featureMap *map, fftImage **image);
// Error status
int getFFTImageFeatureMap(const featureMap *map, fftImage **image);
// Function for convolution computation using FFT
// API
// int convFFTConv2d(const fftImage *featMapImage, const fftImage *filterImage,
int filterDimX, int filterDimY, float **conv);
// featMapImage - feature map image
// filterImage - filter image
// (filterDimX,filterDimY) - filter dimension
// conv - the convolution
// Error status
int convFFTConv2d(const fftImage *featMapImage, const fftImage *filterImage,
int filterDimX, int filterDimY, float **conv);
// Computation objective function D according the original paper
// API
// int filterDispositionLevel(const filterObject *Fi, const featureMap *pyramid,
float **scoreFi,
int **pointsX, int **pointsY);
// Fi - filter object (weights and coefficients of penalty
function that are used in this routine)
// pyramid - feature map
// scoreFi - values of distance transform on the level at all positions
// (pointsX, pointsY)- positions that correspond to the maximum value
of distance transform at all grid nodes
// Error status
int filterDispositionLevel(const filterObject *Fi, const featureMap *pyramid,
float **scoreFi,
int **pointsX, int **pointsY);
// Computation objective function D according the original paper using FFT
// API
// int filterDispositionLevelFFT(const filterObject *Fi, const fftImage *featMapImage,
float **scoreFi,
int **pointsX, int **pointsY);
// Fi - filter object (weights and coefficients of penalty
function that are used in this routine)
// featMapImage - FFT image of feature map
// scoreFi - values of distance transform on the level at all positions
// (pointsX, pointsY)- positions that correspond to the maximum value
of distance transform at all grid nodes
// Error status
int filterDispositionLevelFFT(const filterObject *Fi, const fftImage *featMapImage,
float **scoreFi,
int **pointsX, int **pointsY);
// Computation border size for feature map
// API
// int computeBorderSize(int maxXBorder, int maxYBorder, int *bx, int *by);
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// bx - border size (X-direction)
// by - border size (Y-direction)
// Error status
int computeBorderSize(int maxXBorder, int maxYBorder, int *bx, int *by);
// Addition nullable border to the feature map
// API
// int addNullableBorder(featureMap *map, int bx, int by);
// map - feature map
// bx - border size (X-direction)
// by - border size (Y-direction)
// Error status
int addNullableBorder(featureMap *map, int bx, int by);
// Computation the maximum of the score function at the level
// API
// int maxFunctionalScoreFixedLevel(const filterObject **all_F, int n,
const featurePyramid *H,
int level, float b,
int maxXBorder, int maxYBorder,
float *score, CvPoint **points, int *kPoints,
CvPoint ***partsDisplacement);
// all_F - the set of filters (the first element is root filter,
the other - part filters)
// n - the number of part filters
// H - feature pyramid
// level - feature pyramid level for computation maximum score
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// score - the maximum of the score function at the level
// points - the set of root filter positions (in the block space)
// levels - the set of levels
// kPoints - number of root filter positions
// partsDisplacement - displacement of part filters (in the block space)
// Error status
int maxFunctionalScoreFixedLevel(const filterObject **all_F, int n,
const featurePyramid *H,
int level, float b,
int maxXBorder, int maxYBorder,
float *score, CvPoint **points, int *kPoints,
CvPoint ***partsDisplacement);
// Computation score function at the level that exceed threshold
// API
// int thresholdFunctionalScoreFixedLevel(const filterObject **all_F, int n,
const featurePyramid *H,
int level, float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
float **score, CvPoint **points, int *kPoints,
CvPoint ***partsDisplacement);
// all_F - the set of filters (the first element is root filter,
the other - part filters)
// n - the number of part filters
// H - feature pyramid
// level - feature pyramid level for computation maximum score
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// scoreThreshold - score threshold
// score - score function at the level that exceed threshold
// points - the set of root filter positions (in the block space)
// levels - the set of levels
// kPoints - number of root filter positions
// partsDisplacement - displacement of part filters (in the block space)
// Error status
int thresholdFunctionalScoreFixedLevel(const filterObject **all_F, int n,
const featurePyramid *H,
int level, float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
float **score, CvPoint **points, int *kPoints,
CvPoint ***partsDisplacement);
// Computation the maximum of the score function
// API
// int maxFunctionalScore(const filterObject **all_F, int n,
const featurePyramid *H, float b,
int maxXBorder, int maxYBorder,
float *score,
CvPoint **points, int **levels, int *kPoints,
CvPoint ***partsDisplacement);
// all_F - the set of filters (the first element is root filter,
the other - part filters)
// n - the number of part filters
// H - feature pyramid
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// score - the maximum of the score function
// points - the set of root filter positions (in the block space)
// levels - the set of levels
// kPoints - number of root filter positions
// partsDisplacement - displacement of part filters (in the block space)
// Error status
int maxFunctionalScore(const filterObject **all_F, int n,
const featurePyramid *H, float b,
int maxXBorder, int maxYBorder,
float *score,
CvPoint **points, int **levels, int *kPoints,
CvPoint ***partsDisplacement);
// Computation score function that exceed threshold
// API
// int thresholdFunctionalScore(const filterObject **all_F, int n,
const featurePyramid *H,
float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
float **score,
CvPoint **points, int **levels, int *kPoints,
CvPoint ***partsDisplacement);
// all_F - the set of filters (the first element is root filter,
the other - part filters)
// n - the number of part filters
// H - feature pyramid
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// scoreThreshold - score threshold
// score - score function values that exceed threshold
// points - the set of root filter positions (in the block space)
// levels - the set of levels
// kPoints - number of root filter positions
// partsDisplacement - displacement of part filters (in the block space)
// Error status
int thresholdFunctionalScore(const filterObject **all_F, int n,
const featurePyramid *H,
float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
float **score,
CvPoint **points, int **levels, int *kPoints,
CvPoint ***partsDisplacement);
// Perform non-maximum suppression algorithm (described in original paper)
// to remove "similar" bounding boxes
// API
// int nonMaximumSuppression(int numBoxes, const CvPoint *points,
const CvPoint *oppositePoints, const float *score,
float overlapThreshold,
int *numBoxesout, CvPoint **pointsOut,
CvPoint **oppositePointsOut, float **scoreOut);
// numBoxes - number of bounding boxes
// points - array of left top corner coordinates
// oppositePoints - array of right bottom corner coordinates
// score - array of detection scores
// overlapThreshold - threshold: bounding box is removed if overlap part
is greater than passed value
// numBoxesOut - the number of bounding boxes algorithm returns
// pointsOut - array of left top corner coordinates
// oppositePointsOut - array of right bottom corner coordinates
// scoreOut - array of detection scores
// Error status
#ifdef __cplusplus
extern "C"
int nonMaximumSuppression(int numBoxes, const CvPoint *points,
const CvPoint *oppositePoints, const float *score,
float overlapThreshold,
int *numBoxesOut, CvPoint **pointsOut,
CvPoint **oppositePointsOut, float **scoreOut);
#ifdef __cplusplus
extern "C"
int getMaxFilterDims(const filterObject **filters, int kComponents,
const int *kPartFilters,
unsigned int *maxXBorder, unsigned int *maxYBorder);

@ -0,0 +1,11 @@
#include "precomp.hpp"
#include "_types.h"
IplImage * resize_opencv (IplImage * img, float scale);
IplImage * resize_article_dp1(IplImage * img, float scale, const int k);
IplImage * resize_article_dp(IplImage * img, float scale, const int k);

@ -0,0 +1,36 @@
#ifndef _ROUTINE_H
#define _ROUTINE_H
#include "precomp.hpp"
#include "_types.h"
#include "_error.h"
// Memory management routines
// All parameter names correspond to the previous data structure descriptions
// All "alloc" functions return allocated memory for 1 object
// with all fields including arrays
// Error status is return value
int allocFilterObject(filterObject **obj, const int sizeX, const int sizeY,
const int p, const int xp);
int freeFilterObject (filterObject **obj);
int allocFeatureMapObject(featureMap **obj, const int sizeX, const int sizeY,
const int p, const int xp);
int freeFeatureMapObject (featureMap **obj);
// Allocates one featurePyramid object (see the memory management notes
// above); lambda and countLevel correspond to the featurePyramid fields.
// Returns an error status.
// The declaration gets C linkage when the header is compiled as C++;
// the conditional must be closed before the prototype.
#ifdef __cplusplus
extern "C"
#endif
int allocFeaturePyramidObject(featurePyramid **obj,
                              const int lambda, const int countLevel);
// Releases a featurePyramid object obtained from allocFeaturePyramidObject.
// Returns an error status.
// The declaration gets C linkage when the header is compiled as C++;
// the conditional must be closed before the prototype.
#ifdef __cplusplus
extern "C"
#endif
int freeFeaturePyramidObject (featurePyramid **obj);
int allocFFTImage(fftImage **image, int p, int dimX, int dimY);
int freeFFTImage(fftImage **image);

@ -0,0 +1,93 @@
#ifndef SVM_TYPE
#define SVM_TYPE
//#include "opencv2/core/core.hpp"
//#include "opencv2/highgui/highgui.hpp"
#include "precomp.hpp"
//#define FFT_CONV
// Value of PI
#define PI 3.1415926535897932384626433832795
// Tolerance used when comparing a pair of floating-point numbers
#define EPS 0.000001
// Minimum and maximum values of the floating-point data type
#define F_MAX 3.402823466e+38
#define F_MIN -3.402823465e+38
// The number of elements in bin
// The number of sectors in gradient histogram building
#define CNTPARTION 9
// The number of levels in image resize procedure
// We need Lambda levels to resize image twice
#define LAMBDA 10
// Block size. Used in feature pyramid building procedure
#define SIDE_LENGTH 8
// main data structures //
// DataType: STRUCT featureMap
// Rectangular map (sizeX x sizeY),
// every cell stores feature vector (dimension = p)
// Map - matrix of feature vectors stored as one flat array;
// to set and get feature vectors (i,j)
// used formula Map[(j * sizeX + i) * p + k], where
// k - component of feature vector in cell (i, j)
// xp - auxiliary parameter for internal use
// size of row in feature vectors
// (yp = (int) (p / xp); p = xp * yp)
typedef struct{
// number of cells along X
int sizeX;
// number of cells along Y
int sizeY;
// length of the feature vector stored in every cell
int p;
// auxiliary row size of the feature vector (see above)
int xp;
// sizeX * sizeY * p feature values
float *Map;
} featureMap;
// DataType: STRUCT featurePyramid
// countLevel - number of levels in the feature pyramid
// lambda - resize scale coefficient (the pyramid builder uses a scale
// step of 2^(1/lambda) between neighbouring levels)
// pyramid - array of pointers to feature map at different levels
typedef struct{
// number of entries in pyramid
int countLevel;
// number of levels needed to halve the image size
int lambda;
// one featureMap pointer per level
featureMap **pyramid;
} featurePyramid;
// DataType: STRUCT filterDisposition
// The structure stores preliminary results in optimization process
// with objective function D
// x - array with X coordinates of optimization problems solutions
// y - array with Y coordinates of optimization problems solutions
// score - array with optimal objective values
// NOTE(review): the three arrays are presumably parallel (same length,
// same indexing) - confirm against the code that fills them.
typedef struct{
// optimal objective values
float *score;
// X coordinates of the optimal solutions
int *x;
// Y coordinates of the optimal solutions
int *y;
} filterDisposition;
// DataType: STRUCT fftImage
// The structure stores FFT image
// p - number of channels
// dimX, dimY - dimensions of every 2d signal
// (NOTE(review): presumably dimX is the number of columns and dimY the
// number of rows - confirm against allocFFTImage)
// channels - array of FFT images for 2d signals, one pointer per channel
typedef struct{
// number of channels
unsigned int p;
// first dimension of each channel
unsigned int dimX;
// second dimension of each channel
unsigned int dimY;
// per-channel FFT data
float **channels;
} fftImage;

@ -0,0 +1,395 @@
#include "_distancetransform.h"
// Computation the point of intersection functions
// (parabolas on the variable y)
// a(y - q1) + b(q1 - y)(q1 - y) + f[q1]
// a(y - q2) + b(q2 - y)(q2 - y) + f[q2]
// API
// int GetPointOfIntersection(const float *f,
//                            const float a, const float b,
//                            int q1, int q2, float *point);
// f - function on the regular grid
// a - coefficient of the function
// b - coefficient of the function
// q1 - parameter of the function
// q2 - parameter of the function
// point - point of intersection
// Error status
// Finds the abscissa at which the parabolas rooted at grid nodes q1 and q2
// intersect and stores it in *point.
// NOTE(review): the braces, the body of the (q1 == q2) branch and the
// return statements are not present in this extract - confirm against the
// full source before relying on the control flow shown here.
int GetPointOfIntersection(const float *f,
const float a, const float b,
int q1, int q2, float *point)
// Coinciding nodes would make the denominator below zero
if (q1 == q2)
} /* if (q1 == q2) */
// Solution for y of
//   a(y - q1) + b(y - q1)^2 + f[q1] = a(y - q2) + b(y - q2)^2 + f[q2]
// b == 0 also zeroes the denominator; no check for it is visible here.
(*point) = ( (f[q2] - a * q2 + b *q2 * q2) -
(f[q1] - a * q1 + b * q1 * q1) ) / (2 * b * (q2 - q1));
// Decision of one dimensional problem generalized distance transform
// on the regular grid at all points
// min (a(y' - y) + b(y' - y)(y' - y) + f(y')) (on y')
// API
// int DistanceTransformOneDimensionalProblem(const float *f, const int n,
//                                            const float a, const float b,
//                                            float *distanceTransform,
//                                            int *points);
// f - function on the regular grid
// n - grid dimension
// a - coefficient of optimizable function
// b - coefficient of optimizable function
// distanceTransform - values of generalized distance transform
// points - arguments that corresponds to the optimal value of function
// Error status
// Solves the one-dimensional generalized distance transform on a regular
// grid of n nodes by maintaining the lower envelope of the parabolas
// a(y' - y) + b(y' - y)^2 + f(y').
// v[k] holds the grid node of the k-th parabola in the envelope and
// z[k], z[k + 1] bound the interval on which that parabola is the lowest.
// NOTE(review): braces, the do/else keywords, the updates of k, the error
// returns and the free() calls are not present in this extract - confirm
// against the full source.
int DistanceTransformOneDimensionalProblem(const float *f, const int n,
const float a, const float b,
float *distanceTransform,
int *points)
int i, k;
int tmp;
int diff;
float pointIntersection;
int *v;
float *z;
k = 0;
// Allocate scratch buffers (must be freed before leaving this function)
v = (int *)malloc (sizeof(int) * n);
z = (float *)malloc (sizeof(float) * (n + 1));
// The envelope starts with the parabola rooted at node 0
v[0] = 0;
z[0] = (float)F_MIN; // left border of envelope
z[1] = (float)F_MAX; // right border of envelope
for (i = 1; i < n; i++)
// Intersection of the candidate parabola i with the last one in the envelope
tmp = GetPointOfIntersection(f, a, b, v[k], i, &pointIntersection);
} /* if (tmp != DISTANCE_TRANSFORM_OK) */
if (pointIntersection <= z[k])
// Envelope doesn't contain current parabola
tmp = GetPointOfIntersection(f, a, b, v[k], i, &pointIntersection);
} /* if (tmp != DISTANCE_TRANSFORM_OK) */
}while (pointIntersection <= z[k]);
// Append the parabola to the envelope
v[k] = i;
z[k] = pointIntersection;
z[k + 1] = (float)F_MAX;
// Append the parabola to the envelope
v[k] = i;
z[k] = pointIntersection;
z[k + 1] = (float)F_MAX;
} /* if (pointIntersection <= z[k]) */
// Compute the values of the generalized distance transform at all grid points
k = 0;
for (i = 0; i < n; i++)
// Advance to the envelope interval that contains node i
while (z[k + 1] < i)
points[i] = v[k];
diff = i - v[k];
distanceTransform[i] = a * diff + b * diff * diff + f[v[k]];
// Release allocated memory
// Computation of the next cycle element
// API
// int GetNextCycleElement(int k, int n, int q);
// INPUT
// k - index of the previous cycle element
// n - number of matrix rows
// q - parameter equal to (number_of_rows * number_of_columns - 1)
// OUTPUT
// None
// RESULT
// Next cycle element, (k * n) % q
int GetNextCycleElement(int k, int n, int q)
{
    return ((k * n) % q);
}

// Transpose cycle elements: every value stored at cycle[i] is moved to
// cycle[i + 1] and the value at the last index wraps around to cycle[0]
// API
// void TransposeCycleElements(float *a, int *cycle, int cycle_len)
// INPUT
// a - initial matrix
// cycle - indices array of cycle
// cycle_len - number of elements in the cycle
// OUTPUT
// a - matrix with transposed elements
// RESULT
// None
void TransposeCycleElements(float *a, int *cycle, int cycle_len)
{
    int i;
    float buf;
    // Successive swaps from the tail rotate the values along the cycle
    for (i = cycle_len - 1; i > 0 ; i--)
    {
        buf = a[ cycle[i] ];
        a[ cycle[i] ] = a[ cycle[i - 1] ];
        a[ cycle[i - 1] ] = buf;
    }
}

// Transpose cycle elements (integer matrix version of
// TransposeCycleElements)
// API
// void TransposeCycleElements_int(int *a, int *cycle, int cycle_len)
// INPUT
// a - initial matrix
// cycle - indices array of cycle
// cycle_len - number of elements in the cycle
// OUTPUT
// a - matrix with transposed elements
// RESULT
// None
void TransposeCycleElements_int(int *a, int *cycle, int cycle_len)
{
    int i;
    int buf;
    for (i = cycle_len - 1; i > 0 ; i--)
    {
        buf = a[ cycle[i] ];
        a[ cycle[i] ] = a[ cycle[i - 1] ];
        a[ cycle[i - 1] ] = buf;
    }
}

// Getting transposed matrix (in place, by following permutation cycles:
// the element at flat index i moves to index (i * n) % (n * m - 1))
// API
// void Transpose(float *a, int n, int m);
// INPUT
// a - initial matrix (n rows, m columns, stored by rows)
// n - number of rows
// m - number of columns
// OUTPUT
// a - transposed matrix (m rows, n columns, stored by rows)
// RESULT
// None. The matrix is left unchanged when the scratch buffer
// cannot be allocated.
void Transpose(float *a, int n, int m)
{
    int *cycle;
    int i, k, q, cycle_len;
    int max_cycle_len;

    max_cycle_len = n * m;
    q = max_cycle_len - 1;
    // Indices 0 and q never move, so empty matrices and matrices with
    // fewer than three elements are already transposed.
    if (n <= 0 || m <= 0 || q < 2)
    {
        return;
    }

    // Scratch buffer for one cycle (released at the end of this function)
    cycle = (int *)malloc(sizeof(int) * max_cycle_len);
    if (!cycle)
    {
        return;
    }

    for (i = 1; i < q; i++)
    {
        // Collect the cycle that starts at i; stop as soon as an index
        // not greater than i is reached.
        cycle_len = 0;
        cycle[cycle_len++] = i;
        k = GetNextCycleElement(i, n, q);
        while (k > i)
        {
            cycle[cycle_len++] = k;
            k = GetNextCycleElement(k, n, q);
        }
        // k == i means i is the smallest index of its cycle, i.e. the
        // cycle has not been processed yet; k < i means it already was.
        if (k == i)
        {
            TransposeCycleElements(a, cycle, cycle_len);
        } /* if (k == i) */
    }

    // Release allocated memory
    free(cycle);
}

// Getting transposed matrix (integer matrix version of Transpose)
// API
// void Transpose_int(int *a, int n, int m);
// INPUT
// a - initial matrix (n rows, m columns, stored by rows)
// n - number of rows
// m - number of columns
// OUTPUT
// a - transposed matrix (m rows, n columns, stored by rows)
// RESULT
// None. The matrix is left unchanged when the scratch buffer
// cannot be allocated.
void Transpose_int(int *a, int n, int m)
{
    int *cycle;
    int i, k, q, cycle_len;
    int max_cycle_len;

    max_cycle_len = n * m;
    q = max_cycle_len - 1;
    if (n <= 0 || m <= 0 || q < 2)
    {
        return;
    }

    // Scratch buffer for one cycle (released at the end of this function)
    cycle = (int *)malloc(sizeof(int) * max_cycle_len);
    if (!cycle)
    {
        return;
    }

    for (i = 1; i < q; i++)
    {
        cycle_len = 0;
        cycle[cycle_len++] = i;
        k = GetNextCycleElement(i, n, q);
        while (k > i)
        {
            cycle[cycle_len++] = k;
            k = GetNextCycleElement(k, n, q);
        }
        if (k == i)
        {
            TransposeCycleElements_int(a, cycle, cycle_len);
        } /* if (k == i) */
    }

    // Release allocated memory
    free(cycle);
}
// Decision of two dimensional problem generalized distance transform
// on the regular grid at all points
// min{d2(y' - y) + d4(y' - y)(y' - y) +
//     min(d1(x' - x) + d3(x' - x)(x' - x) + f(x',y'))} (on x', y')
// API
// int DistanceTransformTwoDimensionalProblem(const float *f,
//                                            const int n, const int m,
//                                            const float coeff[4],
//                                            float *distanceTransform,
//                                            int *pointsX, int *pointsY);
// f - function on the regular grid
// n - number of rows
// m - number of columns
// coeff - coefficients of optimizable function
//         coeff[0] = d1, coeff[1] = d2,
//         coeff[2] = d3, coeff[3] = d4
// distanceTransform - values of generalized distance transform
// pointsX - arguments x' that correspond to the optimal value
// pointsY - arguments y' that correspond to the optimal value
// Error status
// Solves the two-dimensional generalized distance transform on an n x m grid
// as two passes of the one-dimensional problem: first along every row
// (coefficients coeff[0], coeff[2]), then, after transposing, along every
// column (coefficients coeff[1], coeff[3]).
// NOTE(review): braces, the bodies of the error branches, the final return
// and the free() calls are not present in this extract - confirm against the
// full source.
int DistanceTransformTwoDimensionalProblem(const float *f,
const int n, const int m,
const float coeff[4],
float *distanceTransform,
int *pointsX, int *pointsY)
int i, j, tmp;
int resOneDimProblem;
float *internalDistTrans;
int *internalPointsX;
int size = n * m;
// Allocate scratch buffers (must be freed before leaving this function)
internalDistTrans = (float *)malloc(sizeof(float) * size);
internalPointsX = (int *)malloc(sizeof(int) * size);
// Pass 1: optimize over x' independently in each of the n rows
for (i = 0; i < n; i++)
resOneDimProblem = DistanceTransformOneDimensionalProblem(
f + i * m, m,
coeff[0], coeff[2],
internalDistTrans + i * m,
internalPointsX + i * m);
if (resOneDimProblem != DISTANCE_TRANSFORM_OK)
} /* if (resOneDimProblem != DISTANCE_TRANSFORM_OK) */
// Make columns contiguous so that the 1-D solver can run on them
Transpose(internalDistTrans, n, m);
// Pass 2: optimize over y' independently in each of the m columns
for (j = 0; j < m; j++)
resOneDimProblem = DistanceTransformOneDimensionalProblem(
internalDistTrans + j * n, n,
coeff[1], coeff[3],
distanceTransform + j * n,
pointsY + j * n);
if (resOneDimProblem != DISTANCE_TRANSFORM_OK)
} /* if (resOneDimProblem != DISTANCE_TRANSFORM_OK) */
// Bring the results back to the row-major n x m layout
Transpose(distanceTransform, m, n);
Transpose_int(pointsY, m, n);
// The optimal x' for (i, j) is the row-pass optimum taken in the row
// selected by the column pass
for (i = 0; i < n; i++)
for (j = 0; j < m; j++)
tmp = pointsY[i * m + j];
pointsX[i * m + j] = internalPointsX[tmp * m + j];
// Release allocated memory

@ -0,0 +1,576 @@
#include "_latentsvm.h"
#include "_resizeimg.h"
#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
// Sign of r with a dead zone around zero.
// Returns 1 when r > 0.0001, -1 when r < -0.0001 and 0 otherwise.
int sign(float r){
    if(r > 0.0001f) return 1;
    if(r < -0.0001f) return -1;
    return 0;
}
// Getting feature map for the selected subimage
// API
// int getFeatureMaps(const IplImage * image, const int k, featureMap **map);
// image - selected subimage
// k - size of cells
// map - feature map
// Error status
// Builds the feature map of gradient-orientation histograms for an image
// split into cells of k x k pixels.
// image - source image
// k - cell size in pixels
// map - receives the allocated feature map: sizeX = width / k,
// sizeY = height / k, 3 * CNTPARTION values per cell
// NOTE(review): braces, the return statement and any release of dx, dy, r,
// alfa, nearest_x, nearest_y and w are not present in this extract - confirm
// against the full source.
int getFeatureMaps_dp(const IplImage * image,const int k, featureMap **map)
int sizeX, sizeY;
int p, px, strsz;
int height, width, channels;
int i, j, kk, c, ii, jj, d;
float * datadx, * datady;
float tmp, x, y, tx, ty;
IplImage * dx, * dy;
int *nearest_x, *nearest_y;
float *w, a_x, b_x;
// The same [-1 0 1] kernel is used as a row (d/dx) and as a column (d/dy)
float kernel[3] = {-1.f, 0.f, 1.f};
CvMat kernel_dx = cvMat(1, 3, CV_32F, kernel);
CvMat kernel_dy = cvMat(3, 1, CV_32F, kernel);
float * r;
int * alfa;
float boundary_x[CNTPARTION+1];
float boundary_y[CNTPARTION+1];
float max, tmp_scal;
int maxi;
height = image->height;
width = image->width ;
channels = image->nChannels;
// The derivative images are always created with 3 channels
dx = cvCreateImage(cvSize(image->width , image->height) , IPL_DEPTH_32F , 3);
dy = cvCreateImage(cvSize(image->width , image->height) , IPL_DEPTH_32F , 3);
sizeX = width / k;
sizeY = height / k;
px = CNTPARTION + 2 * CNTPARTION; // contrast-insensitive (CNTPARTION) plus contrast-sensitive (2 * CNTPARTION) bins
p = px;
strsz = sizeX * p;
// NOTE(review): the accumulation below uses +=, so this presumably returns
// a zero-initialized map - confirm in allocFeatureMapObject
allocFeatureMapObject(map, sizeX, sizeY, p, px);
cvFilter2D(image, dx, &kernel_dx, cvPoint(-1, 0));
cvFilter2D(image, dy, &kernel_dy, cvPoint(0, -1));
// Unit direction vectors at angles i * PI / CNTPARTION
for(i = 0; i <= CNTPARTION; i++)
boundary_x[i] = cosf((((float)i) * (((float)PI) / (float) (CNTPARTION))));
boundary_y[i] = sinf((((float)i) * (((float)PI) / (float) (CNTPARTION))));
}/*for(i = 0; i <= CNTPARTION; i++) */
// r - gradient magnitude per pixel; alfa - two orientation bins per pixel.
// Only interior pixels are filled by the loops below.
r = (float *)malloc( sizeof(float) * (width * height));
alfa = (int *)malloc( sizeof(int ) * (width * height * 2));
for(j = 1; j < height-1; j++)
datadx = (float*)(dx->imageData + dx->widthStep *j);
datady = (float*)(dy->imageData + dy->widthStep *j);
for(i = 1; i < width-1; i++)
// Keep the gradient of the channel with the largest magnitude
c = 0;
x = (datadx[i*channels+c]);
y = (datady[i*channels+c]);
r[j * width + i] =sqrtf(x*x + y*y);
for(kk = 1; kk < channels; kk++)
tx = (datadx[i*channels+kk]);
ty = (datady[i*channels+kk]);
tmp =sqrtf(tx*tx + ty*ty);
if(tmp > r[j * width + i])
r[j * width + i] = tmp;
c = kk;
x = tx;
y = ty;
}/*for(kk = 1; kk < channels; kk++)*/
// Pick the direction (or its opposite) with the largest projection of the
// gradient; opposite directions are numbered kk + CNTPARTION
max = boundary_x[0]*x + boundary_y[0]*y;
maxi = 0;
for (kk = 0; kk < CNTPARTION; kk++) {
tmp_scal = boundary_x[kk]*x + boundary_y[kk]*y;
if (tmp_scal> max) {
max = tmp_scal;
maxi = kk;
}else if (-tmp_scal> max) {
max = -tmp_scal;
maxi = kk + CNTPARTION;
// First entry: bin folded to [0, CNTPARTION); second entry: full bin
alfa[j * width * 2 + i * 2 ] = maxi % CNTPARTION;
alfa[j * width * 2 + i * 2 + 1] = maxi;
}/*for(i = 1; i < width-1; i++)*/
}/*for(j = 1; j < height-1; j++)*/
//computing weights and offsets
nearest_x = (int *)malloc(sizeof(int) * k);
nearest_y = (int *)malloc(sizeof(int) * k);
w = (float*)malloc(sizeof(float) * (k * 2));
// Pixels in the first half of a cell also contribute to the previous cell,
// pixels in the second half to the next one
for(i = 0; i < k / 2; i++)
nearest_x[i] = -1;
nearest_y[i] = -1;
}/*for(i = 0; i < k / 2; i++)*/
for(i = k / 2; i < k; i++)
nearest_x[i] = 1;
nearest_y[i] = 1;
}/*for(i = k / 2; i < k; i++)*/
// w[2 * j] weights the pixel's own cell, w[2 * j + 1] the neighbouring cell;
// the two weights of a pixel sum to 1
for(j = 0; j < k / 2; j++)
b_x = k / 2 + j + 0.5f;
a_x = k / 2 - j - 0.5f;
w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
}/*for(j = 0; j < k / 2; j++)*/
for(j = k / 2; j < k; j++)
a_x = j - k / 2 + 0.5f;
b_x =-j + k / 2 - 0.5f + k;
w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
}/*for(j = k / 2; j < k; j++)*/
// Accumulate every interior pixel into its own cell and, where they exist,
// into the vertical, horizontal and diagonal neighbour cells. Slots
// [0, CNTPARTION) take the folded bin, slots from CNTPARTION on the full bin.
for(i = 0; i < sizeY; i++)
for(j = 0; j < sizeX; j++)
for(ii = 0; ii < k; ii++)
for(jj = 0; jj < k; jj++)
if ((i * k + ii > 0) && (i * k + ii < height - 1) && (j * k + jj > 0) && (j * k + jj < width - 1))
d = (k*i + ii)* width + (j*k + jj);
(*map)->Map[(i ) * strsz + (j ) * (*map)->p + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 ] * w[jj * 2 ];
(*map)->Map[(i ) * strsz + (j ) * (*map)->p + alfa[d * 2 + 1] + CNTPARTION] +=
r[d] * w[ii * 2 ] * w[jj * 2 ];
if ((i + nearest_y[ii] >= 0) && (i + nearest_y[ii] <= sizeY - 1))
(*map)->Map[(i + nearest_y[ii]) * strsz + (j ) * (*map)->p + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 ];
(*map)->Map[(i + nearest_y[ii]) * strsz + (j ) * (*map)->p + alfa[d * 2 + 1] + CNTPARTION] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 ];
if ((j + nearest_x[jj] >= 0) && (j + nearest_x[jj] <= sizeX - 1))
(*map)->Map[(i ) * strsz + (j + nearest_x[jj]) * (*map)->p + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 ] * w[jj * 2 + 1];
(*map)->Map[(i ) * strsz + (j + nearest_x[jj]) * (*map)->p + alfa[d * 2 + 1] + CNTPARTION] +=
r[d] * w[ii * 2 ] * w[jj * 2 + 1];
if ((i + nearest_y[ii] >= 0) && (i + nearest_y[ii] <= sizeY - 1) && (j + nearest_x[jj] >= 0) && (j + nearest_x[jj] <= sizeX - 1))
(*map)->Map[(i + nearest_y[ii]) * strsz + (j + nearest_x[jj]) * (*map)->p + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
(*map)->Map[(i + nearest_y[ii]) * strsz + (j + nearest_x[jj]) * (*map)->p + alfa[d * 2 + 1] + CNTPARTION] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
}/*for(jj = 0; jj < k; jj++)*/
}/*for(ii = 0; ii < k; ii++)*/
}/*for(j = 0; j < sizeX; j++)*/
}/*for(i = 0; i < sizeY; i++)*/
// Feature map Normalization and Truncation
// API
// int normalizationAndTruncationFeatureMaps(featureMap *map, const float alfa);
// map - feature map
// alfa - truncation threshold
// map - truncated and normalized feature map
// Error status
// Normalizes every interior cell of the map by the energy of each of the
// four 2 x 2 cell blocks that contain it, clamps the values to alfa from
// above and replaces map->Map with the result.
// The outer ring of cells is dropped (sizeX and sizeY shrink by 2) and the
// feature vector length becomes 4 * xp.
// NOTE(review): braces and the return statement are not present in this
// extract; norm_val is not guarded against zero in the visible lines.
int normalizationAndTruncationFeatureMaps(featureMap *map, const float alfa)
int i,j, ii;
int sizeX, sizeY, p, pos, pp, xp, pos1, pos2;
float * part_noma; // norm of C(i, j)
float * new_data;
float norm_val;
sizeX = map->sizeX;
sizeY = map->sizeY;
part_noma = (float *)malloc (sizeof(float) * (sizeX * sizeY));
// Per-cell sum of squares over the first xp / 3 components
p = map->xp / 3;
for(i = 0; i < sizeX * sizeY; i++)
norm_val = 0.0;
pos = i * map->p;
for(j = 0; j < p; j++)
norm_val += map->Map[pos + j] * map->Map[pos + j];
}/*for(j = 0; j < p; j++)*/
part_noma[i] = norm_val;
}/*for(i = 0; i < sizeX * sizeY; i++)*/
xp = map->xp;
pp = xp * 4;
// From here on sizeX / sizeY are the interior dimensions; the source map is
// still addressed with (sizeX + 2)
sizeX -= 2;
sizeY -= 2;
new_data = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
for(i = 1; i <= sizeY; i++)
for(j = 1; j <= sizeX; j++)
// Block made of the cell and its neighbours at (i + 1), (j + 1)
norm_val = sqrtf(
part_noma[(i )*(sizeX + 2) + (j )] +
part_noma[(i )*(sizeX + 2) + (j + 1)] +
part_noma[(i + 1)*(sizeX + 2) + (j )] +
part_noma[(i + 1)*(sizeX + 2) + (j + 1)]);
// pos1 - cell offset in the source map (stride xp per cell),
// pos2 - cell offset in the new map (stride pp per cell)
pos1 = (i ) * (sizeX + 2) * xp + (j ) * xp;
pos2 = (i-1) * (sizeX ) * pp + (j-1) * pp;
for(ii = 0; ii < p; ii++)
new_data[pos2 + ii ] = map->Map[pos1 + ii ] / norm_val;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
new_data[pos2 + ii + p * 4] = map->Map[pos1 + ii + p] / norm_val;
}/*for(ii = 0; ii < 2 * p; ii++)*/
// Block made of the cell and its neighbours at (i - 1), (j + 1)
norm_val = sqrtf(
part_noma[(i )*(sizeX + 2) + (j )] +
part_noma[(i )*(sizeX + 2) + (j + 1)] +
part_noma[(i - 1)*(sizeX + 2) + (j )] +
part_noma[(i - 1)*(sizeX + 2) + (j + 1)]);
for(ii = 0; ii < p; ii++)
new_data[pos2 + ii + p ] = map->Map[pos1 + ii ] / norm_val;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
new_data[pos2 + ii + p * 6] = map->Map[pos1 + ii + p] / norm_val;
}/*for(ii = 0; ii < 2 * p; ii++)*/
// Block made of the cell and its neighbours at (i + 1), (j - 1)
norm_val = sqrtf(
part_noma[(i )*(sizeX + 2) + (j )] +
part_noma[(i )*(sizeX + 2) + (j - 1)] +
part_noma[(i + 1)*(sizeX + 2) + (j )] +
part_noma[(i + 1)*(sizeX + 2) + (j - 1)]);
for(ii = 0; ii < p; ii++)
new_data[pos2 + ii + p * 2] = map->Map[pos1 + ii ] / norm_val;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
new_data[pos2 + ii + p * 8] = map->Map[pos1 + ii + p] / norm_val;
}/*for(ii = 0; ii < 2 * p; ii++)*/
// Block made of the cell and its neighbours at (i - 1), (j - 1)
norm_val = sqrtf(
part_noma[(i )*(sizeX + 2) + (j )] +
part_noma[(i )*(sizeX + 2) + (j - 1)] +
part_noma[(i - 1)*(sizeX + 2) + (j )] +
part_noma[(i - 1)*(sizeX + 2) + (j - 1)]);
for(ii = 0; ii < p; ii++)
new_data[pos2 + ii + p * 3 ] = map->Map[pos1 + ii ] / norm_val;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
new_data[pos2 + ii + p * 10] = map->Map[pos1 + ii + p] / norm_val;
}/*for(ii = 0; ii < 2 * p; ii++)*/
}/*for(j = 1; j <= sizeX; j++)*/
}/*for(i = 1; i <= sizeY; i++)*/
// Truncation: clamp every value to alfa from above
for(i = 0; i < sizeX * sizeY * pp; i++)
if(new_data [i] > alfa) new_data [i] = alfa;
}/*for(i = 0; i < sizeX * sizeY * pp; i++)*/
// Swap data: the map takes ownership of new_data, the old buffer is freed
map->p = pp;
map->xp = xp;
map->sizeX = sizeX;
map->sizeY = sizeY;
free (map->Map);
free (part_noma);
map->Map = new_data;
// Feature map reduction
// In each cell we reduce dimension of the feature vector
// according to original paper special procedure
// API
// int PCAFeatureMaps(featureMap *map)
// map - feature map
// map - feature map
// Error status
// Reduces the feature vector of every cell from map->p to map->xp + 4 values
// by summing groups of components, and replaces map->Map with the result.
// NOTE(review): braces and the return statement are not present in this
// extract. k is reset to 0 for every cell but never advanced in the visible
// lines; presumably a k++ follows each store into new_data - confirm against
// the full source.
int PCAFeatureMaps(featureMap *map)
int i,j, ii, jj, k;
int sizeX, sizeY, p, pp, xp, yp, pos1, pos2;
float * new_data;
float val;
float nx, ny;
sizeX = map->sizeX;
sizeY = map->sizeY;
p = map->p;
pp = map->xp + 4;
// yp - number of groups summed per output component,
// xp - one third of the stored row size
yp = 4;
xp = (map->xp / 3);
// Scale factors applied to the sums of 2 * xp and of yp components
nx = 1.0f / sqrtf((float)(xp * 2));
ny = 1.0f / sqrtf((float)(yp ));
new_data = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
for(i = 0; i < sizeY; i++)
for(j = 0; j < sizeX; j++)
// pos1 - cell offset in the source map, pos2 - in the reduced map
pos1 = ((i)*sizeX + j)*p;
pos2 = ((i)*sizeX + j)*pp;
k = 0;
// Sum over the yp groups of the 2 * xp components stored from yp * xp on
for(jj = 0; jj < xp * 2; jj++)
val = 0;
for(ii = 0; ii < yp; ii++)
val += map->Map[pos1 + yp * xp + ii * xp * 2 + jj];
}/*for(ii = 0; ii < yp; ii++)*/
new_data[pos2 + k] = val * ny;
}/*for(jj = 0; jj < xp * 2; jj++)*/
// Sum over the yp groups of the first xp components
for(jj = 0; jj < xp; jj++)
val = 0;
for(ii = 0; ii < yp; ii++)
val += map->Map[pos1 + ii * xp + jj];
}/*for(ii = 0; ii < yp; ii++)*/
new_data[pos2 + k] = val * ny;
}/*for(jj = 0; jj < xp; jj++)*/
// One value per group: sum of its 2 * xp components
for(ii = 0; ii < yp; ii++)
val = 0;
for(jj = 0; jj < 2 * xp; jj++)
val += map->Map[pos1 + yp * xp + ii * xp * 2 + jj];
}/*for(jj = 0; jj < 2 * xp; jj++)*/
new_data[pos2 + k] = val * nx;
} /*for(ii = 0; ii < yp; ii++)*/
}/*for(j = 0; j < sizeX; j++)*/
}/*for(i = 0; i < sizeY; i++)*/
// Swap data: the map takes ownership of new_data, the old buffer is freed
map->p = pp;
map->xp = pp;
free (map->Map);
map->Map = new_data;
// Getting feature pyramid
// API
// int getFeaturePyramid(IplImage * image,
//                       const int lambda, const int k,
//                       const int startX, const int startY,
//                       const int W, const int H, featurePyramid **maps);
// image - image
// lambda - resize scale
// k - size of cells
// startX - X coordinate of the image rectangle to search
// startY - Y coordinate of the image rectangle to search
// W - width of the image rectangle to search
// H - height of the image rectangle to search
// maps - feature maps for all levels
// Error status
// Builds the feature pyramid for the rectangle (startX, startY, W, H) of
// image: lambda levels computed with cells of 4 pixels followed by cntStep
// levels computed with cells of 8 pixels, each level downscaled by a further
// factor of 2^(1/lambda).
// NOTE(review): braces, the else keyword, the image releases and the return
// statement are not present in this extract. In the visible lines the ROI
// set on the caller's image is not reset, and err is overwritten without
// being checked - confirm against the full source.
int getFeaturePyramid(IplImage * image,
const int lambda, const int k,
const int startX, const int startY,
const int W, const int H, featurePyramid **maps)
IplImage *img2, *imgTmp, *imgResize;
float step, tmp;
int cntStep;
int maxcall;
int i;
int err;
featureMap *map;
// Getting the subimage
cvSetImageROI(image, cvRect(startX, startY, W, H));
img2 = cvCreateImage(cvGetSize(image), image->depth, image->nChannels);
cvCopy(image, img2, NULL);
// Work on a 32-bit float copy unless the image already has that depth
if(img2->depth != IPL_DEPTH_32F)
imgResize = cvCreateImage(cvSize(img2->width , img2->height) , IPL_DEPTH_32F , 3);
cvConvert(img2, imgResize);
imgResize = img2;
// Scale step between neighbouring levels
step = powf(2.0f, 1.0f/ ((float)lambda));
// Number of levels is derived from the smaller image side measured in cells
maxcall = W/k;
if( maxcall > H/k )
maxcall = H/k;
cntStep = (int)(logf((float)maxcall/(5.0f))/logf(step)) + 1;
//printf("Count step: %f %d\n", step, cntStep);
allocFeaturePyramidObject(maps, lambda, cntStep + lambda);
// First lambda levels: cell size 4 (the k argument is not used here)
for(i = 0; i < lambda; i++)
tmp = 1.0f / powf(step, (float)i);
imgTmp = resize_opencv (imgResize, tmp);
//imgTmp = resize_article_dp(img2, tmp, 4);
err = getFeatureMaps_dp(imgTmp, 4, &map);
err = normalizationAndTruncationFeatureMaps(map, 0.2f);
err = PCAFeatureMaps(map);
(*maps)->pyramid[i] = map;
//printf("%d, %d\n", map->sizeY, map->sizeX);
// Remaining cntStep levels: cell size 8, stored after the first lambda levels
for(i = 0; i < cntStep; i++)
tmp = 1.0f / powf(step, (float)i);
imgTmp = resize_opencv (imgResize, tmp);
//imgTmp = resize_article_dp(imgResize, tmp, 8);
err = getFeatureMaps_dp(imgTmp, 8, &map);
err = normalizationAndTruncationFeatureMaps(map, 0.2f);
err = PCAFeatureMaps(map);
(*maps)->pyramid[i + lambda] = map;
//printf("%d, %d\n", map->sizeY, map->sizeX);
}/*for(i = 0; i < cntStep; i++)*/
if(img2->depth != IPL_DEPTH_32F)
// add zero border to feature map
// API
// int addBordersToFeatureMaps(featureMap *map, const int bX, const int bY);
// map - feature map
// bX - border size in x
// bY - border size in y
// map - feature map
// Error status
// Surrounds the feature map with a zero border of bX cells on the left and
// right and bY cells on the top and bottom. The map data is reallocated and
// sizeX / sizeY are updated; p is unchanged.
// NOTE(review): the closing brace and the return statement are not present
// in this extract; the result of malloc is not checked in the visible lines.
int addBordersToFeatureMaps(featureMap *map, const int bX, const int bY){
int i,j, jj;
int sizeX, sizeY, p, pos1, pos2;
float * new_data;
sizeX = map->sizeX;
sizeY = map->sizeY;
p = map->p;
// New buffer for the enlarged map, filled with zeros
new_data = (float *)malloc (sizeof(float) * ((sizeX + 2 * bX) * (sizeY + 2 * bY) * p));
for(i = 0; i < ((sizeX + 2 * bX) * (sizeY + 2 * bY) * p); i++)
new_data[i] = (float)0;
}/*for(i = 0; i < ((sizeX + 2 * bX) * (sizeY + 2 * bY) * p); i++)*/
// Copy every original cell to its position shifted by (bX, bY)
for(i = 0; i < sizeY; i++)
for(j = 0; j < sizeX; j++)
pos1 = ((i )*sizeX + (j )) * p;
pos2 = ((i + bY)*(sizeX + 2 * bX) + (j + bX)) * p;
for(jj = 0; jj < p; jj++)
new_data[pos2 + jj] = map->Map[pos1 + jj];
}/*for(jj = 0; jj < p; jj++)*/
}/*for(j = 0; j < sizeX; j++)*/
}/*for(i = 0; i < sizeY; i++)*/
// Swap data: the map takes ownership of new_data, the old buffer is freed
map->sizeX = sizeX + 2 * bX;
map->sizeY = sizeY + 2 * bY;
free (map->Map);
map->Map = new_data;

@ -0,0 +1,246 @@
#include "_fft.h"
int getEntireRes(int number, int divisor, int *entire, int *res)
*entire = number / divisor;
*res = number % divisor;
return FFT_OK;
int getMultipliers(int n, int *n1, int *n2)
int multiplier, i;
if (n == 1)
*n1 = 1;
*n2 = 1;
return FFT_ERROR; // n = 1
multiplier = n / 2;
for (i = multiplier; i >= 2; i--)
if (n % i == 0)
*n1 = i;
*n2 = n / i;
return FFT_OK; // n = n1 * n2
*n1 = 1;
*n2 = n;
return FFT_ERROR; // n - prime number
// 1-dimensional FFT
// API
// int fft(float *x_in, float *x_out, int n, int shift);
// x_in - input signal
// n - number of elements for searching Fourier image
// shift - shift between input elements
// x_out - output signal (contains 2n elements in order
//         Re(x_in[0]), Im(x_in[0]), Re(x_in[1]), Im(x_in[1]) and etc.)
// Error status
// One-dimensional Fourier transform of n complex samples.
// Sample number j is read from x_in[shift * j] (real part) and
// x_in[shift * j + 1] (imaginary part); results are written to x_out with
// the same layout.
// n is split as n1 * n2 by getMultipliers; when no split exists n1 = 1 and
// n2 = n, and the nested sums below reduce to a direct evaluation.
// NOTE(review): braces are not present in this extract, so the extent of the
// (res == FFT_OK) branch with the two recursive calls cannot be determined
// here - confirm against the full source.
int fft(float *x_in, float *x_out, int n, int shift)
int n1, n2, res, k1, k2, m1, m2, index, idx;
float alpha, beta, gamma, angle, cosAngle, sinAngle;
float tmpGamma, tmpAlpha, tmpBeta;
float tmpRe, tmpIm, phaseRe, phaseIm;
res = getMultipliers(n, &n1, &n2);
if (res == FFT_OK)
fft(x_in, x_out, n1, shift);
fft(x_in, x_out, n2, shift);
// Base angles for transforms of length n, n1 and n2
alpha = (float)(2.0 * PI / ((float)n));
beta = (float)(2.0 * PI / ((float)n1));
gamma = (float)(2.0 * PI / ((float)n2));
for (k1 = 0; k1 < n1; k1++)
tmpBeta = beta * k1;
for (k2 = 0; k2 < n2; k2++)
// Output element number n2 * k1 + k2
idx = shift * (n2 * k1 + k2);
x_out[idx] = 0.0;
x_out[idx + 1] = 0.0;
tmpGamma = gamma * k2;
tmpAlpha = alpha * k2;
for (m1 = 0; m1 < n1; m1++)
tmpRe = 0.0;
tmpIm = 0.0;
// Inner sum over m2: input element n1 * m2 + m1 rotated by -gamma * k2 * m2
for (m2 = 0; m2 < n2; m2++)
angle = tmpGamma * m2;
index = shift * (n1 * m2 + m1);
cosAngle = cosf(angle);
sinAngle = sinf(angle);
tmpRe += x_in[index] * cosAngle + x_in[index + 1] * sinAngle;
tmpIm += x_in[index + 1] * cosAngle - x_in[index] * sinAngle;
// Rotate the inner sum by -alpha * k2 * m1
angle = tmpAlpha * m1;
cosAngle = cosf(angle);
sinAngle = sinf(angle);
phaseRe = cosAngle * tmpRe + sinAngle * tmpIm;
phaseIm = cosAngle * tmpIm - sinAngle * tmpRe;
// Rotate by -beta * k1 * m1 and accumulate into the output element
angle = tmpBeta * m1;
cosAngle = cosf(angle);
sinAngle = sinf(angle);
x_out[idx] += (cosAngle * phaseRe + sinAngle * phaseIm);
x_out[idx + 1] += (cosAngle * phaseIm - sinAngle * phaseRe);
return FFT_OK;
// Inverse 1-dimensional FFT
// API
// int fftInverse(float *x_in, float *x_out, int n, int shift);
// x_in - Fourier image of 1d input signal (contains 2n elements
//        in order Re(x_in[0]), Im(x_in[0]),
//        Re(x_in[1]), Im(x_in[1]) and etc.)
// n - number of elements for searching counter FFT image
// shift - shift between input elements
// x_out - restored signal (n elements, each written as a Re, Im pair)
// Error status
// Inverse one-dimensional Fourier transform of n complex samples.
// Element number j is read from x_in[shift * j] (real part) and
// x_in[shift * j + 1] (imaginary part); results are written to x_out with
// the same layout and divided by n.
// n is split as n1 * n2 by getMultipliers; when no split exists n1 = 1 and
// n2 = n.
// NOTE(review): braces are not present in this extract, so the extent of the
// (res == FFT_OK) branch with the two recursive calls cannot be determined
// here - confirm against the full source.
int fftInverse(float *x_in, float *x_out, int n, int shift)
int n1, n2, res, k1, k2, m1, m2, index, idx;
float alpha, beta, gamma, angle, cosAngle, sinAngle;
float tmpRe, tmpIm, phaseRe, phaseIm;
res = getMultipliers(n, &n1, &n2);
if (res == FFT_OK)
fftInverse(x_in, x_out, n1, shift);
fftInverse(x_in, x_out, n2, shift);
// Base angles for transforms of length n, n1 and n2
alpha = (float)(2.0f * PI / ((float)n));
beta = (float)(2.0f * PI / ((float)n1));
gamma = (float)(2.0f * PI / ((float)n2));
for (m1 = 0; m1 < n1; m1++)
for (m2 = 0; m2 < n2; m2++)
// Output element number n1 * m2 + m1
idx = (n1 * m2 + m1) * shift;
x_out[idx] = 0.0;
x_out[idx + 1] = 0.0;
for (k2 = 0; k2 < n2; k2++)
tmpRe = 0.0;
tmpIm = 0.0;
// Inner sum over k1: input element n2 * k1 + k2 rotated by +beta * k1 * m1
for (k1 = 0; k1 < n1; k1++)
angle = beta * k1 * m1;
index = shift *(n2 * k1 + k2);
sinAngle = sinf(angle);
cosAngle = cosf(angle);
tmpRe += x_in[index] * cosAngle - x_in[index + 1] * sinAngle;
tmpIm += x_in[index] * sinAngle + x_in[index + 1] * cosAngle;
// Rotate the inner sum by +alpha * m1 * k2
angle = alpha * m1 * k2;
sinAngle = sinf(angle);
cosAngle = cosf(angle);
phaseRe = cosAngle * tmpRe - sinAngle * tmpIm;
phaseIm = cosAngle * tmpIm + sinAngle * tmpRe;
// Rotate by +gamma * k2 * m2 and accumulate into the output element
angle = gamma * k2 * m2;
sinAngle = sinf(angle);
cosAngle = cosf(angle);
x_out[idx] += cosAngle * phaseRe - sinAngle * phaseIm;
x_out[idx + 1] += cosAngle * phaseIm + sinAngle * phaseRe;
// Normalization of the inverse transform
x_out[idx] /= n;
x_out[idx + 1] /= n;
return FFT_OK;
// 2-dimensional FFT
// API
// int fft2d(float *x_in, float *x_out, int numRows, int numColls);
// x_in - input signal (matrix, launched by rows)
// numRows - number of rows
// numColls - number of collumns
// x_out - output signal (contains (2 * numRows * numColls) elements
//         in order Re(x_in[0][0]), Im(x_in[0][0]),
//         Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// Error status
int fft2d(float *x_in, float *x_out, int numRows, int numColls)
int i, size;
float *x_outTmp;
size = numRows * numColls;
x_outTmp = (float *)malloc(sizeof(float) * (2 * size));
for (i = 0; i < numRows; i++)
fft(x_in + i * 2 * numColls,
x_outTmp + i * 2 * numColls,
numColls, 2);
for (i = 0; i < numColls; i++)
fft(x_outTmp + 2 * i,
x_out + 2 * i,
numRows, 2 * numColls);
return FFT_OK;
// Inverse 2-dimensional FFT
// API
// int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls);
// x_in - Fourier image of matrix (contains (2 * numRows * numColls)
//        elements in order Re(x_in[0][0]), Im(x_in[0][0]),
//        Re(x_in[0][1]), Im(x_in[0][1]) and etc.)
// numRows - number of rows
// numColls - number of collumns
// x_out - initial signal (matrix, launched by rows)
// Error status
int fftInverse2d(float *x_in, float *x_out, int numRows, int numColls)
int i, size;
float *x_outTmp;
size = numRows * numColls;
x_outTmp = (float *)malloc(sizeof(float) * (2 * size));
for (i = 0; i < numRows; i++)
fftInverse(x_in + i * 2 * numColls,
x_outTmp + i * 2 * numColls,
numColls, 2);
for (i = 0; i < numColls; i++)
fftInverse(x_outTmp + 2 * i,
x_out + 2 * i,
numRows, 2 * numColls);
return FFT_OK;

@ -0,0 +1,611 @@
#include "_latentsvm.h"
#include "_matching.h"
// Transformation filter displacement from the block space
// to the space of pixels at the initial image
// API
// int convertPoints(int countLevel, int lambda,
//                   int initialImageLevel,
//                   CvPoint *points, int *levels,
//                   CvPoint **partsDisplacement, int kPoints, int n,
//                   int maxXBorder,
//                   int maxYBorder);
// countLevel - the number of levels in the feature pyramid
// points - the set of root filter positions (in the block space)
// levels - the set of levels
// partsDisplacement - displacement of part filters (in the block space)
// kPoints - number of root filter positions
// n - number of part filters
// initialImageLevel - level that contains features for initial image
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// points - the set of root filter positions (in the space of pixels)
// partsDisplacement - displacement of part filters (in the space of pixels)
// Error status
// Converts root filter positions and part filter displacements, in place,
// from block coordinates at their pyramid level to pixel coordinates of the
// initial image.
// NOTE(review): braces and the return statement are not present in this
// extract; countLevel is not used in the visible lines.
int convertPoints(int countLevel, int lambda,
int initialImageLevel,
CvPoint *points, int *levels,
CvPoint **partsDisplacement, int kPoints, int n,
int maxXBorder,
int maxYBorder)
int i, j, bx, by;
float step, scale;
// Scale factor between neighbouring pyramid levels
step = powf( 2.0f, 1.0f / ((float)lambda) );
// Border that was added around every feature map, in blocks
computeBorderSize(maxXBorder, maxYBorder, &bx, &by);
for (i = 0; i < kPoints; i++)
// scaling factor for root filter
scale = SIDE_LENGTH * powf(step, (float)(levels[i] - initialImageLevel));
// Remove the border offset before scaling to pixels
points[i].x = (int)((points[i].x - bx + 1) * scale);
points[i].y = (int)((points[i].y - by + 1) * scale);
// scaling factor for part filters
// (parts are taken lambda levels below the root filter level)
scale = SIDE_LENGTH * powf(step, (float)(levels[i] - lambda - initialImageLevel));
for (j = 0; j < n; j++)
partsDisplacement[i][j].x = (int)((partsDisplacement[i][j].x -
2 * bx + 1) * scale);
partsDisplacement[i][j].y = (int)((partsDisplacement[i][j].y -
2 * by + 1) * scale);
// Elimination boxes that are outside the image boundaries
// API
// int clippingBoxes(int width, int height,
//                   CvPoint *points, int kPoints);
// width - image width
// height - image height
// points - a set of points (coordinates of top left or
//          bottom right corners)
// kPoints - points number
// points - updated points (coordinates less than zero are set to zero,
//          coordinates beyond the image are set to the last valid
//          pixel index, i.e. width - 1 or height - 1)
// Error status
// Clamps every point, in place, to the pixel rectangle
// [0, width - 1] x [0, height - 1].
// NOTE(review): braces and the return statement are not present in this
// extract - confirm against the full source.
int clippingBoxes(int width, int height,
CvPoint *points, int kPoints)
int i;
for (i = 0; i < kPoints; i++)
// Clamp x to the last column, then to the first one
if (points[i].x > width - 1)
points[i].x = width - 1;
if (points[i].x < 0)
points[i].x = 0;
// Clamp y to the last row, then to the first one
if (points[i].y > height - 1)
points[i].y = height - 1;
if (points[i].y < 0)
points[i].y = 0;
// Creation feature pyramid with nullable border
// API
// featurePyramid* createFeaturePyramidWithBorder(IplImage *image,
//                                                int maxXBorder, int maxYBorder);
// image - initial image
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// Feature pyramid with nullable border
// Builds the feature pyramid of the whole image (LAMBDA levels per octave,
// SIDE_LENGTH passed as the cell size) and adds a border to every level.
// Returns the pyramid, or NULL when getFeaturePyramid does not report
// LATENT_SVM_OK.
// NOTE(review): braces are not present in this extract - confirm against the
// full source.
featurePyramid* createFeaturePyramidWithBorder(IplImage *image,
int maxXBorder, int maxYBorder)
int opResult;
int bx, by;
int level;
featurePyramid *H;
// Obtaining feature pyramid
opResult = getFeaturePyramid(image, LAMBDA, SIDE_LENGTH, 0, 0,
image->width, image->height, &H);
if (opResult != LATENT_SVM_OK)
return NULL;
} /* if (opResult != LATENT_SVM_OK) */
// Addition nullable border for each feature map
// the size of the border for root filters
computeBorderSize(maxXBorder, maxYBorder, &bx, &by);
// The same border size is applied to every pyramid level
for (level = 0; level < H->countLevel; level++)
addNullableBorder(H->pyramid[level], bx, by);
return H;
// Computation of the root filter displacement and values of score function
// API
// int searchObject(const featurePyramid *H, const filterObject **all_F, int n,
float b,
int maxXBorder,
int maxYBorder,
CvPoint **points, int **levels, int *kPoints, float *score,
CvPoint ***partsDisplacement);
// image - initial image for searhing object
// all_F - the set of filters (the first element is root filter,
other elements - part filters)
// n - the number of part filters
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// points - positions (x, y) of the upper-left corner
of root filter frame
// levels - levels that correspond to each position
// kPoints - number of positions
// score - value of the score function
// partsDisplacement - part filters displacement for each position
of the root filter
// Error status
int searchObject(const featurePyramid *H, const filterObject **all_F,
int n, float b,
int maxXBorder,
int maxYBorder,
CvPoint **points, int **levels, int *kPoints, float *score,
CvPoint ***partsDisplacement)
int opResult;
// Matching
opResult = maxFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder,
score, points, levels,
kPoints, partsDisplacement);
if (opResult != LATENT_SVM_OK)
// Transformation filter displacement from the block space
// to the space of pixels at the initial image
// that settles at the level number LAMBDA
convertPoints(H->countLevel, H->lambda, LAMBDA, (*points),
(*levels), (*partsDisplacement), (*kPoints), n,
maxXBorder, maxYBorder);
/*
// Computation of the right bottom corner coordinates of bounding boxes
//
// API
// int estimateBoxes(CvPoint *points, int *levels, int kPoints,
//                   int sizeX, int sizeY, CvPoint **oppositePoints);
// INPUT
// points            - left top corners coordinates of bounding boxes
// levels            - levels of feature pyramid where points were found
// kPoints           - number of points
// (sizeX, sizeY)    - size of root filter
// OUTPUT
// oppositePoints    - right bottom corners coordinates of bounding boxes
//                     (malloc'ed array of kPoints elements, released by the
//                     caller with free(); NULL when kPoints <= 0)
// RESULT
// Error status
*/
int estimateBoxes(CvPoint *points, int *levels, int kPoints,
                  int sizeX, int sizeY, CvPoint **oppositePoints)
{
    int i;
    float step;

    // Nothing to compute: avoid a zero/negative sized allocation
    if (kPoints <= 0)
    {
        *oppositePoints = NULL;
        return LATENT_SVM_OK;
    }

    step = powf( 2.0f, 1.0f / ((float)(LAMBDA)));

    // NOTE(review): the allocation result is not checked because no
    // out-of-memory status code is visible in this part of the file.
    *oppositePoints = (CvPoint *)malloc(sizeof(CvPoint) * kPoints);
    for (i = 0; i < kPoints; i++)
    {
        getOppositePoint(points[i], sizeX, sizeY, step, levels[i] - LAMBDA,
                         &((*oppositePoints)[i]));
    }
    return LATENT_SVM_OK;
}
// Computation of the root filter displacement and values of score function
// API
// int searchObjectThreshold(const featurePyramid *H,
const filterObject **all_F, int n,
float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
CvPoint **points, int **levels, int *kPoints,
float **score, CvPoint ***partsDisplacement);
// H - feature pyramid
// all_F - the set of filters (the first element is root filter,
other elements - part filters)
// n - the number of part filters
// b - linear term of the score function
// maxXBorder - the largest root filter size (X-direction)
// maxYBorder - the largest root filter size (Y-direction)
// scoreThreshold - score threshold
// points - positions (x, y) of the upper-left corner
of root filter frame
// levels - levels that correspond to each position
// kPoints - number of positions
// score - values of the score function
// partsDisplacement - part filters displacement for each position
of the root filter
// Error status
int searchObjectThreshold(const featurePyramid *H,
const filterObject **all_F, int n,
float b,
int maxXBorder, int maxYBorder,
float scoreThreshold,
CvPoint **points, int **levels, int *kPoints,
float **score, CvPoint ***partsDisplacement)
int opResult;
// Matching
opResult = thresholdFunctionalScore(all_F, n, H, b,
maxXBorder, maxYBorder,
score, points, levels,
kPoints, partsDisplacement);
if (opResult != LATENT_SVM_OK)
// Transformation filter displacement from the block space
// to the space of pixels at the initial image
// that settles at the level number LAMBDA
convertPoints(H->countLevel, H->lambda, LAMBDA, (*points),
(*levels), (*partsDisplacement), (*kPoints), n,
maxXBorder, maxYBorder);
// Compute opposite point for filter box
// API
// int getOppositePoint(CvPoint point,
int sizeX, int sizeY,
float step, int degree,
CvPoint *oppositePoint);
// point - coordinates of filter top left corner
(in the space of pixels)
// (sizeX, sizeY) - filter dimension in the block space
// step - scaling factor
// degree - degree of the scaling factor
// oppositePoint - coordinates of filter bottom corner
(in the space of pixels)
// Error status
int getOppositePoint(CvPoint point,
int sizeX, int sizeY,
float step, int degree,
CvPoint *oppositePoint)
float scale;
scale = SIDE_LENGTH * powf(step, (float)degree);
oppositePoint->x = (int)(point.x + sizeX * scale);
oppositePoint->y = (int)(point.y + sizeY * scale);
/*
// Drawing root filter boxes
//
// API
// int showRootFilterBoxes(IplImage *image,
//                         const filterObject *filter,
//                         CvPoint *points, int *levels, int kPoints,
//                         CvScalar color, int thickness,
//                         int line_type, int shift);
// INPUT
// image             - initial image (the boxes are drawn into it)
// filter            - root filter object
// points            - a set of points (top left corners)
// levels            - levels of feature pyramid
// kPoints           - number of points
// color             - line color for each box
// thickness         - line thickness
// line_type         - line type
// shift             - shift
// OUTPUT
// window "Initial image" showing the image with the filter boxes
// RESULT
// Error status
*/
int showRootFilterBoxes(IplImage *image,
                        const filterObject *filter,
                        CvPoint *points, int *levels, int kPoints,
                        CvScalar color, int thickness,
                        int line_type, int shift)
{
    int i;
    float step;
    CvPoint oppositePoint;

    step = powf( 2.0f, 1.0f / ((float)LAMBDA));

    for (i = 0; i < kPoints; i++)
    {
        // Drawing rectangle for filter
        getOppositePoint(points[i], filter->sizeX, filter->sizeY,
                         step, levels[i] - LAMBDA, &oppositePoint);
        cvRectangle(image, points[i], oppositePoint,
                    color, thickness, line_type, shift);
    }
    cvShowImage("Initial image", image);
    return LATENT_SVM_OK;
}
/*
// Drawing part filter boxes
//
// API
// int showPartFilterBoxes(IplImage *image,
//                         const filterObject **filters,
//                         int n, CvPoint **partsDisplacement,
//                         int *levels, int kPoints,
//                         CvScalar color, int thickness,
//                         int line_type, int shift);
// INPUT
// image             - initial image (the boxes are drawn into it)
// filters           - a set of filters; filters[0] is skipped and
//                     filters[1..n] are used as the part filters
// n                 - number of part filters
// partsDisplacement - top left corners of the part boxes, indexed as
//                     [root position][part]
// levels            - levels of feature pyramid
// kPoints           - number of root filter positions
// color             - line color for each box
// thickness         - line thickness
// line_type         - line type
// shift             - shift
// OUTPUT
// window "Initial image" showing the image with the filter boxes
// RESULT
// Error status
*/
int showPartFilterBoxes(IplImage *image,
                        const filterObject **filters,
                        int n, CvPoint **partsDisplacement,
                        int *levels, int kPoints,
                        CvScalar color, int thickness,
                        int line_type, int shift)
{
    int i, j;
    float step;
    CvPoint oppositePoint;

    step = powf( 2.0f, 1.0f / ((float)LAMBDA));

    for (i = 0; i < kPoints; i++)
    {
        for (j = 0; j < n; j++)
        {
            // Drawing rectangles for part filters
            getOppositePoint(partsDisplacement[i][j],
                             filters[j + 1]->sizeX, filters[j + 1]->sizeY,
                             step, levels[i] - 2 * LAMBDA, &oppositePoint);
            cvRectangle(image, partsDisplacement[i][j], oppositePoint,
                        color, thickness, line_type, shift);
        }
    }
    cvShowImage("Initial image", image);
    return LATENT_SVM_OK;
}
/*
// Drawing boxes
//
// API
// int showBoxes(IplImage *img,
//               const CvPoint *points, const CvPoint *oppositePoints, int kPoints,
//               CvScalar color, int thickness, int line_type, int shift);
// INPUT
// img               - initial image (the boxes are drawn into it)
// points            - top left corner coordinates
// oppositePoints    - right bottom corner coordinates
// kPoints           - points number
// color             - line color for each box
// thickness         - line thickness
// line_type         - line type
// shift             - shift
// OUTPUT
// window "Initial image" showing the image with the boxes
// RESULT
// Error status
*/
int showBoxes(IplImage *img,
              const CvPoint *points, const CvPoint *oppositePoints, int kPoints,
              CvScalar color, int thickness, int line_type, int shift)
{
    int i;
    for (i = 0; i < kPoints; i++)
    {
        cvRectangle(img, points[i], oppositePoints[i],
                    color, thickness, line_type, shift);
    }
    cvShowImage("Initial image", img);
    return LATENT_SVM_OK;
}
// Computation maximum filter size for each dimension
// API
// int getMaxFilterDims(const filterObject **filters, int kComponents,
const int *kPartFilters,
unsigned int *maxXBorder, unsigned int *maxYBorder);
// filters - a set of filters (at first root filter, then part filters
and etc. for all components)
// kComponents - number of components
// kPartFilters - number of part filters for each component
// maxXBorder - maximum of filter size at the horizontal dimension
// maxYBorder - maximum of filter size at the vertical dimension
// Error status
int getMaxFilterDims(const filterObject **filters, int kComponents,
const int *kPartFilters,
unsigned int *maxXBorder, unsigned int *maxYBorder)
int i, componentIndex;
*maxXBorder = filters[0]->sizeX;
*maxYBorder = filters[0]->sizeY;
componentIndex = kPartFilters[0] + 1;
for (i = 1; i < kComponents; i++)
if (filters[componentIndex]->sizeX > *maxXBorder)
*maxXBorder = filters[componentIndex]->sizeX;
if (filters[componentIndex]->sizeY > *maxYBorder)
*maxYBorder = filters[componentIndex]->sizeY;
componentIndex += (kPartFilters[i] + 1);
/*
// Computation of root filter displacements and values of the score function
// for all components
//
// API
// int searchObjectThresholdSomeComponents(const featurePyramid *H,
//                                         const filterObject **filters,
//                                         int kComponents, const int *kPartFilters,
//                                         const float *b, float scoreThreshold,
//                                         CvPoint **points, CvPoint **oppPoints,
//                                         float **score, int *kPoints);
// INPUT
// H                 - feature pyramid
// filters           - filters (root filter, then its part filters, etc.)
// kComponents       - root filters number
// kPartFilters      - array of part filters number for each component
// b                 - array of linear terms
// scoreThreshold    - score threshold
// OUTPUT
// points            - root filters displacement (top left corners)
// oppPoints         - root filters displacement (bottom right corners)
// score             - array of score values
// kPoints           - number of boxes
// RESULT
// Error status
*/
// Runs the thresholded search once per component and concatenates the
// per-component boxes and scores into single output arrays.
// *points, *oppPoints and *score are allocated here with malloc
// (*kPoints elements each).
// NOTE(review): this listing shows no braces and ends inside the release
// loop; the remaining clean-up and the return statement are not visible -
// confirm against the repository file.
int searchObjectThresholdSomeComponents(const featurePyramid *H,
const filterObject **filters,
int kComponents, const int *kPartFilters,
const float *b, float scoreThreshold,
CvPoint **points, CvPoint **oppPoints,
float **score, int *kPoints)
int error = 0;
int i, j, s, f, componentIndex;
unsigned int maxXBorder, maxYBorder;
CvPoint **pointsArr, **oppPointsArr, ***partsDisplacementArr;
float **scoreArr;
int *kPointsArr, **levelsArr;
// Allocation of one result slot per component
// (allocation results are not checked)
pointsArr = (CvPoint **)malloc(sizeof(CvPoint *) * kComponents);
oppPointsArr = (CvPoint **)malloc(sizeof(CvPoint *) * kComponents);
scoreArr = (float **)malloc(sizeof(float *) * kComponents);
kPointsArr = (int *)malloc(sizeof(int) * kComponents);
levelsArr = (int **)malloc(sizeof(int *) * kComponents);
partsDisplacementArr = (CvPoint ***)malloc(sizeof(CvPoint **) * kComponents);
// Getting maximum filter dimensions
// (the status stored in error is not examined in the visible lines)
error = getMaxFilterDims(filters, kComponents, kPartFilters, &maxXBorder, &maxYBorder);
componentIndex = 0;
*kPoints = 0;
// For each component perform searching
for (i = 0; i < kComponents; i++)
// filters[componentIndex] is the root filter of component i; the return
// values of searchObjectThreshold and estimateBoxes are ignored
searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
b[i], maxXBorder, maxYBorder, scoreThreshold,
&(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
&(scoreArr[i]), &(partsDisplacementArr[i]));
estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i],
filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i]));
// Skip this component's root filter and its part filters
componentIndex += (kPartFilters[i] + 1);
*kPoints += kPointsArr[i];
// Output arrays sized for the boxes of all components together
*points = (CvPoint *)malloc(sizeof(CvPoint) * (*kPoints));
*oppPoints = (CvPoint *)malloc(sizeof(CvPoint) * (*kPoints));
*score = (float *)malloc(sizeof(float) * (*kPoints));
// Concatenation: component i fills the output index range [s, f)
s = 0;
for (i = 0; i < kComponents; i++)
f = s + kPointsArr[i];
for (j = s; j < f; j++)
(*points)[j].x = pointsArr[i][j - s].x;
(*points)[j].y = pointsArr[i][j - s].y;
(*oppPoints)[j].x = oppPointsArr[i][j - s].x;
(*oppPoints)[j].y = oppPointsArr[i][j - s].y;
(*score)[j] = scoreArr[i][j - s];
s = f;
// Release allocated memory
// NOTE(review): the bodies of these loops are not visible in this listing.
for (i = 0; i < kComponents; i++)
for (j = 0; j < kPointsArr[i]; j++)

@ -0,0 +1,134 @@
#include "precomp.hpp"
#include "_lsvmparser.h"
#include "_matching.h"
// load trained detector from a file
// API
// CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
// filename - path to the file containing the parameters of
- trained Latent SVM detector
// trained Latent SVM detector in internal representation
CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename)
CvLatentSvmDetector* detector = 0;
filterObject** filters = 0;
int kFilters = 0;
int kComponents = 0;
int* kPartFilters = 0;
float* b = 0;
float scoreThreshold = 0.f;
loadModel(filename, &filters, &kFilters, &kComponents, &kPartFilters, &b, &scoreThreshold);
detector = (CvLatentSvmDetector*)malloc(sizeof(CvLatentSvmDetector));
detector->filters = filters;
detector->b = b;
detector->num_components = kComponents;
detector->num_filters = kFilters;
detector->num_part_filters = kPartFilters;
detector->score_threshold = scoreThreshold;
return detector;
/*
// release memory allocated for CvLatentSvmDetector structure
//
// API
// void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
// INPUT
// detector             - CvLatentSvmDetector structure to be released
// OUTPUT
// *detector is set to 0. Passing 0 or a pointer to 0 is a no-op.
*/
void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector)
{
    if (detector == 0 || *detector == 0)
    {
        return;
    }

    // Everything below was allocated with malloc by the model loader:
    // the linear terms, the per-component part counts, every filter with
    // its weight array, and the array of filter pointers.
    free((*detector)->b);
    free((*detector)->num_part_filters);
    for (int i = 0; i < (*detector)->num_filters; i++)
    {
        free((*detector)->filters[i]->H);
        free((*detector)->filters[i]);
    }
    free((*detector)->filters);
    free((*detector));
    *detector = 0;
}
/*
// find rectangular regions in the given image that are likely
// to contain objects and corresponding confidence levels
//
// API
// CvSeq* cvLatentSvmDetectObjects(IplImage* image,
//                                  CvLatentSvmDetector* detector,
//                                  CvMemStorage* storage,
//                                  float overlap_threshold = 0.5f);
// INPUT
// image                - image to detect objects in; its R and B channels
//                        are swapped in place during the call and swapped
//                        back before returning
// detector             - Latent SVM detector in internal representation
// storage              - memory storage to store the resultant sequence
//                        of the object candidate rectangles
// overlap_threshold    - threshold for the non-maximum suppression algorithm
// OUTPUT
// sequence of detected objects (bounding boxes and confidence levels stored
// in CvObjectDetection structures), or 0 when image/detector is 0 or the
// feature pyramid could not be built
*/
CvSeq* cvLatentSvmDetectObjects(IplImage* image,
                                CvLatentSvmDetector* detector,
                                CvMemStorage* storage,
                                float overlap_threshold)
{
    featurePyramid *H = 0;
    CvPoint *points = 0, *oppPoints = 0;
    int kPoints = 0;
    float *score = 0;
    unsigned int maxXBorder = 0, maxYBorder = 0;
    int numBoxesOut = 0;
    CvPoint *pointsOut = 0;
    CvPoint *oppPointsOut = 0;
    float *scoreOut = 0;
    CvSeq* result_seq = 0;

    if (image == 0 || detector == 0)
    {
        return 0;
    }

    cvConvertImage(image, image, CV_CVTIMG_SWAP_RB);
    // Getting maximum filter dimensions
    getMaxFilterDims((const filterObject**)(detector->filters), detector->num_components,
                     detector->num_part_filters, &maxXBorder, &maxYBorder);
    // Create feature pyramid with nullable border
    H = createFeaturePyramidWithBorder(image, maxXBorder, maxYBorder);
    if (H == 0)
    {
        // Give the caller's image back in its original channel order
        cvConvertImage(image, image, CV_CVTIMG_SWAP_RB);
        return 0;
    }
    // Search object
    searchObjectThresholdSomeComponents(H, (const filterObject**)(detector->filters), detector->num_components,
                                        detector->num_part_filters, detector->b, detector->score_threshold,
                                        &points, &oppPoints, &score, &kPoints);
    // Clipping boxes
    clippingBoxes(image->width, image->height, points, kPoints);
    clippingBoxes(image->width, image->height, oppPoints, kPoints);
    // NMS procedure
    nonMaximumSuppression(kPoints, points, oppPoints, score, overlap_threshold,
                          &numBoxesOut, &pointsOut, &oppPointsOut, &scoreOut);

    result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvObjectDetection), storage );

    for (int i = 0; i < numBoxesOut; i++)
    {
        CvObjectDetection detection = {{0, 0, 0, 0}, 0};
        detection.score = scoreOut[i];
        CvRect bounding_box = {0, 0, 0, 0};
        bounding_box.x = pointsOut[i].x;
        bounding_box.y = pointsOut[i].y;
        bounding_box.width = oppPointsOut[i].x - pointsOut[i].x;
        bounding_box.height = oppPointsOut[i].y - pointsOut[i].y;
        detection.rect = bounding_box;
        cvSeqPush(result_seq, &detection);
    }
    cvConvertImage(image, image, CV_CVTIMG_SWAP_RB);

    // These three arrays are malloc'ed by searchObjectThresholdSomeComponents
    // and are no longer needed once the suppressed boxes have been copied.
    free(points);
    free(oppPoints);
    free(score);
    // NOTE(review): H and the pointsOut/oppPointsOut/scoreOut arrays are not
    // released here because their release routine / allocator is not visible
    // from this file - confirm ownership and free them accordingly.

    return result_seq;
}

@ -0,0 +1,800 @@
#include <stdio.h>
#include "string.h"
#include "_lsvmparser.h"
int isMODEL (char *str){
char stag [] = "<Model>";
char etag [] = "</Model>";
if(strcmp(stag, str) == 0)return MODEL;
if(strcmp(etag, str) == 0)return EMODEL;
return 0;
int isP (char *str){
char stag [] = "<P>";
char etag [] = "</P>";
if(strcmp(stag, str) == 0)return P;
if(strcmp(etag, str) == 0)return EP;
return 0;
int isSCORE (char *str){
char stag [] = "<ScoreThreshold>";
char etag [] = "</ScoreThreshold>";
if(strcmp(stag, str) == 0)return SCORE;
if(strcmp(etag, str) == 0)return ESCORE;
return 0;
int isCOMP (char *str){
char stag [] = "<Component>";
char etag [] = "</Component>";
if(strcmp(stag, str) == 0)return COMP;
if(strcmp(etag, str) == 0)return ECOMP;
return 0;
int isRFILTER (char *str){
char stag [] = "<RootFilter>";
char etag [] = "</RootFilter>";
if(strcmp(stag, str) == 0)return RFILTER;
if(strcmp(etag, str) == 0)return ERFILTER;
return 0;
int isPFILTERs (char *str){
char stag [] = "<PartFilters>";
char etag [] = "</PartFilters>";
if(strcmp(stag, str) == 0)return PFILTERs;
if(strcmp(etag, str) == 0)return EPFILTERs;
return 0;
int isPFILTER (char *str){
char stag [] = "<PartFilter>";
char etag [] = "</PartFilter>";
if(strcmp(stag, str) == 0)return PFILTER;
if(strcmp(etag, str) == 0)return EPFILTER;
return 0;
int isSIZEX (char *str){
char stag [] = "<sizeX>";
char etag [] = "</sizeX>";
if(strcmp(stag, str) == 0)return SIZEX;
if(strcmp(etag, str) == 0)return ESIZEX;
return 0;
int isSIZEY (char *str){
char stag [] = "<sizeY>";
char etag [] = "</sizeY>";
if(strcmp(stag, str) == 0)return SIZEY;
if(strcmp(etag, str) == 0)return ESIZEY;
return 0;
int isWEIGHTS (char *str){
char stag [] = "<Weights>";
char etag [] = "</Weights>";
if(strcmp(stag, str) == 0)return WEIGHTS;
if(strcmp(etag, str) == 0)return EWEIGHTS;
return 0;
int isV (char *str){
char stag [] = "<V>";
char etag [] = "</V>";
if(strcmp(stag, str) == 0)return TAGV;
if(strcmp(etag, str) == 0)return ETAGV;
return 0;
int isVx (char *str){
char stag [] = "<Vx>";
char etag [] = "</Vx>";
if(strcmp(stag, str) == 0)return Vx;
if(strcmp(etag, str) == 0)return EVx;
return 0;
int isVy (char *str){
char stag [] = "<Vy>";
char etag [] = "</Vy>";
if(strcmp(stag, str) == 0)return Vy;
if(strcmp(etag, str) == 0)return EVy;
return 0;
int isD (char *str){
char stag [] = "<Penalty>";
char etag [] = "</Penalty>";
if(strcmp(stag, str) == 0)return TAGD;
if(strcmp(etag, str) == 0)return ETAGD;
return 0;
int isDx (char *str){
char stag [] = "<dx>";
char etag [] = "</dx>";
if(strcmp(stag, str) == 0)return Dx;
if(strcmp(etag, str) == 0)return EDx;
return 0;
int isDy (char *str){
char stag [] = "<dy>";
char etag [] = "</dy>";
if(strcmp(stag, str) == 0)return Dy;
if(strcmp(etag, str) == 0)return EDy;
return 0;
int isDxx (char *str){
char stag [] = "<dxx>";
char etag [] = "</dxx>";
if(strcmp(stag, str) == 0)return Dxx;
if(strcmp(etag, str) == 0)return EDxx;
return 0;
int isDyy (char *str){
char stag [] = "<dyy>";
char etag [] = "</dyy>";
if(strcmp(stag, str) == 0)return Dyy;
if(strcmp(etag, str) == 0)return EDyy;
return 0;
int isB (char *str){
char stag [] = "<LinearTerm>";
char etag [] = "</LinearTerm>";
if(strcmp(stag, str) == 0)return BTAG;
if(strcmp(etag, str) == 0)return EBTAG;
return 0;
int getTeg(char *str){
int sum = 0;
sum = isMODEL (str)+
isP (str)+
isSCORE (str)+
isCOMP (str)+
isRFILTER (str)+
isPFILTERs (str)+
isPFILTER (str)+
isSIZEX (str)+
isSIZEY (str)+
isWEIGHTS (str)+
isV (str)+
isVx (str)+
isVy (str)+
isD (str)+
isDx (str)+
isDy (str)+
isDxx (str)+
isDyy (str)+
isB (str);
return sum;
// Appends one new filter object to the growing array *model.
//   model - array of filter pointers (may be reallocated)
//   last  - index of the last used slot; incremented to the new filter
//   max   - capacity of *model; grown in steps of 10 when exhausted
// The new filter is zero-initialised, so its H pointer is NULL until the
// weights have been read.
// NOTE(review): allocation failures are not reported (the function returns
// void); as before, the caller will fault on the next access.
void addFilter(filterObject *** model, int *last, int *max){
    (*last) ++;
    if((*last) >= (*max)){
        (*max) += 10;
        // realloc keeps the existing pointers, replacing the manual copy
        (*model) = (filterObject **)realloc(*model, sizeof(filterObject *) * (*max));
    }
    (*model) [(*last)] = (filterObject *)calloc(1, sizeof(filterObject));
}
// Parses the contents of a <RootFilter> element into *model and *b.
//   xmlf  - model file, positioned just after the <RootFilter> tag
//   p     - number of values per cell; used to size the weight array
//   model - filter to fill (sizeX, sizeY, H); V and fineFunction are zeroed
//   b     - receives the value of the <LinearTerm> element
// Characters are read one at a time; text following a value-opening tag is
// collected in buf, tag text is collected in tagBuf and classified by getTeg.
// NOTE(review): no loop header, closing braces or return statement are
// visible in this listing - confirm against the repository file.
// NOTE(review): writes to buf and tagBuf are not bounds-checked.
void parserRFilter (FILE * xmlf, int p, filterObject * model, float *b){
int st = 0;
int sizeX, sizeY;
int tag;
int tagVal;
char ch;
int i,j,ii;
char buf[1024];
char tagBuf[1024];
double *data;
// A root filter has no displacement and no deformation penalty
model->V.x = 0;
model->V.y = 0;
model->V.l = 0;
model->fineFunction[0] = 0.0;
model->fineFunction[1] = 0.0;
model->fineFunction[2] = 0.0;
model->fineFunction[3] = 0.0;
// i - write position in buf, j - write position in tagBuf,
// st - 1 while element text is being collected, tag - 1 inside '<...>'
i = 0;
j = 0;
st = 0;
tag = 0;
ch = fgetc( xmlf );
if(ch == '<'){
// Start of a tag: restart tagBuf with the '<'
tag = 1;
j = 1;
tagBuf[j - 1] = ch;
}else {
if(ch == '>'){
// End of a tag: terminate it and look up its code
tagBuf[j ] = ch;
tagBuf[j + 1] = '\0';
tagVal = getTeg(tagBuf);
// </RootFilter>
if(tagVal == ERFILTER){
// <sizeX> ... </sizeX>: integer filter width
if(tagVal == SIZEX){
st = 1;
i = 0;
if(tagVal == ESIZEX){
st = 0;
buf[i] = '\0';
sizeX = atoi(buf);
model->sizeX = sizeX;
//printf("<sizeX>%d</sizeX>\n", sizeX);
// <sizeY> ... </sizeY>: integer filter height
if(tagVal == SIZEY){
st = 1;
i = 0;
if(tagVal == ESIZEY){
st = 0;
buf[i] = '\0';
sizeY = atoi(buf);
model->sizeY = sizeY;
//printf("<sizeY>%d</sizeY>\n", sizeY);
// <Weights>: p * sizeX * sizeY raw doubles follow the tag; they are read
// with fread and narrowed to float into model->H.
// NOTE(review): the fread result is not checked, and no free(data) is
// visible in this listing - confirm the temporary buffer is released.
if(tagVal == WEIGHTS){
data = (double *)malloc( sizeof(double) * p * sizeX * sizeY);
fread(data, sizeof(double), p * sizeX * sizeY, xmlf);
model->H = (float *)malloc(sizeof(float)* p * sizeX * sizeY);
for(ii = 0; ii < p * sizeX * sizeY; ii++){
model->H[ii] = (float)data[ii];
if(tagVal == EWEIGHTS){
//printf("WEIGHTS OK\n");
// <LinearTerm> ... </LinearTerm>: floating point value stored in *b
if(tagVal == BTAG){
st = 1;
i = 0;
if(tagVal == EBTAG){
st = 0;
buf[i] = '\0';
*b =(float) atof(buf);
//printf("<B>%f</B>\n", *b);
tag = 0;
i = 0;
// Ordinary character: element text goes to buf, anything else to tagBuf
if((tag == 0)&& (st == 1)){
buf[i] = ch; i++;
tagBuf[j] = ch; j++;
// Parses the contents of a <V> element into model->V.
//   xmlf  - model file, positioned just after the <V> tag
//   p     - not referenced in the visible body
//   model - filter whose V.x / V.y are set from <Vx> / <Vy>
// NOTE(review): no loop header, closing braces or return statement are
// visible in this listing - confirm against the repository file.
// NOTE(review): writes to buf and tagBuf are not bounds-checked.
void parserV (FILE * xmlf, int p, filterObject * model){
int st = 0;
int tag;
int tagVal;
char ch;
int i,j;
char buf[1024];
char tagBuf[1024];
//printf(" <V>\n");
// i - write position in buf, j - write position in tagBuf,
// st - 1 while element text is being collected, tag - 1 inside '<...>'
i = 0;
j = 0;
st = 0;
tag = 0;
ch = fgetc( xmlf );
if(ch == '<'){
// Start of a tag: restart tagBuf with the '<'
tag = 1;
j = 1;
tagBuf[j - 1] = ch;
}else {
if(ch == '>'){
// End of a tag: terminate it and look up its code
tagBuf[j ] = ch;
tagBuf[j + 1] = '\0';
tagVal = getTeg(tagBuf);
// </V>
if(tagVal == ETAGV){
//printf(" </V>\n");
// <Vx> ... </Vx>: integer stored in model->V.x
if(tagVal == Vx){
st = 1;
i = 0;
if(tagVal == EVx){
st = 0;
buf[i] = '\0';
model->V.x = atoi(buf);
//printf(" <Vx>%d</Vx>\n", model->V.x);
// <Vy> ... </Vy>: integer stored in model->V.y
if(tagVal == Vy){
st = 1;
i = 0;
if(tagVal == EVy){
st = 0;
buf[i] = '\0';
model->V.y = atoi(buf);
//printf(" <Vy>%d</Vy>\n", model->V.y);
tag = 0;
i = 0;
// Ordinary character: element text goes to buf, anything else to tagBuf
if((tag == 0)&& (st == 1)){
buf[i] = ch; i++;
tagBuf[j] = ch; j++;
// Parses the contents of a <Penalty> element into model->fineFunction.
//   xmlf  - model file, positioned just after the <Penalty> tag
//   p     - not referenced in the visible body
//   model - filter whose fineFunction[0..3] are set from
//           <dx>, <dy>, <dxx>, <dyy> respectively
// NOTE(review): no loop header, closing braces or return statement are
// visible in this listing - confirm against the repository file.
// NOTE(review): writes to buf and tagBuf are not bounds-checked.
void parserD (FILE * xmlf, int p, filterObject * model){
int st = 0;
int tag;
int tagVal;
char ch;
int i,j;
char buf[1024];
char tagBuf[1024];
//printf(" <D>\n");
// i - write position in buf, j - write position in tagBuf,
// st - 1 while element text is being collected, tag - 1 inside '<...>'
i = 0;
j = 0;
st = 0;
tag = 0;
ch = fgetc( xmlf );
if(ch == '<'){
// Start of a tag: restart tagBuf with the '<'
tag = 1;
j = 1;
tagBuf[j - 1] = ch;
}else {
if(ch == '>'){
// End of a tag: terminate it and look up its code
tagBuf[j ] = ch;
tagBuf[j + 1] = '\0';
tagVal = getTeg(tagBuf);
// </Penalty>
if(tagVal == ETAGD){
//printf(" </D>\n");
// <dx> ... </dx>: coefficient 0
if(tagVal == Dx){
st = 1;
i = 0;
if(tagVal == EDx){
st = 0;
buf[i] = '\0';
model->fineFunction[0] = (float)atof(buf);
//printf(" <Dx>%f</Dx>\n", model->fineFunction[0]);
// <dy> ... </dy>: coefficient 1
if(tagVal == Dy){
st = 1;
i = 0;
if(tagVal == EDy){
st = 0;
buf[i] = '\0';
model->fineFunction[1] = (float)atof(buf);
//printf(" <Dy>%f</Dy>\n", model->fineFunction[1]);
// <dxx> ... </dxx>: coefficient 2
if(tagVal == Dxx){
st = 1;
i = 0;
if(tagVal == EDxx){
st = 0;
buf[i] = '\0';
model->fineFunction[2] = (float)atof(buf);
//printf(" <Dxx>%f</Dxx>\n", model->fineFunction[2]);
// <dyy> ... </dyy>: coefficient 3
if(tagVal == Dyy){
st = 1;
i = 0;
if(tagVal == EDyy){
st = 0;
buf[i] = '\0';
model->fineFunction[3] = (float)atof(buf);
//printf(" <Dyy>%f</Dyy>\n", model->fineFunction[3]);
tag = 0;
i = 0;
// Ordinary character: element text goes to buf, anything else to tagBuf
if((tag == 0)&& (st == 1)){
buf[i] = ch; i++;
tagBuf[j] = ch; j++;
// Parses the contents of a <PartFilter> element into *model.
//   xmlf   - model file, positioned just after the <PartFilter> tag
//   p      - number of values per cell; used to size the weight array
//   N_path - only mentioned in the commented-out trace below
//   model  - filter to fill: V via parserV, fineFunction via parserD,
//            sizeX, sizeY and the weights H
// NOTE(review): no loop header, closing braces or return statement are
// visible in this listing - confirm against the repository file.
// NOTE(review): writes to buf and tagBuf are not bounds-checked.
void parserPFilter (FILE * xmlf, int p, int N_path, filterObject * model){
int st = 0;
int sizeX, sizeY;
int tag;
int tagVal;
char ch;
int i,j, ii;
char buf[1024];
char tagBuf[1024];
double *data;
//printf("<PathFilter> (%d)\n", N_path);
// Defaults used when the element has no <V> / <Penalty> children
model->V.x = 0;
model->V.y = 0;
model->V.l = 0;
model->fineFunction[0] = 0.0f;
model->fineFunction[1] = 0.0f;
model->fineFunction[2] = 0.0f;
model->fineFunction[3] = 0.0f;
// i - write position in buf, j - write position in tagBuf,
// st - 1 while element text is being collected, tag - 1 inside '<...>'
i = 0;
j = 0;
st = 0;
tag = 0;
ch = fgetc( xmlf );
if(ch == '<'){
// Start of a tag: restart tagBuf with the '<'
tag = 1;
j = 1;
tagBuf[j - 1] = ch;
}else {
if(ch == '>'){
// End of a tag: terminate it and look up its code
tagBuf[j ] = ch;
tagBuf[j + 1] = '\0';
tagVal = getTeg(tagBuf);
// </PartFilter>
if(tagVal == EPFILTER){
// <V>: delegate to the sub-parser
if(tagVal == TAGV){
parserV(xmlf, p, model);
// <Penalty>: delegate to the sub-parser
if(tagVal == TAGD){
parserD(xmlf, p, model);
// <sizeX> ... </sizeX>: integer filter width
if(tagVal == SIZEX){
st = 1;
i = 0;
if(tagVal == ESIZEX){
st = 0;
buf[i] = '\0';
sizeX = atoi(buf);
model->sizeX = sizeX;
//printf("<sizeX>%d</sizeX>\n", sizeX);
// <sizeY> ... </sizeY>: integer filter height
if(tagVal == SIZEY){
st = 1;
i = 0;
if(tagVal == ESIZEY){
st = 0;
buf[i] = '\0';
sizeY = atoi(buf);
model->sizeY = sizeY;
//printf("<sizeY>%d</sizeY>\n", sizeY);
// <Weights>: p * sizeX * sizeY raw doubles follow the tag; they are read
// with fread and narrowed to float into model->H.
// NOTE(review): the fread result is not checked, and no free(data) is
// visible in this listing - confirm the temporary buffer is released.
if(tagVal == WEIGHTS){
data = (double *)malloc( sizeof(double) * p * sizeX * sizeY);
fread(data, sizeof(double), p * sizeX * sizeY, xmlf);
model->H = (float *)malloc(sizeof(float)* p * sizeX * sizeY);
for(ii = 0; ii < p * sizeX * sizeY; ii++){
model->H[ii] = (float)data[ii];
if(tagVal == EWEIGHTS){
//printf("WEIGHTS OK\n");
tag = 0;
i = 0;
// Ordinary character: element text goes to buf, anything else to tagBuf
if((tag == 0)&& (st == 1)){
buf[i] = ch; i++;
tagBuf[j] = ch; j++;
// Parses the contents of a <PartFilters> element: for every <PartFilter>
// child a new filter is appended with addFilter and filled by parserPFilter.
//   xmlf  - model file, positioned just after the <PartFilters> tag
//   p     - forwarded to parserPFilter
//   model - growing array of filters
//   last  - index of the last filter in *model (updated by addFilter)
//   max   - capacity of *model (updated by addFilter)
// NOTE(review): no loop header, closing braces or return statement are
// visible in this listing - confirm against the repository file.
// NOTE(review): N_path is passed as 0 for every part in the visible lines.
void parserPFilterS (FILE * xmlf, int p, filterObject *** model, int *last, int *max){
int st = 0;
int N_path = 0;
int tag;
int tagVal;
char ch;
int i,j;
char buf[1024];
char tagBuf[1024];
// i - write position in buf, j - write position in tagBuf,
// st - stays 0 in the visible lines, tag - 1 inside '<...>'
i = 0;
j = 0;
st = 0;
tag = 0;
ch = fgetc( xmlf );
if(ch == '<'){
// Start of a tag: restart tagBuf with the '<'
tag = 1;
j = 1;
tagBuf[j - 1] = ch;
}else {
if(ch == '>'){
// End of a tag: terminate it and look up its code
tagBuf[j ] = ch;
tagBuf[j + 1] = '\0';
tagVal = getTeg(tagBuf);
// </PartFilters>
if(tagVal == EPFILTERs){
// <PartFilter>: append a filter and parse it
if(tagVal == PFILTER){
addFilter(model, last, max);
parserPFilter (xmlf, p, N_path, (*model)[*last]);
tag = 0;
i = 0;
// Ordinary character: element text goes to buf, anything else to tagBuf
if((tag == 0)&& (st == 1)){
buf[i] = ch; i++;
tagBuf[j] = ch; j++;
// Parses the contents of a <Component> element: one root filter followed by
// its part filters.
//   xmlf   - model file, positioned just after the <Component> tag
//   p      - forwarded to the filter parsers
//   N_comp - component counter, incremented when </Component> is seen
//   model  - growing array of filters
//   b      - receives the linear term read by parserRFilter
//   last   - index of the last filter in *model (updated by addFilter)
//   max    - capacity of *model (updated by addFilter)
// NOTE(review): no loop header, closing braces or return statement are
// visible in this listing - confirm against the repository file.
void parserComp (FILE * xmlf, int p, int *N_comp, filterObject *** model, float *b, int *last, int *max){
int st = 0;
int tag;
int tagVal;
char ch;
int i,j;
char buf[1024];
char tagBuf[1024];
//printf("<Component> %d\n", *N_comp);
// i - write position in buf, j - write position in tagBuf,
// st - stays 0 in the visible lines, tag - 1 inside '<...>'
i = 0;
j = 0;
st = 0;
tag = 0;
ch = fgetc( xmlf );
if(ch == '<'){
// Start of a tag: restart tagBuf with the '<'
tag = 1;
j = 1;
tagBuf[j - 1] = ch;
}else {
if(ch == '>'){
// End of a tag: terminate it and look up its code
tagBuf[j ] = ch;
tagBuf[j + 1] = '\0';
tagVal = getTeg(tagBuf);
// </Component>: one more component has been read
if(tagVal == ECOMP){
(*N_comp) ++;
// <RootFilter>: append a filter and parse it together with the linear term
if(tagVal == RFILTER){
addFilter(model, last, max);
parserRFilter (xmlf, p, (*model)[*last],b);
// <PartFilters>: parse all part filters of this component
if(tagVal == PFILTERs){
parserPFilterS (xmlf, p, model, last, max);
tag = 0;
i = 0;
// Ordinary character: element text goes to buf, anything else to tagBuf
if((tag == 0)&& (st == 1)){
buf[i] = ch; i++;
tagBuf[j] = ch; j++;
// Parses the contents of a <Model> element.
//   xmlf  - model file, positioned just after the <Model> tag
//   model - growing array of filters (filled by the component parsers)
//   last  - index of the last filter in *model
//   max   - capacity of *model
//   comp  - receives a malloc'ed array; comp[k] is the index in *model of the
//           last filter that belongs to component k
//   b     - receives a malloc'ed array with the linear term of each component
//   count - receives the number of components
//   score - receives the value of <ScoreThreshold>
// On </Model> every parsed filter gets p (from <P>) and xp = 9 and the
// function returns. It also returns when the end of the file is reached.
void parserModel(FILE * xmlf, filterObject *** model, int *last, int *max, int **comp, float **b, int *count, float * score){
    int p = 0;
    int N_comp = 0;
    int N_before;
    int * cmp = NULL;
    float *bb = NULL;
    int st = 0;          // 1 while element text is being collected in buf
    int tag = 0;         // 1 while inside '<...>'
    int tagVal;
    int ch;              // int so that EOF is distinguishable from data
    int i = 0;           // write position in buf
    int j = 0;           // write position in tagBuf
    int ii = 0;
    char buf[1024];
    char tagBuf[1024];

    while((ch = fgetc( xmlf )) != EOF){
        if(ch == '<'){
            tag = 1;
            j   = 1;
            tagBuf[j - 1] = (char)ch;
        }else {
            if(ch == '>'){
                tagBuf[j    ] = (char)ch;
                tagBuf[j + 1] = '\0';

                tagVal = getTeg(tagBuf);

                if(tagVal == EMODEL){
                    for(ii = 0; ii <= *last; ii++){
                        (*model)[ii]->p = p;
                        (*model)[ii]->xp = 9;
                    }
                    * count = N_comp;
                    return;
                }
                if(tagVal == COMP){
                    // Grow both per-component arrays by one slot
                    cmp = (int   *)malloc(sizeof(int)   * (N_comp + 1));
                    bb  = (float *)malloc(sizeof(float) * (N_comp + 1));
                    if(N_comp > 0){
                        // Copy slot by slot; the index must be ii, not the
                        // buf position i, or earlier entries are lost
                        for(ii = 0; ii < N_comp; ii++){
                            cmp[ii] = (* comp)[ii];
                            bb [ii] = (* b   )[ii];
                        }
                        free(* comp);
                        free(* b   );
                    }
                    * comp = cmp;
                    * b    = bb;
                    * count = N_comp + 1;

                    N_before = N_comp;
                    parserComp(xmlf, p, &N_comp, model, &((*b)[N_comp]), last, max);
                    // parserComp increments N_comp only when it has seen
                    // </Component>; skip the bookkeeping for a truncated
                    // component so that cmp[-1] is never written
                    if(N_comp > N_before){
                        cmp[N_comp - 1] = *last;
                    }
                }
                if(tagVal == P){
                    st = 1;
                    i = 0;
                }
                if(tagVal == EP){
                    st = 0;
                    buf[i] = '\0';
                    p = atoi(buf);
                    //printf("<P>%d</P>\n", p);
                }
                if(tagVal == SCORE){
                    st = 1;
                    i = 0;
                }
                if(tagVal == ESCORE){
                    st = 0;
                    buf[i] = '\0';
                    *score = (float)atof(buf);
                    //printf("<ScoreThreshold>%f</ScoreThreshold>\n", score);
                }
                tag = 0;
                i   = 0;
            }else{
                if((tag == 0)&& (st == 1)){
                    // Leave room for the terminating '\0'
                    if(i < (int)sizeof(buf) - 1){
                        buf[i] = (char)ch; i++;
                    }
                }else{
                    // Leave room for the closing '>' and the '\0'
                    if(j < (int)sizeof(tagBuf) - 2){
                        tagBuf[j] = (char)ch; j++;
                    }
                }
            }
        }
    }
}
// Opens the model file and parses every <Model> element found in it.
//   filename - path to the model file
//   model    - receives a malloc'ed array of filter pointers
//   last     - receives the index of the last filter (-1 when none was read)
//   max      - receives the capacity of *model
//   comp, b, count, score - filled by parserModel; set to NULL / 0 first so
//              that they are defined when the file cannot be opened or
//              contains no model
void LSVMparser(const char * filename, filterObject *** model, int *last, int *max, int **comp, float **b, int *count, float * score){
    int tag = 0;         // 1 while inside '<...>'
    int ch;              // int so that EOF is distinguishable from data
    int j = 0;           // write position in tagBuf
    FILE *xmlf;
    char tagBuf[1024];

    (*max)   = 10;
    (*last)  = -1;
    (*model) = (filterObject ** )malloc((sizeof(filterObject * )) * (*max));
    (*comp)  = NULL;
    (*b)     = NULL;
    (*count) = 0;

    //printf("parse : %s\n", filename);

    xmlf = fopen(filename, "rb");
    if(xmlf == NULL){
        // Missing or unreadable file: leave the empty model in place
        return;
    }

    while((ch = fgetc( xmlf )) != EOF){
        if(ch == '<'){
            tag = 1;
            j   = 1;
            tagBuf[j - 1] = (char)ch;
        }else {
            if(ch == '>'){
                tag = 0;
                tagBuf[j    ] = (char)ch;
                tagBuf[j + 1] = '\0';
                if(getTeg(tagBuf) == MODEL){
                    parserModel(xmlf, model, last, max, comp, b, count, score);
                }
            }else{
                // Text outside of tags carries no information at this level
                // and is skipped; only tag text is collected.
                if(tag != 0){
                    // Leave room for the closing '>' and the '\0'
                    if(j < (int)sizeof(tagBuf) - 2){
                        tagBuf[j] = (char)ch; j++;
                    }
                }
            }
        }
    }
    fclose(xmlf);
}
// Loads a Latent SVM model file and converts the parser output into the
// arrays used by the detector.
// Returns 0 on success and -1 when no component could be read (in that case
// *kComponents is 0 and *kPartFilters is NULL).
int loadModel(
              // Input parameters
              const char *modelPath,// - path to the model file
              // Output parameters
              filterObject ***filters,// - array of pointers to the filters of all components
              int *kFilters, //- total number of filters in all models
              int *kComponents, //- number of components
              int **kPartFilters, //- array with the number of part filters in each component
              float **b, //- array of linear terms of the score function
              float *scoreThreshold){ //- score threshold
    int last = -1;
    int max = 0;
    int *comp = NULL;
    int count = 0;
    int i;
    float score = 0.0f;

    LSVMparser(modelPath, filters, &last, &max, &comp, b, &count, &score);
    (*kFilters)       = last + 1;
    (*kComponents)    = count;
    (*scoreThreshold) = (float) score;

    if(count <= 0 || comp == NULL){
        // Nothing was parsed: comp[0] must not be read
        (*kComponents)  = 0;
        (*kPartFilters) = NULL;
        return -1;
    }

    (*kPartFilters) = (int *)malloc(sizeof(int) * count);
    // comp[i] is the index of the last filter of component i, so the
    // difference of neighbours minus the root filter is the part count;
    // for the first component the root filter has index 0.
    (*kPartFilters)[0] = comp[0];
    for(i = 1; i < count;i++){
        (*kPartFilters)[i] = (comp[i] - comp[i - 1]) - 1;
    }
    // comp is a temporary owned by this function
    free(comp);
    return 0;
}

File diff suppressed because it is too large Load Diff

@ -54,6 +54,8 @@
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/core_c.h"
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/core/internal.hpp"

@ -0,0 +1,244 @@
#include "_resizeimg.h"
#include <stdio.h>
#include <assert.h>
#include <math.h>
// Creates an image whose size is the size of img multiplied by scale
// (each dimension rounded to the nearest integer), with the depth and
// channel count of img, and returns it.
// NOTE(review): in this listing nothing copies or resamples the pixels of
// img into the new image, and the closing brace is missing - the resize call
// appears to have been lost in extraction; confirm against the repository.
IplImage * resize_opencv (IplImage * img, float scale){
IplImage * imgTmp;
int W, H, tW, tH;
W = img->width;
H = img->height;
// + 0.5 before truncation rounds to the nearest integer
tW = (int)(((float)W) * scale + 0.5);
tH = (int)(((float)H) * scale + 0.5);
imgTmp = cvCreateImage(cvSize(tW , tH), img->depth, img->nChannels);
return imgTmp;
// * Fast image subsampling.
// * This is used to construct the feature pyramid.
// */
//// struct used for caching interpolation values
//typedef struct {
// int si, di;
// float alpha;
//// copy src into dst using pre-computed interpolation values
//void alphacopy(float *src, float *dst, alphainfo *ofs, int n) {
// int i;
// for(i = 0; i < n; i++){
// dst[ofs[i].di] += ofs[i].alpha * src[ofs[i].si];
// }
//int round(float val){
// return (int)(val + 0.5);
//void bzero(float * arr, int cnt){
// int i;
// for(i = 0; i < cnt; i++){
// arr[i] = 0.0f;
// }
//// resize along each column
//// result is transposed, so we can apply it twice for a complete resize
//void resize1dtran(float *src, int sheight, float *dst, int dheight,
// int width, int chan) {
// alphainfo *ofs;
// float scale = (float)dheight/(float)sheight;
// float invscale = (float)sheight/(float)dheight;
// // we cache the interpolation values since they can be
// // shared among different columns
// int len = (int)ceilf(dheight*invscale) + 2*dheight;
// int k = 0;
// int dy;
// float fsy1;
// float fsy2;
// int sy1;
// int sy2;
// int sy;
// int c, x;
// float *s, *d;
// ofs = (alphainfo *) malloc (sizeof(alphainfo) * len);
// for (dy = 0; dy < dheight; dy++) {
// fsy1 = dy * invscale;
// fsy2 = fsy1 + invscale;
// sy1 = (int)ceilf(fsy1);
// sy2 = (int)floorf(fsy2);
// if (sy1 - fsy1 > 1e-3) {
// assert(k < len);
// assert(sy1 - 1 >= 0);
// ofs[k].di = dy*width;
// ofs[k].si = sy1-1;
// ofs[k++].alpha = (sy1 - fsy1) * scale;
// }
// for (sy = sy1; sy < sy2; sy++) {
// assert(k < len);
// assert(sy < sheight);
// ofs[k].di = dy*width;
// ofs[k].si = sy;
// ofs[k++].alpha = scale;
// }
// if (fsy2 - sy2 > 1e-3) {
// assert(k < len);
// assert(sy2 < sheight);
// ofs[k].di = dy*width;
// ofs[k].si = sy2;
// ofs[k++].alpha = (fsy2 - sy2) * scale;
// }
// }
// // resize each column of each color channel
// bzero(dst, chan*width*dheight);
// for (c = 0; c < chan; c++) {
// for (x = 0; x < width; x++) {
// s = src + c*width*sheight + x*sheight;
// d = dst + c*width*dheight + x;
// alphacopy(s, d, ofs, k);
// }
// }
// free(ofs);
//IplImage * resize_article_dp(IplImage * img, float scale, const int k){
// IplImage * imgTmp;
// float W, H;
// unsigned char *dataSrc;
// float * dataf;
// float *src, *dst, *tmp;
// int i, j, kk, channels;
// int index;
// int widthStep;
// int tW, tH;
// W = (float)img->width;
// H = (float)img->height;
// channels = img->nChannels;
// widthStep = img->widthStep;
// tW = (int)(((float)W) * scale + 0.5f);
// tH = (int)(((float)H) * scale + 0.5f);
// src = (float *)malloc(sizeof(float) * (int)(W * H * 3));
// dataSrc = (unsigned char*)(img->imageData);
// index = 0;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < W; i++)
// {
// for (j = 0; j < H; j++)
// {
// src[index++] = (float)dataSrc[j * widthStep + i * channels + kk];
// }
// }
// }
// imgTmp = cvCreateImage(cvSize(tW , tH), IPL_DEPTH_32F, channels);
// dst = (float *)malloc(sizeof(float) * (int)(tH * tW) * channels);
// tmp = (float *)malloc(sizeof(float) * (int)(tH * W) * channels);
// resize1dtran(src, (int)H, tmp, (int)tH, (int)W , 3);
// resize1dtran(tmp, (int)W, dst, (int)tW, (int)tH, 3);
// index = 0;
// //dataf = (float*)imgTmp->imageData;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < tW; i++)
// {
// for (j = 0; j < tH; j++)
// {
// dataf = (float*)(imgTmp->imageData + j * imgTmp->widthStep);
// dataf[ i * channels + kk] = dst[index++];
// }
// }
// }
// free(src);
// free(dst);
// free(tmp);
// return imgTmp;
//IplImage * resize_article_dp1(IplImage * img, float scale, const int k){
// IplImage * imgTmp;
// float W, H;
// float * dataf;
// float *src, *dst, *tmp;
// int i, j, kk, channels;
// int index;
// int widthStep;
// int tW, tH;
// W = (float)img->width;
// H = (float)img->height;
// channels = img->nChannels;
// widthStep = img->widthStep;
// tW = (int)(((float)W) * scale + 0.5f);
// tH = (int)(((float)H) * scale + 0.5f);
// src = (float *)malloc(sizeof(float) * (int)(W * H) * 3);
// index = 0;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < W; i++)
// {
// for (j = 0; j < H; j++)
// {
// src[index++] = (float)(*( (float *)(img->imageData + j * widthStep) + i * channels + kk));
// }
// }
// }
// imgTmp = cvCreateImage(cvSize(tW , tH), IPL_DEPTH_32F, channels);
// dst = (float *)malloc(sizeof(float) * (int)(tH * tW) * channels);
// tmp = (float *)malloc(sizeof(float) * (int)(tH * W) * channels);
// resize1dtran(src, (int)H, tmp, (int)tH, (int)W , 3);
// resize1dtran(tmp, (int)W, dst, (int)tW, (int)tH, 3);
// index = 0;
// for (kk = 0; kk < channels; kk++)
// {
// for (i = 0; i < tW; i++)
// {
// for (j = 0; j < tH; j++)
// {
// dataf = (float *)(imgTmp->imageData + j * imgTmp->widthStep);
// dataf[ i * channels + kk] = dst[index++];
// }
// }
// }
// free(src);
// free(dst);
// free(tmp);
// return imgTmp;

@ -0,0 +1,103 @@
#include "_routine.h"
// Allocates a filterObject and resets it to a neutral state.
//
// obj          - receives the pointer to the newly allocated filter
// sizeX, sizeY - dimensions of the rectangular map of cells
// p            - number of feature components stored per cell
// xp           - auxiliary row size of a feature vector (stored as is)
//
// The penalty function coefficients and the position V are set to zero and
// the weight buffer H (sizeX * sizeY * p floats) is zero-filled.
// Returns 0, the same status value the release functions of this file use.
//
// NOTE(review): the results of malloc are not checked; decide which error
// code should be reported on allocation failure and add the checks.
int allocFilterObject(filterObject **obj, const int sizeX, const int sizeY, const int p, const int xp){
    int i;
    const int length = sizeX * sizeY * p;   // total number of weights in H

    (*obj) = (filterObject *)malloc(sizeof(filterObject));
    (*obj)->sizeX = sizeX;
    (*obj)->sizeY = sizeY;
    (*obj)->p     = p;
    (*obj)->xp    = xp;

    // no deformation penalty by default
    for(i = 0; i < 4; i++){
        (*obj)->fineFunction[i] = 0.0f;
    }

    (*obj)->V.x = 0;
    (*obj)->V.y = 0;
    (*obj)->V.l = 0;

    (*obj)->H = (float *) malloc(sizeof (float) * length);
    for(i = 0; i < length; i++){
        (*obj)->H[i] = 0.0f;
    }
    return 0;
}
// Releases a filter created by allocFilterObject.
//
// obj - address of the filter pointer; set to NULL on return
//
// Frees the weight buffer H first and then the structure itself, so that
// nothing allocated by allocFilterObject is leaked. Calling it with *obj
// already NULL is a no-op. Returns 0 in both cases.
int freeFilterObject (filterObject **obj){
    if(*obj == NULL) return 0;
    free((*obj)->H);    // buffer owned by the filter
    free(*obj);
    (*obj) = NULL;      // protect the caller from using a dangling pointer
    return 0;
}
// Allocates a featureMap with a zero-filled data buffer.
//
// obj          - receives the pointer to the newly allocated map
// sizeX, sizeY - dimensions of the map in cells
// p            - number of feature components stored per cell
// xp           - auxiliary row size of a feature vector (stored as is)
//
// The buffer Map holds sizeX * sizeY * p floats.
// Returns 0, the same status value the release functions of this file use.
//
// NOTE(review): the results of malloc are not checked; decide which error
// code should be reported on allocation failure and add the checks.
int allocFeatureMapObject(featureMap **obj, const int sizeX, const int sizeY, const int p, const int xp){
    int i;
    const int length = sizeX * sizeY * p;   // total number of values in Map

    (*obj) = (featureMap *)malloc(sizeof(featureMap));
    (*obj)->sizeX = sizeX;
    (*obj)->sizeY = sizeY;
    (*obj)->p     = p;
    (*obj)->xp    = xp;

    (*obj)->Map = (float *) malloc(sizeof (float) * length);
    for(i = 0; i < length; i++){
        (*obj)->Map[i] = 0.0f;
    }
    return 0;
}
// Releases a feature map created by allocFeatureMapObject.
//
// obj - address of the map pointer; set to NULL on return
//
// Frees the data buffer Map and then the structure itself. Calling it with
// *obj already NULL is a no-op. Returns 0 in both cases.
int freeFeatureMapObject (featureMap **obj){
    if(*obj == NULL) return 0;
    free((*obj)->Map);  // buffer owned by the map
    free(*obj);
    (*obj) = NULL;      // protect the caller from using a dangling pointer
    return 0;
}
// Allocates a featurePyramid with room for countLevel feature maps.
//
// obj        - receives the pointer to the newly allocated pyramid
// lambda     - value stored in the lambda field of the pyramid
// countLevel - number of level slots to reserve
//
// Only the table of level pointers is allocated here; the maps themselves
// are expected to be attached by the caller. Every slot starts as NULL so
// that a partially filled pyramid can still be released safely.
// Returns 0, the same status value the release functions of this file use.
//
// NOTE(review): the results of malloc are not checked; decide which error
// code should be reported on allocation failure and add the checks.
int allocFeaturePyramidObject(featurePyramid **obj, const int lambda, const int countLevel){
    int i;

    (*obj) = (featurePyramid *)malloc(sizeof(featurePyramid));
    (*obj)->countLevel = countLevel;
    (*obj)->pyramid    = (featureMap **)malloc(sizeof(featureMap *) * countLevel);
    (*obj)->lambda     = lambda;

    for(i = 0; i < countLevel; i++){
        (*obj)->pyramid[i] = NULL;
    }
    return 0;
}
// Releases a feature pyramid together with every level it holds.
//
// obj - address of the pyramid pointer; set to NULL on return
//
// Each non-NULL level is released the same way freeFeatureMapObject does it
// (data buffer first, then the map structure); afterwards the table of level
// pointers and the pyramid structure are freed. The pyramid pointer is reset
// only after the loop, because the loop condition reads (*obj)->countLevel.
// Calling it with *obj already NULL is a no-op. Returns 0 in both cases.
int freeFeaturePyramidObject (featurePyramid **obj){
    int i;
    if(*obj == NULL) return 0;

    for(i = 0; i < (*obj)->countLevel; i++){
        if((*obj)->pyramid[i] != NULL){
            free((*obj)->pyramid[i]->Map);
            free((*obj)->pyramid[i]);
            (*obj)->pyramid[i] = NULL;
        }
    }

    free((*obj)->pyramid);
    free(*obj);
    (*obj) = NULL;
    return 0;
}
// Allocates an fftImage with p zero-filled channels.
//
// image      - receives the pointer to the newly allocated image
// p          - number of channels
// dimX, dimY - dimensions of one channel
//
// Every channel buffer holds 2 * dimX * dimY floats
// (presumably interleaved real/imaginary parts - TODO confirm).
// Returns LATENT_SVM_OK.
//
// NOTE(review): the results of malloc are not checked; decide which error
// code should be reported on allocation failure and add the checks.
int allocFFTImage(fftImage **image, int p, int dimX, int dimY)
{
    int i, j, size;

    *image = (fftImage *)malloc(sizeof(fftImage));
    (*image)->p        = p;
    (*image)->dimX     = dimX;
    (*image)->dimY     = dimY;
    (*image)->channels = (float **)malloc(sizeof(float *) * p);

    size = 2 * dimX * dimY;
    for (i = 0; i < p; i++)
    {
        (*image)->channels[i] = (float *)malloc(sizeof(float) * size);
        // the zero fill must stay inside the channel loop: it uses index i
        for (j = 0; j < size; j++)
        {
            (*image)->channels[i][j] = 0.0f;
        }
    }
    return LATENT_SVM_OK;
}
// Releases the channel data of an fftImage.
//
// image - address of the image pointer
//
// Every channel buffer and the table of channel pointers are freed and the
// corresponding pointers are reset to NULL. The fftImage structure itself is
// left allocated and *image is not modified, matching the existing contract
// of this function (it only clears the members).
// NOTE(review): the structure allocated by allocFFTImage is therefore not
// released here - confirm that callers free it.
// Calling it with *image equal to NULL is a no-op. Returns LATENT_SVM_OK.
int freeFFTImage(fftImage **image)
{
    unsigned int i;
    if (*image == NULL) return LATENT_SVM_OK;

    for (i = 0; i < (*image)->p; i++)
    {
        free((*image)->channels[i]);
        (*image)->channels[i] = NULL;
    }
    free((*image)->channels);
    (*image)->channels = NULL;
    return LATENT_SVM_OK;
}

Binary file not shown.


Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

@ -0,0 +1,49 @@
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/highgui/highgui_c.h"
#include <stdio.h>
// NOTE(review): file-scope using-directive; the calls visible in this sample
// all go through the C API (cv-prefixed functions) - check whether it is needed.
using namespace cv;
// Path of the trained Latent SVM model that main() hands to cvLoadLatentSvmDetector().
const char* model_filename = "cat.xml";
// Path of the test image that main() hands to cvLoadImage().
const char* image_filename = "000028.jpg";
// Runs the Latent SVM detector on an image and outlines every detection.
//
// image    - image to search; detections are drawn into it as red rectangles
// detector - loaded Latent SVM detector
//
// The detection time is printed to stdout. The memory storage handed to the
// detector is created and released inside this function, so the detection
// sequence is only used before the storage is released.
void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector)
{
    CvMemStorage* storage = cvCreateMemStorage(0);
    CvSeq* detections = 0;
    int i = 0;
    int64 start = 0, finish = 0;

    start = cvGetTickCount();
    detections = cvLatentSvmDetectObjects(image, detector, storage);
    finish = cvGetTickCount();
    // cvGetTickFrequency() is documented as ticks per microsecond,
    // hence the 1e6 factor to express the elapsed time in seconds
    printf("detection time = %.3f\n", (float)(finish - start) / (float)(cvGetTickFrequency() * 1000000.0));

    // a null sequence is treated as "nothing detected"
    for( i = 0; detections != 0 && i < detections->total; i++ )
    {
        CvObjectDetection detection = *(CvObjectDetection*)cvGetSeqElem( detections, i );
        CvRect bounding_box = detection.rect;
        cvRectangle( image, cvPoint(bounding_box.x, bounding_box.y),
                     cvPoint(bounding_box.x + bounding_box.width,
                             bounding_box.y + bounding_box.height),
                     CV_RGB(255,0,0), 3 );
    }
    cvReleaseMemStorage( &storage );
}
int main(int argc, char* argv[])
IplImage* image = cvLoadImage(image_filename);
CvLatentSvmDetector* detector = cvLoadLatentSvmDetector(model_filename);
detect_and_draw_objects( image, detector );
cvNamedWindow( "test", 0 );
cvShowImage( "test", image );
cvReleaseLatentSvmDetector( &detector );
cvReleaseImage( &image );
return 0;