@ -137,47 +137,22 @@ struct CvHidHaarClassifierCascade
} ;
/*
 * One record of per-scale detection parameters.  An array of these
 * (one element per scale-loop iteration) is uploaded to the device as
 * "scaleinfobuffer".
 *
 * NOTE(review): the "a_b" field names suggest that two values are packed
 * into a single int (the stand-alone "height" and "totalgrp" fields were
 * previously commented out); the exact bit layout is not visible here and
 * must be confirmed against the code that fills and reads the record.
 */
typedef struct
{
    int   width_height;            /* presumably width and height packed together -- TODO confirm */
    int   grpnumperline_totalgrp;  /* presumably groups-per-line and total groups packed -- TODO confirm */
    int   imgoff;                  /* presumably an offset into the image data -- TODO confirm */
    float factor;                  /* presumably the scale factor of this level -- TODO confirm */
} detect_piramid_info;
# if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
# ifdef WIN32
# define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
/*
 * Haar feature record, MSVC spelling: _ALIGNED_ON expands to
 * __declspec(align(n)) in this branch and is written before the declarator.
 * The struct as a whole is 128-byte aligned and holds CV_HAAR_FEATURE_MAX
 * rectangle entries.
 * NOTE(review): in the part of the file reviewed this type is referenced only
 * from commented-out code -- confirm whether it is still needed.
 */
typedef _ALIGNED_ON ( 128 ) struct GpuHidHaarFeature
{
/* One rectangle entry: four ints plus a weight; the entry type is 32-byte aligned. */
_ALIGNED_ON ( 32 ) struct
{
/* p0..p3: presumably the four corner references of the rectangle -- TODO confirm. */
_ALIGNED_ON ( 4 ) int p0 ;
_ALIGNED_ON ( 4 ) int p1 ;
_ALIGNED_ON ( 4 ) int p2 ;
_ALIGNED_ON ( 4 ) int p3 ;
/* Weight applied to this rectangle. */
_ALIGNED_ON ( 4 ) float weight ;
}
/*_ALIGNED_ON(32)*/ rect [ CV_HAAR_FEATURE_MAX ] ;
}
GpuHidHaarFeature ;
/*
 * Flattened tree node of a Haar classifier, MSVC spelling of the alignment
 * attributes (_ALIGNED_ON expands to __declspec(align(n)) in this branch).
 *
 * The member "alpha" was declared twice (alpha[2] and alpha[3]), which is a
 * redefinition error.  Only alpha[3] is kept: the cascade builder copies
 * (node_count + 1) floats into node->alpha and also handles classifiers with
 * node_count != 1, so two slots are not enough.
 *
 * NOTE(review): an array of these nodes is written verbatim into an OpenCL
 * buffer ("nodebuffer"), so the device-side struct definition presumably has
 * to match this layout exactly -- confirm against the kernel sources.
 */
typedef _ALIGNED_ON(128) struct GpuHidHaarTreeNode
{
    /* Four ints per feature rectangle.  The setup routines in this file store
       either x / y / x+width / ... values or precomputed linear offsets here,
       depending on the code path that filled the node. */
    _ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4];

    /* Per-rectangle weight; an unused third rectangle gets weight 0. */
    float weight[CV_HAAR_FEATURE_MAX];

    /* Node decision threshold (assignment is not visible in the reviewed part). */
    float threshold;

    /* Copy of the source classifier's alpha values (node_count + 1 floats). */
    _ALIGNED_ON(16) float alpha[3];

    /* presumably indices of the left / right child -- TODO confirm. */
    _ALIGNED_ON(4) int left;
    _ALIGNED_ON(4) int right;
}
GpuHidHaarTreeNode;
@ -185,7 +160,6 @@ GpuHidHaarTreeNode;
typedef _ALIGNED_ON ( 32 ) struct GpuHidHaarClassifier
{
_ALIGNED_ON ( 4 ) int count ;
//CvHaarFeature* orig_feature;
_ALIGNED_ON ( 8 ) GpuHidHaarTreeNode * node ;
_ALIGNED_ON ( 8 ) float * alpha ;
}
@ -220,32 +194,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade
_ALIGNED_ON ( 4 ) int p2 ;
_ALIGNED_ON ( 4 ) int p3 ;
_ALIGNED_ON ( 4 ) float inv_window_area ;
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
} GpuHidHaarClassifierCascade ;
# else
# define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
/*
 * Haar feature record, GCC-style spelling: _ALIGNED_ON expands to
 * __attribute__((aligned(n))) in this branch and is written after the
 * struct keyword / after each declarator.
 * The struct as a whole is 128-byte aligned and holds CV_HAAR_FEATURE_MAX
 * rectangle entries.
 * NOTE(review): in the part of the file reviewed this type is referenced only
 * from commented-out code -- confirm whether it is still needed.
 */
typedef struct _ALIGNED_ON ( 128 ) GpuHidHaarFeature
{
/* One rectangle entry: four ints plus a weight; entry type and array are 32-byte aligned. */
struct _ALIGNED_ON ( 32 )
{
/* p0..p3: presumably the four corner references of the rectangle -- TODO confirm. */
int p0 _ALIGNED_ON ( 4 ) ;
int p1 _ALIGNED_ON ( 4 ) ;
int p2 _ALIGNED_ON ( 4 ) ;
int p3 _ALIGNED_ON ( 4 ) ;
/* Weight applied to this rectangle. */
float weight _ALIGNED_ON ( 4 ) ;
}
rect [ CV_HAAR_FEATURE_MAX ] _ALIGNED_ON ( 32 ) ;
}
GpuHidHaarFeature ;
typedef struct _ALIGNED_ON ( 128 ) GpuHidHaarTreeNode
{
int p [ CV_HAAR_FEATURE_MAX ] [ 4 ] _ALIGNED_ON ( 64 ) ;
float weight [ CV_HAAR_FEATURE_MAX ] ; // _ALIGNED_ON(16);
float threshold ; // _ALIGNED_ON(4);
float alpha [ 2 ] _ALIGNED_ON ( 8 ) ;
float alpha [ 3 ] _ALIGNED_ON ( 16 ) ;
int left _ALIGNED_ON ( 4 ) ;
int right _ALIGNED_ON ( 4 ) ;
}
@ -288,7 +246,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
int p2 _ALIGNED_ON ( 4 ) ;
int p3 _ALIGNED_ON ( 4 ) ;
float inv_window_area _ALIGNED_ON ( 4 ) ;
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
} GpuHidHaarClassifierCascade ;
# endif
@ -296,36 +253,6 @@ const int icv_object_win_border = 1;
/* Small bias constant; by its name it is applied to stage thresholds, but the
   use site is not visible in the reviewed part of the file -- TODO confirm. */
const float icv_stage_threshold_bias = 0.0001f ;
/* Mutable file-scope accumulator with external linkage, initialised to 0.
   NOTE(review): looks like a timing/debug counter; no reader or writer is
   visible in the reviewed part of the file. */
double globaltime = 0 ;
// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
// {
// CvHaarClassifierCascade *cascade = 0;
// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
// if( stage_count <= 0 )
// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
// memset( cascade, 0, block_size );
// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
// cascade->flags = CV_HAAR_MAGIC_VAL;
// cascade->count = stage_count;
// return cascade;
// }
//static int globalcounter = 0;
// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
// {
// if( _cascade && *_cascade )
// {
// cvFree( _cascade );
// }
// }
/* create more efficient internal representation of haar classifier cascade */
static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade ( CvHaarClassifierCascade * cascade , int * size , int * totalclassifier )
{
@ -441,24 +368,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
hid_stage_classifier - > two_rects = 1 ;
haar_classifier_ptr + = stage_classifier - > count ;
/*
hid_stage_classifier - > parent = ( stage_classifier - > parent = = - 1 )
? NULL : stage_classifier_ptr + stage_classifier - > parent ;
hid_stage_classifier - > next = ( stage_classifier - > next = = - 1 )
? NULL : stage_classifier_ptr + stage_classifier - > next ;
hid_stage_classifier - > child = ( stage_classifier - > child = = - 1 )
? NULL : stage_classifier_ptr + stage_classifier - > child ;
out - > is_tree | = hid_stage_classifier - > next ! = NULL ;
*/
for ( j = 0 ; j < stage_classifier - > count ; j + + )
{
CvHaarClassifier * classifier = stage_classifier - > classifier + j ;
GpuHidHaarClassifier * hid_classifier = hid_stage_classifier - > classifier + j ;
int node_count = classifier - > count ;
// float* alpha_ptr = (float*)(haar_node_ptr + node_count);
float * alpha_ptr = & haar_node_ptr - > alpha [ 0 ] ;
hid_classifier - > count = node_count ;
@ -485,16 +400,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
node - > p [ 2 ] [ 3 ] = 0 ;
node - > weight [ 2 ] = 0 ;
}
// memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
else
hid_stage_classifier - > two_rects = 0 ;
}
memcpy ( alpha_ptr , classifier - > alpha , ( node_count + 1 ) * sizeof ( alpha_ptr [ 0 ] ) ) ;
haar_node_ptr = haar_node_ptr + 1 ;
// (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
// (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
memcpy ( node - > alpha , classifier - > alpha , ( node_count + 1 ) * sizeof ( alpha_ptr [ 0 ] ) ) ;
haar_node_ptr = haar_node_ptr + 1 ;
}
out - > is_stump_based & = node_count = = 1 ;
}
}
@ -507,25 +418,19 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
/*
 * Element-access helpers.
 *
 * Each macro's expansion line used to appear twice; the second copy was not
 * part of the macro (no line continuation precedes it) and therefore ended up
 * as a bare expression at file scope.  Each macro now has exactly one
 * expansion line.
 */

/* Pointer to element (row, col) of `sum`, typed as sumtype*.
   sumtype and CV_MAT_ELEM_PTR_FAST are defined elsewhere. */
#define sum_elem_ptr(sum,row,col) \
    ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))

/* Pointer to element (row, col) of `sqsum`, typed as sqsumtype*.
   sqsumtype and CV_MAT_ELEM_PTR_FAST are defined elsewhere. */
#define sqsum_elem_ptr(sqsum,row,col) \
    ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))

/* p0 - p1 - p2 + p3, each read at index `offset` through the four pointer
   members of `rect`.  Both arguments are evaluated four times, so they must
   not have side effects. */
#define calc_sum(rect,offset) \
    ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
static void gpuSetImagesForHaarClassifierCascade ( CvHaarClassifierCascade * _cascade ,
/* const CvArr* _sum,
const CvArr * _sqsum ,
const CvArr * _tilted_sum , */
double scale ,
int step )
{
// CvMat sum_stub, *sum = (CvMat*)_sum;
// CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
// CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade * cascade ;
int coi0 = 0 , coi1 = 0 ;
int i ;
@ -541,61 +446,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
if ( scale < = 0 )
CV_Error ( CV_StsOutOfRange , " Scale must be positive " ) ;
// sum = cvGetMat( sum, &sum_stub, &coi0 );
// sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
if ( coi0 | | coi1 )
CV_Error ( CV_BadCOI , " COI is not supported " ) ;
// if( !CV_ARE_SIZES_EQ( sum, sqsum ))
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
// if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
// CV_MAT_TYPE(sum->type) != CV_32SC1 )
// CV_Error( CV_StsUnsupportedFormat,
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
if ( ! _cascade - > hid_cascade )
gpuCreateHidHaarClassifierCascade ( _cascade , & datasize , & total ) ;
cascade = ( GpuHidHaarClassifierCascade * ) _cascade - > hid_cascade ;
stage_classifier = ( GpuHidHaarStageClassifier * ) ( cascade + 1 ) ;
if ( cascade - > has_tilted_features )
{
// tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
// if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
// CV_Error( CV_StsUnsupportedFormat,
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
// if( sum->step != tilted->step )
// CV_Error( CV_StsUnmatchedSizes,
// "Sum and tilted_sum must have the same stride (step, widthStep)" );
// if( !CV_ARE_SIZES_EQ( sum, tilted ))
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
// cascade->tilted = *tilted;
}
_cascade - > scale = scale ;
_cascade - > real_window_size . width = cvRound ( _cascade - > orig_window_size . width * scale ) ;
_cascade - > real_window_size . height = cvRound ( _cascade - > orig_window_size . height * scale ) ;
//cascade->sum = *sum;
//cascade->sqsum = *sqsum;
equRect . x = equRect . y = cvRound ( scale ) ;
equRect . width = cvRound ( ( _cascade - > orig_window_size . width - 2 ) * scale ) ;
equRect . height = cvRound ( ( _cascade - > orig_window_size . height - 2 ) * scale ) ;
weight_scale = 1. / ( equRect . width * equRect . height ) ;
cascade - > inv_window_area = weight_scale ;
// cascade->pq0 = equRect.y * step + equRect.x;
// cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
// cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
// cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
cascade - > pq0 = equRect . x ;
cascade - > pq1 = equRect . y ;
cascade - > pq2 = equRect . x + equRect . width ;
@ -618,10 +487,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
{
CvHaarFeature * feature =
& _cascade - > stage_classifier [ i ] . classifier [ j ] . haar_feature [ l ] ;
/* GpuHidHaarClassifier* classifier =
cascade - > stage_classifier [ i ] . classifier + j ; */
//GpuHidHaarFeature* hidfeature =
// &cascade->stage_classifier[i].classifier[j].node[l].feature;
GpuHidHaarTreeNode * hidnode = & stage_classifier [ i ] . classifier [ j ] . node [ l ] ;
double sum0 = 0 , area0 = 0 ;
CvRect r [ 3 ] ;
@ -636,8 +501,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
/* align blocks */
for ( k = 0 ; k < CV_HAAR_FEATURE_MAX ; k + + )
{
//if( !hidfeature->rect[k].p0 )
// break;
if ( ! hidnode - > p [ k ] [ 0 ] )
break ;
r [ k ] = feature - > rect [ k ] . r ;
@ -717,15 +580,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
if ( ! feature - > tilted )
{
/* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
hidfeature - > rect [ k ] . p1 = tr . y * sum - > cols + tr . x + tr . width ;
hidfeature - > rect [ k ] . p2 = ( tr . y + tr . height ) * sum - > cols + tr . x ;
hidfeature - > rect [ k ] . p3 = ( tr . y + tr . height ) * sum - > cols + tr . x + tr . width ;
*/
/*hidnode->p0[k] = tr.y * step + tr.x;
hidnode - > p1 [ k ] = tr . y * step + tr . x + tr . width ;
hidnode - > p2 [ k ] = ( tr . y + tr . height ) * step + tr . x ;
hidnode - > p3 [ k ] = ( tr . y + tr . height ) * step + tr . x + tr . width ; */
hidnode - > p [ k ] [ 0 ] = tr . x ;
hidnode - > p [ k ] [ 1 ] = tr . y ;
hidnode - > p [ k ] [ 2 ] = tr . x + tr . width ;
@ -733,37 +587,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
}
else
{
/* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
hidfeature - > rect [ k ] . p3 = ( tr . y + tr . width + tr . height ) * tilted - > cols + tr . x + tr . width - tr . height ;
hidfeature - > rect [ k ] . p0 = tr . y * tilted - > cols + tr . x ;
hidfeature - > rect [ k ] . p1 = ( tr . y + tr . height ) * tilted - > cols + tr . x - tr . height ;
*/
hidnode - > p [ k ] [ 2 ] = ( tr . y + tr . width ) * step + tr . x + tr . width ;
hidnode - > p [ k ] [ 3 ] = ( tr . y + tr . width + tr . height ) * step + tr . x + tr . width - tr . height ;
hidnode - > p [ k ] [ 0 ] = tr . y * step + tr . x ;
hidnode - > p [ k ] [ 1 ] = ( tr . y + tr . height ) * step + tr . x - tr . height ;
}
//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
hidnode - > weight [ k ] = ( float ) ( feature - > rect [ k ] . weight * correction_ratio ) ;
if ( k = = 0 )
area0 = tr . width * tr . height ;
else
//sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
sum0 + = hidnode - > weight [ k ] * tr . width * tr . height ;
}
// hidfeature->rect[0].weight = (float)(-sum0/area0);
hidnode - > weight [ 0 ] = ( float ) ( - sum0 / area0 ) ;
} /* l */
} /* j */
}
}
static void gpuSetHaarClassifierCascade ( CvHaarClassifierCascade * _cascade
/*double scale=0.0,*/
/*int step*/ )
static void gpuSetHaarClassifierCascade ( CvHaarClassifierCascade * _cascade )
{
GpuHidHaarClassifierCascade * cascade ;
int i ;
@ -817,11 +658,7 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
if ( ! hidnode - > p [ k ] [ 0 ] )
break ;
r [ k ] = feature - > rect [ k ] . r ;
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
}
}
nr = k ;
for ( k = 0 ; k < nr ; k + + )
@ -839,7 +676,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
hidnode - > p [ k ] [ 3 ] = tr . height ;
hidnode - > weight [ k ] = ( float ) ( feature - > rect [ k ] . weight * correction_ratio ) ;
}
//hidnode->weight[0]=(float)(-sum0/area0);
} /* l */
} /* j */
}
@ -852,7 +688,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
const double GROUP_EPS = 0.2 ;
CvSeq * result_seq = 0 ;
cv : : Ptr < CvMemStorage > temp_storage ;
cv : : ConcurrentRectVector allCandidates ;
std : : vector < cv : : Rect > rectList ;
@ -910,6 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
if ( gimg . cols < minSize . width | | gimg . rows < minSize . height )
CV_Error ( CV_StsError , " Image too small " ) ;
cl_command_queue qu = reinterpret_cast < cl_command_queue > ( Context : : getContext ( ) - > oclCommandQueue ( ) ) ;
if ( ( flags & CV_HAAR_SCALE_IMAGE ) )
{
CvSize winSize0 = cascade - > orig_window_size ;
@ -952,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
size_t blocksize = 8 ;
size_t localThreads [ 3 ] = { blocksize , blocksize , 1 } ;
size_t globalThreads [ 3 ] = { grp_per_CU * gsum . clCxt - > computeUnits ( ) * localThreads [ 0 ] ,
size_t globalThreads [ 3 ] = { grp_per_CU * ( gsum . clCxt - > computeUnits ( ) ) * localThreads [ 0 ] ,
localThreads [ 1 ] , 1
} ;
int outputsz = 256 * globalThreads [ 0 ] / localThreads [ 0 ] ;
@ -997,7 +833,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
gpuSetImagesForHaarClassifierCascade ( cascade , 1. , gsum . step / 4 ) ;
stagebuffer = openCLCreateBuffer ( gsum . clCxt , CL_MEM_READ_ONLY , sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count ) ;
cl_command_queue qu = ( cl_command_queue ) gsum . clCxt - > oclCommandQueue ( ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , stagebuffer , 1 , 0 , sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count , stage , 0 , NULL , NULL ) ) ;
nodebuffer = openCLCreateBuffer ( gsum . clCxt , CL_MEM_READ_ONLY , nodenum * sizeof ( GpuHidHaarTreeNode ) ) ;
@ -1044,7 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args . push_back ( make_pair ( sizeof ( cl_int4 ) , ( void * ) & pq ) ) ;
args . push_back ( make_pair ( sizeof ( cl_float ) , ( void * ) & correction ) ) ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect , " gpuRunHaarClassifierCascade " , globalThreads , localThreads , args , - 1 , - 1 ) ;
const char * build_options = gcascade - > is_stump_based ? " -D STUMP_BASED=1 " : " -D STUMP_BASED=0 " ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect , " gpuRunHaarClassifierCascade " , globalThreads , localThreads , args , - 1 , - 1 , build_options ) ;
openCLReadBuffer ( gsum . clCxt , candidatebuffer , candidate , 4 * sizeof ( int ) * outputsz ) ;
@ -1059,6 +896,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
openCLSafeCall ( clReleaseMemObject ( scaleinfobuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( nodebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( candidatebuffer ) ) ;
}
else
{
@ -1118,7 +956,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count - sizeof ( GpuHidHaarClassifier ) * totalclassifier ) / sizeof ( GpuHidHaarTreeNode ) ;
nodebuffer = openCLCreateBuffer ( gsum . clCxt , CL_MEM_READ_ONLY ,
nodenum * sizeof ( GpuHidHaarTreeNode ) ) ;
cl_command_queue qu = ( cl_command_queue ) gsum . clCxt - > oclCommandQueue ( ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , nodebuffer , 1 , 0 ,
nodenum * sizeof ( GpuHidHaarTreeNode ) ,
node , 0 , NULL , NULL ) ) ;
@ -1160,7 +997,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args1 . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & startnodenum ) ) ;
size_t globalThreads2 [ 3 ] = { nodenum , 1 , 1 } ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect_scaled2 , " gpuscaleclassifier " , globalThreads2 , NULL /*localThreads2*/ , args1 , - 1 , - 1 ) ;
}
@ -1195,8 +1031,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & pbuffer ) ) ;
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & correctionbuffer ) ) ;
args . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & nodenum ) ) ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect_scaled2 , " gpuRunHaarClassifierCascade_scaled2 " , globalThreads , localThreads , args , - 1 , - 1 ) ;
const char * build_options = gcascade - > is_stump_based ? " -D STUMP_BASED=1 " : " -D STUMP_BASED=0 " ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect_scaled2 , " gpuRunHaarClassifierCascade_scaled2 " , globalThreads , localThreads , args , - 1 , - 1 , build_options ) ;
candidate = ( int * ) clEnqueueMapBuffer ( qu , candidatebuffer , 1 , CL_MAP_READ , 0 , 4 * sizeof ( int ) * outputsz , 0 , 0 , 0 , & status ) ;
@ -1284,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
int blocksize = 8 ;
int grp_per_CU = 12 ;
size_t localThreads [ 3 ] = { blocksize , blocksize , 1 } ;
size_t globalThreads [ 3 ] = { grp_per_CU * Context : : getContext ( ) - > computeUnits ( ) * localThreads [ 0 ] ,
size_t globalThreads [ 3 ] = { grp_per_CU * cv : : ocl : : Context : : getContext ( ) - > computeUnits ( ) * localThreads [ 0 ] ,
localThreads [ 1 ] ,
1 } ;
int outputsz = 256 * globalThreads [ 0 ] / localThreads [ 0 ] ;
@ -1300,8 +1136,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
CvHaarClassifierCascade * cascade = oldCascade ;
GpuHidHaarClassifierCascade * gcascade ;
GpuHidHaarStageClassifier * stage ;
GpuHidHaarClassifier * classifier ;
GpuHidHaarTreeNode * node ;
if ( CV_MAT_DEPTH ( gimg . type ( ) ) ! = CV_8U )
CV_Error ( CV_StsUnsupportedFormat , " Only 8-bit images are supported " ) ;
@ -1314,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
}
int * candidate ;
cl_command_queue qu = reinterpret_cast < cl_command_queue > ( Context : : getContext ( ) - > oclCommandQueue ( ) ) ;
if ( ( flags & CV_HAAR_SCALE_IMAGE ) )
{
int indexy = 0 ;
@ -1340,19 +1174,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
gcascade = ( GpuHidHaarClassifierCascade * ) ( cascade - > hid_cascade ) ;
stage = ( GpuHidHaarStageClassifier * ) ( gcascade + 1 ) ;
classifier = ( GpuHidHaarClassifier * ) ( stage + gcascade - > count ) ;
node = ( GpuHidHaarTreeNode * ) ( classifier - > node ) ;
gpuSetImagesForHaarClassifierCascade ( cascade , 1. , gsum . step / 4 ) ;
cl_command_queue qu = ( cl_command_queue ) gsum . clCxt - > oclCommandQueue ( ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > stagebuffer , 1 , 0 ,
sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count ,
stage , 0 , NULL , NULL ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > nodebuffer , 1 , 0 ,
m_nodenum * sizeof ( GpuHidHaarTreeNode ) ,
node , 0 , NULL , NULL ) ) ;
int startstage = 0 ;
int endstage = gcascade - > count ;
@ -1389,17 +1210,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
args . push_back ( make_pair ( sizeof ( cl_int4 ) , ( void * ) & pq ) ) ;
args . push_back ( make_pair ( sizeof ( cl_float ) , ( void * ) & correction ) ) ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect , " gpuRunHaarClassifierCascade " , globalThreads , localThreads , args , - 1 , - 1 ) ;
const char * build_options = gcascade - > is_stump_based ? " -D STUMP_BASED=1 " : " -D STUMP_BASED=0 " ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect , " gpuRunHaarClassifierCascade " , globalThreads , localThreads , args , - 1 , - 1 , build_options ) ;
candidate = ( int * ) malloc ( 4 * sizeof ( int ) * outputsz ) ;
memset ( candidate , 0 , 4 * sizeof ( int ) * outputsz ) ;
openCLReadBuffer ( gsum . clCxt , ( ( OclBuffers * ) buffers ) - > candidatebuffer , candidate , 4 * sizeof ( int ) * outputsz ) ;
for ( int i = 0 ; i < outputsz ; i + + )
{
if ( candidate [ 4 * i + 2 ] ! = 0 )
{
allCandidates . push_back ( Rect ( candidate [ 4 * i ] , candidate [ 4 * i + 1 ] ,
candidate [ 4 * i + 2 ] , candidate [ 4 * i + 3 ] ) ) ;
}
}
free ( ( void * ) candidate ) ;
candidate = NULL ;
}
@ -1407,56 +1234,14 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
{
cv : : ocl : : integral ( gimg , gsum , gsqsum ) ;
gpuSetHaarClassifierCascade ( cascade ) ;
gcascade = ( GpuHidHaarClassifierCascade * ) cascade - > hid_cascade ;
stage = ( GpuHidHaarStageClassifier * ) ( gcascade + 1 ) ;
classifier = ( GpuHidHaarClassifier * ) ( stage + gcascade - > count ) ;
node = ( GpuHidHaarTreeNode * ) ( classifier - > node ) ;
cl_command_queue qu = ( cl_command_queue ) gsum . clCxt - > oclCommandQueue ( ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > nodebuffer , 1 , 0 ,
m_nodenum * sizeof ( GpuHidHaarTreeNode ) ,
node , 0 , NULL , NULL ) ) ;
cl_int4 * p = ( cl_int4 * ) malloc ( sizeof ( cl_int4 ) * m_loopcount ) ;
float * correction = ( float * ) malloc ( sizeof ( float ) * m_loopcount ) ;
int startstage = 0 ;
int endstage = gcascade - > count ;
double factor ;
for ( int i = 0 ; i < m_loopcount ; i + + )
{
factor = scalev [ i ] ;
int equRect_x = ( int ) ( factor * gcascade - > p0 + 0.5 ) ;
int equRect_y = ( int ) ( factor * gcascade - > p1 + 0.5 ) ;
int equRect_w = ( int ) ( factor * gcascade - > p3 + 0.5 ) ;
int equRect_h = ( int ) ( factor * gcascade - > p2 + 0.5 ) ;
p [ i ] . s [ 0 ] = equRect_x ;
p [ i ] . s [ 1 ] = equRect_y ;
p [ i ] . s [ 2 ] = equRect_x + equRect_w ;
p [ i ] . s [ 3 ] = equRect_y + equRect_h ;
correction [ i ] = 1. / ( equRect_w * equRect_h ) ;
int startnodenum = m_nodenum * i ;
float factor2 = ( float ) factor ;
vector < pair < size_t , const void * > > args1 ;
args1 . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & ( ( OclBuffers * ) buffers ) - > nodebuffer ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & ( ( OclBuffers * ) buffers ) - > newnodebuffer ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_float ) , ( void * ) & factor2 ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_float ) , ( void * ) & correction [ i ] ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & startnodenum ) ) ;
size_t globalThreads2 [ 3 ] = { m_nodenum , 1 , 1 } ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect_scaled2 , " gpuscaleclassifier " , globalThreads2 , NULL /*localThreads2*/ , args1 , - 1 , - 1 ) ;
}
int step = gsum . step / 4 ;
int startnode = 0 ;
int splitstage = 3 ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > stagebuffer , 1 , 0 , sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count , stage , 0 , NULL , NULL ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > pbuffer , 1 , 0 , sizeof ( cl_int4 ) * m_loopcount , p , 0 , NULL , NULL ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > correctionb uffer , 1 , 0 , sizeof ( cl_floa t ) * m_loopcount , correction , 0 , NULL , NULL ) ) ;
int startstage = 0 ;
int endstage = gcascade - > count ;
vector < pair < size_t , const void * > > args ;
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & ( ( OclBuffers * ) buffers ) - > stagebuffer ) ) ;
@ -1477,7 +1262,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & ( ( OclBuffers * ) buffers ) - > correctionbuffer ) ) ;
args . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & m_nodenum ) ) ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect_scaled2 , " gpuRunHaarClassifierCascade_scaled2 " , globalThreads , localThreads , args , - 1 , - 1 ) ;
const char * build_options = gcascade - > is_stump_based ? " -D STUMP_BASED=1 " : " -D STUMP_BASED=0 " ;
openCLExecuteKernel ( gsum . clCxt , & haarobjectdetect_scaled2 , " gpuRunHaarClassifierCascade_scaled2 " , globalThreads , localThreads , args , - 1 , - 1 , build_options ) ;
candidate = ( int * ) clEnqueueMapBuffer ( qu , ( ( OclBuffers * ) buffers ) - > candidatebuffer , 1 , CL_MAP_READ , 0 , 4 * sizeof ( int ) * outputsz , 0 , 0 , 0 , NULL ) ;
@ -1487,12 +1273,8 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
allCandidates . push_back ( Rect ( candidate [ 4 * i ] , candidate [ 4 * i + 1 ] ,
candidate [ 4 * i + 2 ] , candidate [ 4 * i + 3 ] ) ) ;
}
free ( p ) ;
free ( correction ) ;
clEnqueueUnmapMemObject ( qu , ( ( OclBuffers * ) buffers ) - > candidatebuffer , candidate , 0 , 0 , 0 ) ;
}
rectList . resize ( allCandidates . size ( ) ) ;
if ( ! allCandidates . empty ( ) )
std : : copy ( allCandidates . begin ( ) , allCandidates . end ( ) , rectList . begin ( ) ) ;
@ -1510,6 +1292,10 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
const int outputsz , const size_t localThreads [ ] ,
CvSize minSize , CvSize maxSize )
{
if ( initialized )
{
return ; // we only allow one time initialization
}
CvHaarClassifierCascade * cascade = oldCascade ;
if ( ! CV_IS_HAAR_CLASSIFIER ( cascade ) )
@ -1525,7 +1311,9 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
int totalclassifier = 0 ;
if ( ! cascade - > hid_cascade )
{
gpuCreateHidHaarClassifierCascade ( cascade , & datasize , & totalclassifier ) ;
}
if ( maxSize . height = = 0 | | maxSize . width = = 0 )
{
@ -1547,6 +1335,78 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
m_minSize = minSize ;
m_maxSize = maxSize ;
// initialize nodes
GpuHidHaarClassifierCascade * gcascade ;
GpuHidHaarStageClassifier * stage ;
GpuHidHaarClassifier * classifier ;
GpuHidHaarTreeNode * node ;
cl_command_queue qu = reinterpret_cast < cl_command_queue > ( Context : : getContext ( ) - > oclCommandQueue ( ) ) ;
if ( ( flags & CV_HAAR_SCALE_IMAGE ) )
{
gcascade = ( GpuHidHaarClassifierCascade * ) ( cascade - > hid_cascade ) ;
stage = ( GpuHidHaarStageClassifier * ) ( gcascade + 1 ) ;
classifier = ( GpuHidHaarClassifier * ) ( stage + gcascade - > count ) ;
node = ( GpuHidHaarTreeNode * ) ( classifier - > node ) ;
gpuSetImagesForHaarClassifierCascade ( cascade , 1. , gsum . step / 4 ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > stagebuffer , 1 , 0 ,
sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count ,
stage , 0 , NULL , NULL ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > nodebuffer , 1 , 0 ,
m_nodenum * sizeof ( GpuHidHaarTreeNode ) ,
node , 0 , NULL , NULL ) ) ;
}
else
{
gpuSetHaarClassifierCascade ( cascade ) ;
gcascade = ( GpuHidHaarClassifierCascade * ) cascade - > hid_cascade ;
stage = ( GpuHidHaarStageClassifier * ) ( gcascade + 1 ) ;
classifier = ( GpuHidHaarClassifier * ) ( stage + gcascade - > count ) ;
node = ( GpuHidHaarTreeNode * ) ( classifier - > node ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > nodebuffer , 1 , 0 ,
m_nodenum * sizeof ( GpuHidHaarTreeNode ) ,
node , 0 , NULL , NULL ) ) ;
cl_int4 * p = ( cl_int4 * ) malloc ( sizeof ( cl_int4 ) * m_loopcount ) ;
float * correction = ( float * ) malloc ( sizeof ( float ) * m_loopcount ) ;
double factor ;
for ( int i = 0 ; i < m_loopcount ; i + + )
{
factor = scalev [ i ] ;
int equRect_x = ( int ) ( factor * gcascade - > p0 + 0.5 ) ;
int equRect_y = ( int ) ( factor * gcascade - > p1 + 0.5 ) ;
int equRect_w = ( int ) ( factor * gcascade - > p3 + 0.5 ) ;
int equRect_h = ( int ) ( factor * gcascade - > p2 + 0.5 ) ;
p [ i ] . s [ 0 ] = equRect_x ;
p [ i ] . s [ 1 ] = equRect_y ;
p [ i ] . s [ 2 ] = equRect_x + equRect_w ;
p [ i ] . s [ 3 ] = equRect_y + equRect_h ;
correction [ i ] = 1. / ( equRect_w * equRect_h ) ;
int startnodenum = m_nodenum * i ;
float factor2 = ( float ) factor ;
vector < pair < size_t , const void * > > args1 ;
args1 . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & ( ( OclBuffers * ) buffers ) - > nodebuffer ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & ( ( OclBuffers * ) buffers ) - > newnodebuffer ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_float ) , ( void * ) & factor2 ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_float ) , ( void * ) & correction [ i ] ) ) ;
args1 . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & startnodenum ) ) ;
size_t globalThreads2 [ 3 ] = { m_nodenum , 1 , 1 } ;
openCLExecuteKernel ( Context : : getContext ( ) , & haarobjectdetect_scaled2 , " gpuscaleclassifier " , globalThreads2 , NULL /*localThreads2*/ , args1 , - 1 , - 1 ) ;
}
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > stagebuffer , 1 , 0 , sizeof ( GpuHidHaarStageClassifier ) * gcascade - > count , stage , 0 , NULL , NULL ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > pbuffer , 1 , 0 , sizeof ( cl_int4 ) * m_loopcount , p , 0 , NULL , NULL ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > correctionbuffer , 1 , 0 , sizeof ( cl_float ) * m_loopcount , correction , 0 , NULL , NULL ) ) ;
free ( p ) ;
free ( correction ) ;
}
initialized = true ;
}
@ -1645,6 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
CvSize sz ;
CvSize winSize0 = oldCascade - > orig_window_size ;
detect_piramid_info * scaleinfo ;
cl_command_queue qu = reinterpret_cast < cl_command_queue > ( Context : : getContext ( ) - > oclCommandQueue ( ) ) ;
if ( flags & CV_HAAR_SCALE_IMAGE )
{
for ( factor = 1.f ; ; factor * = scaleFactor )
@ -1746,7 +1607,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
( ( OclBuffers * ) buffers ) - > scaleinfobuffer = openCLCreateBuffer ( cv : : ocl : : Context : : getContext ( ) , CL_MEM_READ_ONLY , sizeof ( detect_piramid_info ) * loopcount ) ;
}
openCLSafeCall ( clEnqueueWriteBuffer ( ( cl_command_ queue ) cv : : ocl : : Context : : getContext ( ) - > oclCommandQueue ( ) , ( ( OclBuffers * ) buffers ) - > scaleinfobuffer , 1 , 0 ,
openCLSafeCall ( clEnqueueWriteBuffer ( qu , ( ( OclBuffers * ) buffers ) - > scaleinfobuffer , 1 , 0 ,
sizeof ( detect_piramid_info ) * loopcount ,
scaleinfo , 0 , NULL , NULL ) ) ;
free ( scaleinfo ) ;
@ -1758,7 +1619,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
const std : : vector < cv : : Rect > & rectList ,
const std : : vector < int > & rweights )
{
CvSeq * result_seq = cvCreateSeq ( 0 , sizeof ( CvSeq ) , sizeof ( CvAvgComp ) , cvCreateMemStorage ( 0 ) ) ;
MemStorage tempStorage ( cvCreateMemStorage ( 0 ) ) ;
CvSeq * result_seq = cvCreateSeq ( 0 , sizeof ( CvSeq ) , sizeof ( CvAvgComp ) , tempStorage ) ;
if ( findBiggestObject & & rectList . size ( ) )
{
@ -1794,167 +1656,30 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
void cv : : ocl : : OclCascadeClassifierBuf : : release ( )
{
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > stagebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > scaleinfobuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > nodebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > candidatebuffer ) ) ;
if ( ( m_flags & CV_HAAR_SCALE_IMAGE ) )
{
cvFree ( & oldCascade - > hid_cascade ) ;
}
else
if ( initialized )
{
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > newnodebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > correctionbuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > pbuffer ) ) ;
}
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > stagebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > scaleinfobuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > nodebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > candidatebuffer ) ) ;
if ( ( m_flags & CV_HAAR_SCALE_IMAGE ) )
{
cvFree ( & oldCascade - > hid_cascade ) ;
}
else
{
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > newnodebuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > correctionbuffer ) ) ;
openCLSafeCall ( clReleaseMemObject ( ( ( OclBuffers * ) buffers ) - > pbuffer ) ) ;
}
free ( buffers ) ;
buffers = NULL ;
free ( buffers ) ;
buffers = NULL ;
initialized = false ;
}
}
/* Provide _MAX_PATH (1024) when no earlier header or definition supplies it. */
# ifndef _MAX_PATH
# define _MAX_PATH 1024
# endif
/****************************************************************************************\
*                                  Persistence functions                                 *
\****************************************************************************************/
/* Field names: string constants, one per named field of a stored Haar
 * cascade (size, stages, trees, feature rectangles, thresholds, child node
 * links/values, stage linkage).
 * NOTE(review): no use of these macros is visible in this part of the file;
 * confirm they are still referenced before relying on them. */
# define ICV_HAAR_SIZE_NAME "size"
# define ICV_HAAR_STAGES_NAME "stages"
# define ICV_HAAR_TREES_NAME "trees"
# define ICV_HAAR_FEATURE_NAME "feature"
# define ICV_HAAR_RECTS_NAME "rects"
# define ICV_HAAR_TILTED_NAME "tilted"
# define ICV_HAAR_THRESHOLD_NAME "threshold"
# define ICV_HAAR_LEFT_NODE_NAME "left_node"
# define ICV_HAAR_LEFT_VAL_NAME "left_val"
# define ICV_HAAR_RIGHT_NODE_NAME "right_node"
# define ICV_HAAR_RIGHT_VAL_NAME "right_val"
# define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
# define ICV_HAAR_PARENT_NAME "parent"
# define ICV_HAAR_NEXT_NAME "next"
/*
 * Stub cascade evaluation: takes no arguments and always reports 1.
 * Callers in this file treat a result > 0 as "window accepted", so every
 * window they test is accepted. The former parameter list is kept in the
 * comment below for reference.
 */
static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */ )
{
    const int accepted = 1;
    return accepted;
}
namespace cv
{
namespace ocl
{
struct gpuHaarDetectObjects_ScaleImage_Invoker
{
gpuHaarDetectObjects_ScaleImage_Invoker ( const CvHaarClassifierCascade * _cascade ,
int _stripSize , double _factor ,
const Mat & _sum1 , const Mat & _sqsum1 , Mat * _norm1 ,
Mat * _mask1 , Rect _equRect , ConcurrentRectVector & _vec )
{
cascade = _cascade ;
stripSize = _stripSize ;
factor = _factor ;
sum1 = _sum1 ;
sqsum1 = _sqsum1 ;
norm1 = _norm1 ;
mask1 = _mask1 ;
equRect = _equRect ;
vec = & _vec ;
}
void operator ( ) ( const BlockedRange & range ) const
{
Size winSize0 = cascade - > orig_window_size ;
Size winSize ( cvRound ( winSize0 . width * factor ) , cvRound ( winSize0 . height * factor ) ) ;
int y1 = range . begin ( ) * stripSize , y2 = min ( range . end ( ) * stripSize , sum1 . rows - 1 - winSize0 . height ) ;
Size ssz ( sum1 . cols - 1 - winSize0 . width , y2 - y1 ) ;
int x , y , ystep = factor > 2 ? 1 : 2 ;
for ( y = y1 ; y < y2 ; y + = ystep )
for ( x = 0 ; x < ssz . width ; x + = ystep )
{
if ( gpuRunHaarClassifierCascade ( /*cascade, cvPoint(x, y), 0*/ ) > 0 )
vec - > push_back ( Rect ( cvRound ( x * factor ) , cvRound ( y * factor ) ,
winSize . width , winSize . height ) ) ;
}
}
const CvHaarClassifierCascade * cascade ;
int stripSize ;
double factor ;
Mat sum1 , sqsum1 , * norm1 , * mask1 ;
Rect equRect ;
ConcurrentRectVector * vec ;
} ;
struct gpuHaarDetectObjects_ScaleCascade_Invoker
{
gpuHaarDetectObjects_ScaleCascade_Invoker ( const CvHaarClassifierCascade * _cascade ,
Size _winsize , const Range & _xrange , double _ystep ,
size_t _sumstep , const int * * _p , const int * * _pq ,
ConcurrentRectVector & _vec )
{
cascade = _cascade ;
winsize = _winsize ;
xrange = _xrange ;
ystep = _ystep ;
sumstep = _sumstep ;
p = _p ;
pq = _pq ;
vec = & _vec ;
}
void operator ( ) ( const BlockedRange & range ) const
{
int iy , startY = range . begin ( ) , endY = range . end ( ) ;
const int * p0 = p [ 0 ] , * p1 = p [ 1 ] , * p2 = p [ 2 ] , * p3 = p [ 3 ] ;
const int * pq0 = pq [ 0 ] , * pq1 = pq [ 1 ] , * pq2 = pq [ 2 ] , * pq3 = pq [ 3 ] ;
bool doCannyPruning = p0 ! = 0 ;
int sstep = ( int ) ( sumstep / sizeof ( p0 [ 0 ] ) ) ;
for ( iy = startY ; iy < endY ; iy + + )
{
int ix , y = cvRound ( iy * ystep ) , ixstep = 1 ;
for ( ix = xrange . start ; ix < xrange . end ; ix + = ixstep )
{
int x = cvRound ( ix * ystep ) ; // it should really be ystep, not ixstep
if ( doCannyPruning )
{
int offset = y * sstep + x ;
int s = p0 [ offset ] - p1 [ offset ] - p2 [ offset ] + p3 [ offset ] ;
int sq = pq0 [ offset ] - pq1 [ offset ] - pq2 [ offset ] + pq3 [ offset ] ;
if ( s < 100 | | sq < 20 )
{
ixstep = 2 ;
continue ;
}
}
int result = gpuRunHaarClassifierCascade ( /* cascade, cvPoint(x, y), 0 */ ) ;
if ( result > 0 )
vec - > push_back ( Rect ( x , y , winsize . width , winsize . height ) ) ;
ixstep = result ! = 0 ? 1 : 2 ;
}
}
}
const CvHaarClassifierCascade * cascade ;
double ystep ;
size_t sumstep ;
Size winsize ;
Range xrange ;
const int * * p ;
const int * * pq ;
ConcurrentRectVector * vec ;
} ;
}
}