|
|
|
@ -45,7 +45,6 @@ |
|
|
|
|
#include <stdio.h> |
|
|
|
|
#include "opencv2/core/internal.hpp" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if CV_SSE2 || CV_SSE3 |
|
|
|
|
# if !CV_SSE4_1 && !CV_SSE4_2 |
|
|
|
|
# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m)) |
|
|
|
@ -53,13 +52,13 @@ |
|
|
|
|
# endif |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
# if CV_AVX |
|
|
|
|
#if CV_AVX |
|
|
|
|
# define CV_HAAR_USE_AVX 1 |
|
|
|
|
# else |
|
|
|
|
#else |
|
|
|
|
# if CV_SSE2 || CV_SSE3 |
|
|
|
|
# define CV_HAAR_USE_SSE 1 |
|
|
|
|
# endif |
|
|
|
|
# endif |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* these settings affect the quality of detection: change with care */ |
|
|
|
|
#define CV_ADJUST_FEATURES 1 |
|
|
|
@ -76,8 +75,7 @@ typedef struct CvHidHaarFeature |
|
|
|
|
float weight; |
|
|
|
|
} |
|
|
|
|
rect[CV_HAAR_FEATURE_MAX]; |
|
|
|
|
} |
|
|
|
|
CvHidHaarFeature; |
|
|
|
|
} CvHidHaarFeature; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct CvHidHaarTreeNode |
|
|
|
@ -86,8 +84,7 @@ typedef struct CvHidHaarTreeNode |
|
|
|
|
float threshold; |
|
|
|
|
int left; |
|
|
|
|
int right; |
|
|
|
|
} |
|
|
|
|
CvHidHaarTreeNode; |
|
|
|
|
} CvHidHaarTreeNode; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct CvHidHaarClassifier |
|
|
|
@ -96,8 +93,7 @@ typedef struct CvHidHaarClassifier |
|
|
|
|
//CvHaarFeature* orig_feature;
|
|
|
|
|
CvHidHaarTreeNode* node; |
|
|
|
|
float* alpha; |
|
|
|
|
} |
|
|
|
|
CvHidHaarClassifier; |
|
|
|
|
} CvHidHaarClassifier; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct CvHidHaarStageClassifier |
|
|
|
@ -110,11 +106,10 @@ typedef struct CvHidHaarStageClassifier |
|
|
|
|
struct CvHidHaarStageClassifier* next; |
|
|
|
|
struct CvHidHaarStageClassifier* child; |
|
|
|
|
struct CvHidHaarStageClassifier* parent; |
|
|
|
|
} |
|
|
|
|
CvHidHaarStageClassifier; |
|
|
|
|
} CvHidHaarStageClassifier; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct CvHidHaarClassifierCascade |
|
|
|
|
typedef struct CvHidHaarClassifierCascade |
|
|
|
|
{ |
|
|
|
|
int count; |
|
|
|
|
int isStumpBased; |
|
|
|
@ -127,7 +122,7 @@ struct CvHidHaarClassifierCascade |
|
|
|
|
sumtype *p0, *p1, *p2, *p3; |
|
|
|
|
|
|
|
|
|
void** ipp_stages; |
|
|
|
|
}; |
|
|
|
|
} CvHidHaarClassifierCascade; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const int icv_object_win_border = 1; |
|
|
|
@ -634,21 +629,21 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!!
|
|
|
|
|
// AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!!
|
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
CV_INLINE |
|
|
|
|
double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
double variance_norm_factor, size_t p_offset ) |
|
|
|
|
{ |
|
|
|
|
int CV_DECL_ALIGNED(32) idxV[8] = {0,0,0,0,0,0,0,0}; |
|
|
|
|
char flags[8] = {0,0,0,0,0,0,0,0}; |
|
|
|
|
uchar flags[8] = {0,0,0,0,0,0,0,0}; |
|
|
|
|
CvHidHaarTreeNode* nodes[8]; |
|
|
|
|
double res = 0; |
|
|
|
|
char exitConditionFlag = 0; |
|
|
|
|
uchar exitConditionFlag = 0; |
|
|
|
|
for(;;) |
|
|
|
|
{ |
|
|
|
|
float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0}; |
|
|
|
|
nodes[0] = classifier ->node + idxV[0]; |
|
|
|
|
nodes[0] = (classifier+0)->node + idxV[0]; |
|
|
|
|
nodes[1] = (classifier+1)->node + idxV[1]; |
|
|
|
|
nodes[2] = (classifier+2)->node + idxV[2]; |
|
|
|
|
nodes[3] = (classifier+3)->node + idxV[3]; |
|
|
|
@ -658,46 +653,79 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
nodes[7] = (classifier+7)->node + idxV[7]; |
|
|
|
|
|
|
|
|
|
__m256 t = _mm256_set1_ps(variance_norm_factor); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0], |
|
|
|
|
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); |
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, |
|
|
|
|
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); |
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
|
|
|
nodes[6]->threshold, |
|
|
|
|
nodes[5]->threshold, |
|
|
|
|
nodes[4]->threshold, |
|
|
|
|
nodes[3]->threshold, |
|
|
|
|
nodes[2]->threshold, |
|
|
|
|
nodes[1]->threshold, |
|
|
|
|
nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
|
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
|
|
|
nodes[6]->feature.rect[0].weight, |
|
|
|
|
nodes[5]->feature.rect[0].weight, |
|
|
|
|
nodes[4]->feature.rect[0].weight, |
|
|
|
|
nodes[3]->feature.rect[0].weight, |
|
|
|
|
nodes[2]->feature.rect[0].weight, |
|
|
|
|
nodes[1]->feature.rect[0].weight, |
|
|
|
|
nodes[0]->feature.rect[0].weight); |
|
|
|
|
|
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1],p_offset)); |
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, |
|
|
|
|
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); |
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); |
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
|
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
|
|
|
nodes[6]->feature.rect[1].weight, |
|
|
|
|
nodes[5]->feature.rect[1].weight, |
|
|
|
|
nodes[4]->feature.rect[1].weight, |
|
|
|
|
nodes[3]->feature.rect[1].weight, |
|
|
|
|
nodes[2]->feature.rect[1].weight, |
|
|
|
|
nodes[1]->feature.rect[1].weight, |
|
|
|
|
nodes[0]->feature.rect[1].weight); |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
|
|
|
|
|
|
if( nodes[0]->feature.rect[2].p0 ) |
|
|
|
|
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
|
tmp[0] = calc_sum(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
|
if( nodes[1]->feature.rect[2].p0 ) |
|
|
|
|
tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight; |
|
|
|
|
tmp[1] = calc_sum(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight; |
|
|
|
|
if( nodes[2]->feature.rect[2].p0 ) |
|
|
|
|
tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight; |
|
|
|
|
tmp[2] = calc_sum(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight; |
|
|
|
|
if( nodes[3]->feature.rect[2].p0 ) |
|
|
|
|
tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight; |
|
|
|
|
tmp[3] = calc_sum(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight; |
|
|
|
|
if( nodes[4]->feature.rect[2].p0 ) |
|
|
|
|
tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight; |
|
|
|
|
tmp[4] = calc_sum(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight; |
|
|
|
|
if( nodes[5]->feature.rect[2].p0 ) |
|
|
|
|
tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight; |
|
|
|
|
tmp[5] = calc_sum(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight; |
|
|
|
|
if( nodes[6]->feature.rect[2].p0 ) |
|
|
|
|
tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight; |
|
|
|
|
tmp[6] = calc_sum(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight; |
|
|
|
|
if( nodes[7]->feature.rect[2].p0 ) |
|
|
|
|
tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
|
tmp[7] = calc_sum(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum,_mm256_load_ps(tmp)); |
|
|
|
|
|
|
|
|
|
__m256 left = _mm256_set_ps(nodes[7]->left,nodes[6]->left,nodes[5]->left,nodes[4]->left,nodes[3]->left,nodes[2]->left,nodes[1]->left,nodes[0]->left); |
|
|
|
|
__m256 left = _mm256_set_ps(nodes[7]->left, nodes[6]->left, nodes[5]->left, nodes[4]->left, nodes[3]->left, nodes[2]->left, nodes[1]->left, nodes[0]->left ); |
|
|
|
|
__m256 right = _mm256_set_ps(nodes[7]->right,nodes[6]->right,nodes[5]->right,nodes[4]->right,nodes[3]->right,nodes[2]->right,nodes[1]->right,nodes[0]->right); |
|
|
|
|
|
|
|
|
|
_mm256_store_si256((__m256i*)idxV,_mm256_cvttps_epi32(_mm256_blendv_ps(right, left,_mm256_cmp_ps(sum, t, _CMP_LT_OQ )))); |
|
|
|
|
_mm256_store_si256((__m256i*)idxV, _mm256_cvttps_epi32(_mm256_blendv_ps(right, left, _mm256_cmp_ps(sum, t, _CMP_LT_OQ)))); |
|
|
|
|
|
|
|
|
|
for(int i = 0; i < 8; i++) |
|
|
|
|
{ |
|
|
|
@ -706,17 +734,17 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
if(!flags[i]) |
|
|
|
|
{ |
|
|
|
|
exitConditionFlag++; |
|
|
|
|
flags[i]=1; |
|
|
|
|
res+=((classifier+i)->alpha[-idxV[i]]); |
|
|
|
|
flags[i] = 1; |
|
|
|
|
res += (classifier+i)->alpha[-idxV[i]]; |
|
|
|
|
} |
|
|
|
|
idxV[i]=0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if(exitConditionFlag==8) |
|
|
|
|
if(exitConditionFlag == 8) |
|
|
|
|
return res; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
#endif //CV_HAAR_USE_AVX
|
|
|
|
|
|
|
|
|
|
CV_INLINE |
|
|
|
|
double icvEvalHidHaarClassifier( CvHidHaarClassifier* classifier, |
|
|
|
@ -778,18 +806,16 @@ static int |
|
|
|
|
cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
CvPoint pt, double& stage_sum, int start_stage ) |
|
|
|
|
{ |
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
bool haveAVX = false; |
|
|
|
|
if(cv::checkHardwareSupport(CV_CPU_AVX)) |
|
|
|
|
if(__xgetbv()&0x6)// Check if the OS will save the YMM registers
|
|
|
|
|
{ |
|
|
|
|
haveAVX = true; |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
#ifdef CV_HAAR_USE_SSE |
|
|
|
|
#else |
|
|
|
|
# ifdef CV_HAAR_USE_SSE |
|
|
|
|
bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); |
|
|
|
|
#endif |
|
|
|
|
#endif |
|
|
|
|
# endif |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
int p_offset, pq_offset; |
|
|
|
|
int i, j; |
|
|
|
@ -828,19 +854,20 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
while( ptr ) |
|
|
|
|
{ |
|
|
|
|
stage_sum = 0.0; |
|
|
|
|
j = 0; |
|
|
|
|
|
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
if(haveAVX) |
|
|
|
|
{ |
|
|
|
|
for( ; j < cascade->stage_classifier[i].count-8; j+=8 ) |
|
|
|
|
for( ; j <= ptr->count - 8; j += 8 ) |
|
|
|
|
{ |
|
|
|
|
stage_sum += icvEvalHidHaarClassifierAVX( |
|
|
|
|
cascade->stage_classifier[i].classifier+j, |
|
|
|
|
ptr->classifier + j, |
|
|
|
|
variance_norm_factor, p_offset ); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
for( j = 0; j < ptr->count; j++ ) |
|
|
|
|
#endif |
|
|
|
|
for( ; j < ptr->count; j++ ) |
|
|
|
|
{ |
|
|
|
|
stage_sum += icvEvalHidHaarClassifier( ptr->classifier + j, variance_norm_factor, p_offset ); |
|
|
|
|
} |
|
|
|
@ -860,7 +887,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
} |
|
|
|
|
else if( cascade->isStumpBased ) |
|
|
|
|
{ |
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
if(haveAVX) |
|
|
|
|
{ |
|
|
|
|
CvHidHaarClassifier* classifiers[8]; |
|
|
|
@ -872,15 +899,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
float CV_DECL_ALIGNED(32) buf[8]; |
|
|
|
|
if( cascade->stage_classifier[i].two_rects ) |
|
|
|
|
{ |
|
|
|
|
for( ; j <= cascade->stage_classifier[i].count-8; j+=8 ) |
|
|
|
|
for( ; j <= cascade->stage_classifier[i].count - 8; j += 8 ) |
|
|
|
|
{ |
|
|
|
|
//__m256 stage_sumPart = _mm256_setzero_ps();
|
|
|
|
|
classifiers[0] = cascade->stage_classifier[i].classifier + j; |
|
|
|
|
nodes[0] = classifiers[0]->node; |
|
|
|
|
classifiers[1] = cascade->stage_classifier[i].classifier + j + 1; |
|
|
|
|
nodes[1] = classifiers[1]->node; |
|
|
|
|
classifiers[2] = cascade->stage_classifier[i].classifier + j + 2; |
|
|
|
|
nodes[2]= classifiers[2]->node; |
|
|
|
|
nodes[2] = classifiers[2]->node; |
|
|
|
|
classifiers[3] = cascade->stage_classifier[i].classifier + j + 3; |
|
|
|
|
nodes[3] = classifiers[3]->node; |
|
|
|
|
classifiers[4] = cascade->stage_classifier[i].classifier + j + 4; |
|
|
|
@ -893,30 +919,74 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
nodes[7] = classifiers[7]->node; |
|
|
|
|
|
|
|
|
|
__m256 t = _mm256_set1_ps(variance_norm_factor); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold)); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
|
|
|
nodes[6]->threshold, |
|
|
|
|
nodes[5]->threshold, |
|
|
|
|
nodes[4]->threshold, |
|
|
|
|
nodes[3]->threshold, |
|
|
|
|
nodes[2]->threshold, |
|
|
|
|
nodes[1]->threshold, |
|
|
|
|
nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
|
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
|
|
|
nodes[6]->feature.rect[0].weight, |
|
|
|
|
nodes[5]->feature.rect[0].weight, |
|
|
|
|
nodes[4]->feature.rect[0].weight, |
|
|
|
|
nodes[3]->feature.rect[0].weight, |
|
|
|
|
nodes[2]->feature.rect[0].weight, |
|
|
|
|
nodes[1]->feature.rect[0].weight, |
|
|
|
|
nodes[0]->feature.rect[0].weight); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0], |
|
|
|
|
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); |
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, |
|
|
|
|
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); |
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
|
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1],p_offset)); |
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, |
|
|
|
|
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); |
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); |
|
|
|
|
|
|
|
|
|
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0], |
|
|
|
|
classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]); |
|
|
|
|
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1], |
|
|
|
|
classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]); |
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
|
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
|
|
|
nodes[6]->feature.rect[1].weight, |
|
|
|
|
nodes[5]->feature.rect[1].weight, |
|
|
|
|
nodes[4]->feature.rect[1].weight, |
|
|
|
|
nodes[3]->feature.rect[1].weight, |
|
|
|
|
nodes[2]->feature.rect[1].weight, |
|
|
|
|
nodes[1]->feature.rect[1].weight, |
|
|
|
|
nodes[0]->feature.rect[1].weight); |
|
|
|
|
|
|
|
|
|
_mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ ))); |
|
|
|
|
stage_sum+=(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); |
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); |
|
|
|
|
|
|
|
|
|
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0], |
|
|
|
|
classifiers[6]->alpha[0], |
|
|
|
|
classifiers[5]->alpha[0], |
|
|
|
|
classifiers[4]->alpha[0], |
|
|
|
|
classifiers[3]->alpha[0], |
|
|
|
|
classifiers[2]->alpha[0], |
|
|
|
|
classifiers[1]->alpha[0], |
|
|
|
|
classifiers[0]->alpha[0]); |
|
|
|
|
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1], |
|
|
|
|
classifiers[6]->alpha[1], |
|
|
|
|
classifiers[5]->alpha[1], |
|
|
|
|
classifiers[4]->alpha[1], |
|
|
|
|
classifiers[3]->alpha[1], |
|
|
|
|
classifiers[2]->alpha[1], |
|
|
|
|
classifiers[1]->alpha[1], |
|
|
|
|
classifiers[0]->alpha[1]); |
|
|
|
|
|
|
|
|
|
_mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ))); |
|
|
|
|
stage_sum += (buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for( ; j < cascade->stage_classifier[i].count; j++ ) |
|
|
|
@ -941,7 +1011,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
classifiers[1] = cascade->stage_classifier[i].classifier + j + 1; |
|
|
|
|
nodes[1] = classifiers[1]->node; |
|
|
|
|
classifiers[2] = cascade->stage_classifier[i].classifier + j + 2; |
|
|
|
|
nodes[2]= classifiers[2]->node; |
|
|
|
|
nodes[2] = classifiers[2]->node; |
|
|
|
|
classifiers[3] = cascade->stage_classifier[i].classifier + j + 3; |
|
|
|
|
nodes[3] = classifiers[3]->node; |
|
|
|
|
classifiers[4] = cascade->stage_classifier[i].classifier + j + 4; |
|
|
|
@ -954,22 +1024,55 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
nodes[7] = classifiers[7]->node; |
|
|
|
|
|
|
|
|
|
__m256 t = _mm256_set1_ps(variance_norm_factor); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0], |
|
|
|
|
p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); |
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, |
|
|
|
|
nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
|
|
|
nodes[6]->threshold, |
|
|
|
|
nodes[5]->threshold, |
|
|
|
|
nodes[4]->threshold, |
|
|
|
|
nodes[3]->threshold, |
|
|
|
|
nodes[2]->threshold, |
|
|
|
|
nodes[1]->threshold, |
|
|
|
|
nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
|
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
|
|
|
nodes[6]->feature.rect[0].weight, |
|
|
|
|
nodes[5]->feature.rect[0].weight, |
|
|
|
|
nodes[4]->feature.rect[0].weight, |
|
|
|
|
nodes[3]->feature.rect[0].weight, |
|
|
|
|
nodes[2]->feature.rect[0].weight, |
|
|
|
|
nodes[1]->feature.rect[0].weight, |
|
|
|
|
nodes[0]->feature.rect[0].weight); |
|
|
|
|
|
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
|
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1],p_offset)); |
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, |
|
|
|
|
nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); |
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
|
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
|
|
|
nodes[6]->feature.rect[1].weight, |
|
|
|
|
nodes[5]->feature.rect[1].weight, |
|
|
|
|
nodes[4]->feature.rect[1].weight, |
|
|
|
|
nodes[3]->feature.rect[1].weight, |
|
|
|
|
nodes[2]->feature.rect[1].weight, |
|
|
|
|
nodes[1]->feature.rect[1].weight, |
|
|
|
|
nodes[0]->feature.rect[1].weight); |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
|
|
|
|
|
|
if( nodes[0]->feature.rect[2].p0 ) |
|
|
|
|
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
@ -990,16 +1093,28 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_load_ps(tmp)); |
|
|
|
|
|
|
|
|
|
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0], |
|
|
|
|
classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]); |
|
|
|
|
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1], |
|
|
|
|
classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]); |
|
|
|
|
__m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0], |
|
|
|
|
classifiers[6]->alpha[0], |
|
|
|
|
classifiers[5]->alpha[0], |
|
|
|
|
classifiers[4]->alpha[0], |
|
|
|
|
classifiers[3]->alpha[0], |
|
|
|
|
classifiers[2]->alpha[0], |
|
|
|
|
classifiers[1]->alpha[0], |
|
|
|
|
classifiers[0]->alpha[0]); |
|
|
|
|
__m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1], |
|
|
|
|
classifiers[6]->alpha[1], |
|
|
|
|
classifiers[5]->alpha[1], |
|
|
|
|
classifiers[4]->alpha[1], |
|
|
|
|
classifiers[3]->alpha[1], |
|
|
|
|
classifiers[2]->alpha[1], |
|
|
|
|
classifiers[1]->alpha[1], |
|
|
|
|
classifiers[0]->alpha[1]); |
|
|
|
|
|
|
|
|
|
__m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )); |
|
|
|
|
outBuf = _mm256_hadd_ps(outBuf, outBuf); |
|
|
|
|
outBuf = _mm256_hadd_ps(outBuf, outBuf); |
|
|
|
|
_mm256_store_ps(buf, outBuf); |
|
|
|
|
stage_sum+=(buf[0]+buf[4]);//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
|
|
|
|
|
stage_sum += (buf[0] + buf[4]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for( ; j < cascade->stage_classifier[i].count; j++ ) |
|
|
|
@ -1020,8 +1135,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
#endif |
|
|
|
|
#if defined CV_HAAR_USE_SSE && CV_HAAR_USE_SSE && (!defined CV_HAAR_USE_AVX || !CV_HAAR_USE_AVX) //old SSE optimization
|
|
|
|
|
#elif defined CV_HAAR_USE_SSE //old SSE optimization
|
|
|
|
|
if(haveSSE2) |
|
|
|
|
{ |
|
|
|
|
for( i = start_stage; i < cascade->count; i++ ) |
|
|
|
@ -1070,7 +1184,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
#endif |
|
|
|
|
#endif // AVX or SSE
|
|
|
|
|
{ |
|
|
|
|
for( i = start_stage; i < cascade->count; i++ ) |
|
|
|
|
{ |
|
|
|
@ -1106,24 +1220,24 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for( i = start_stage; i < cascade->count; i++ ) |
|
|
|
|
{ |
|
|
|
|
stage_sum = 0.0; |
|
|
|
|
int k = 0; |
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
|
|
|
|
|
#ifdef CV_HAAR_USE_AVX |
|
|
|
|
if(haveAVX) |
|
|
|
|
{ |
|
|
|
|
for( ; k < cascade->stage_classifier[i].count-8; k+=8 ) |
|
|
|
|
for( ; k < cascade->stage_classifier[i].count - 8; k += 8 ) |
|
|
|
|
{ |
|
|
|
|
stage_sum += icvEvalHidHaarClassifierAVX( |
|
|
|
|
cascade->stage_classifier[i].classifier+k, |
|
|
|
|
cascade->stage_classifier[i].classifier + k, |
|
|
|
|
variance_norm_factor, p_offset ); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
#endif |
|
|
|
|
for(; k < cascade->stage_classifier[i].count; k++ ) |
|
|
|
|
{ |
|
|
|
|
|
|
|
|
@ -1136,7 +1250,6 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
return -i; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
//_mm256_zeroupper();
|
|
|
|
|
return 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -1186,7 +1299,7 @@ struct HaarDetectObjects_ScaleImage_Invoker |
|
|
|
|
Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1); |
|
|
|
|
int x, y, ystep = factor > 2 ? 1 : 2; |
|
|
|
|
|
|
|
|
|
#ifdef HAVE_IPP |
|
|
|
|
#ifdef HAVE_IPP |
|
|
|
|
if( cascade->hid_cascade->ipp_stages ) |
|
|
|
|
{ |
|
|
|
|
IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height}; |
|
|
|
@ -1241,7 +1354,7 @@ struct HaarDetectObjects_ScaleImage_Invoker |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
#endif |
|
|
|
|
#endif // IPP
|
|
|
|
|
for( y = y1; y < y2; y += ystep ) |
|
|
|
|
for( x = 0; x < ssz.width; x += ystep ) |
|
|
|
|
{ |
|
|
|
@ -2418,45 +2531,4 @@ CvType haar_type( CV_TYPE_NAME_HAAR, icvIsHaarClassifier, |
|
|
|
|
icvReadHaarClassifier, icvWriteHaarClassifier, |
|
|
|
|
icvCloneHaarClassifier ); |
|
|
|
|
|
|
|
|
|
#if 0 |
|
|
|
|
namespace cv |
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
HaarClassifierCascade::HaarClassifierCascade() {} |
|
|
|
|
HaarClassifierCascade::HaarClassifierCascade(const String& filename) |
|
|
|
|
{ load(filename); } |
|
|
|
|
|
|
|
|
|
bool HaarClassifierCascade::load(const String& filename) |
|
|
|
|
{ |
|
|
|
|
cascade = Ptr<CvHaarClassifierCascade>((CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0)); |
|
|
|
|
return (CvHaarClassifierCascade*)cascade != 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void HaarClassifierCascade::detectMultiScale( const Mat& image, |
|
|
|
|
Vector<Rect>& objects, double scaleFactor, |
|
|
|
|
int minNeighbors, int flags, |
|
|
|
|
Size minSize ) |
|
|
|
|
{ |
|
|
|
|
MemStorage storage(cvCreateMemStorage(0)); |
|
|
|
|
CvMat _image = image; |
|
|
|
|
CvSeq* _objects = cvHaarDetectObjects( &_image, cascade, storage, scaleFactor, |
|
|
|
|
minNeighbors, flags, minSize ); |
|
|
|
|
Seq<Rect>(_objects).copyTo(objects); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int HaarClassifierCascade::runAt(Point pt, int startStage, int) const |
|
|
|
|
{ |
|
|
|
|
return cvRunHaarClassifierCascade(cascade, pt, startStage); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void HaarClassifierCascade::setImages( const Mat& sum, const Mat& sqsum, |
|
|
|
|
const Mat& tilted, double scale ) |
|
|
|
|
{ |
|
|
|
|
CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; |
|
|
|
|
cvSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* End of file. */ |
|
|
|
|