|
|
|
@ -54,6 +54,9 @@ |
|
|
|
|
|
|
|
|
|
#if CV_AVX |
|
|
|
|
# define CV_HAAR_USE_AVX 1 |
|
|
|
|
# if defined _MSC_VER |
|
|
|
|
# pragma warning( disable : 4752 ) |
|
|
|
|
# endif |
|
|
|
|
#else |
|
|
|
|
# if CV_SSE2 || CV_SSE3 |
|
|
|
|
# define CV_HAAR_USE_SSE 1 |
|
|
|
@ -412,6 +415,9 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade ) |
|
|
|
|
#define calc_sum(rect,offset) \ |
|
|
|
|
((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) |
|
|
|
|
|
|
|
|
|
#define calc_sumf(rect,offset) \ |
|
|
|
|
static_cast<float>((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_IMPL void |
|
|
|
|
cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade, |
|
|
|
@ -652,7 +658,7 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
nodes[6] = (classifier+6)->node + idxV[6]; |
|
|
|
|
nodes[7] = (classifier+7)->node + idxV[7]; |
|
|
|
|
|
|
|
|
|
__m256 t = _mm256_set1_ps(variance_norm_factor); |
|
|
|
|
__m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor)); |
|
|
|
|
|
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
|
|
|
nodes[6]->threshold, |
|
|
|
@ -663,14 +669,14 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
nodes[1]->threshold, |
|
|
|
|
nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
|
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
|
|
|
nodes[6]->feature.rect[0].weight, |
|
|
|
@ -683,14 +689,14 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
|
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
|
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
|
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
|
|
|
nodes[6]->feature.rect[1].weight, |
|
|
|
@ -704,21 +710,21 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, |
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
|
|
|
|
|
|
if( nodes[0]->feature.rect[2].p0 ) |
|
|
|
|
tmp[0] = calc_sum(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
|
tmp[0] = calc_sumf(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
|
if( nodes[1]->feature.rect[2].p0 ) |
|
|
|
|
tmp[1] = calc_sum(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight; |
|
|
|
|
tmp[1] = calc_sumf(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight; |
|
|
|
|
if( nodes[2]->feature.rect[2].p0 ) |
|
|
|
|
tmp[2] = calc_sum(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight; |
|
|
|
|
tmp[2] = calc_sumf(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight; |
|
|
|
|
if( nodes[3]->feature.rect[2].p0 ) |
|
|
|
|
tmp[3] = calc_sum(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight; |
|
|
|
|
tmp[3] = calc_sumf(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight; |
|
|
|
|
if( nodes[4]->feature.rect[2].p0 ) |
|
|
|
|
tmp[4] = calc_sum(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight; |
|
|
|
|
tmp[4] = calc_sumf(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight; |
|
|
|
|
if( nodes[5]->feature.rect[2].p0 ) |
|
|
|
|
tmp[5] = calc_sum(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight; |
|
|
|
|
tmp[5] = calc_sumf(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight; |
|
|
|
|
if( nodes[6]->feature.rect[2].p0 ) |
|
|
|
|
tmp[6] = calc_sum(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight; |
|
|
|
|
tmp[6] = calc_sumf(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight; |
|
|
|
|
if( nodes[7]->feature.rect[2].p0 ) |
|
|
|
|
tmp[7] = calc_sum(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
|
tmp[7] = calc_sumf(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum,_mm256_load_ps(tmp)); |
|
|
|
|
|
|
|
|
@ -918,7 +924,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
classifiers[7] = cascade->stage_classifier[i].classifier + j + 7; |
|
|
|
|
nodes[7] = classifiers[7]->node; |
|
|
|
|
|
|
|
|
|
__m256 t = _mm256_set1_ps(variance_norm_factor); |
|
|
|
|
__m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor)); |
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
|
|
|
nodes[6]->threshold, |
|
|
|
|
nodes[5]->threshold, |
|
|
|
@ -928,14 +934,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
nodes[1]->threshold, |
|
|
|
|
nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
|
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
|
|
|
nodes[6]->feature.rect[0].weight, |
|
|
|
@ -948,14 +954,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
|
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
|
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
|
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
|
|
|
nodes[6]->feature.rect[1].weight, |
|
|
|
@ -1023,7 +1029,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
classifiers[7] = cascade->stage_classifier[i].classifier + j + 7; |
|
|
|
|
nodes[7] = classifiers[7]->node; |
|
|
|
|
|
|
|
|
|
__m256 t = _mm256_set1_ps(variance_norm_factor); |
|
|
|
|
__m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor)); |
|
|
|
|
|
|
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
|
|
|
nodes[6]->threshold, |
|
|
|
@ -1034,14 +1040,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
nodes[1]->threshold, |
|
|
|
|
nodes[0]->threshold)); |
|
|
|
|
|
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
__m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[6]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[5]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[4]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[3]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[2]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[1]->feature.rect[0], p_offset), |
|
|
|
|
calc_sumf(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
|
|
|
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
|
|
|
nodes[6]->feature.rect[0].weight, |
|
|
|
@ -1054,14 +1060,14 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
|
|
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
|
|
|
|
|
|
offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sum(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[6]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[5]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[4]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[3]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[2]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[1]->feature.rect[1], p_offset), |
|
|
|
|
calc_sumf(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
|
|
|
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
|
|
|
nodes[6]->feature.rect[1].weight, |
|
|
|
@ -1075,21 +1081,21 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
|
|
|
|
|
|
if( nodes[0]->feature.rect[2].p0 ) |
|
|
|
|
tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
|
tmp[0] = calc_sumf(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; |
|
|
|
|
if( nodes[1]->feature.rect[2].p0 ) |
|
|
|
|
tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight; |
|
|
|
|
tmp[1] = calc_sumf(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight; |
|
|
|
|
if( nodes[2]->feature.rect[2].p0 ) |
|
|
|
|
tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight; |
|
|
|
|
tmp[2] = calc_sumf(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight; |
|
|
|
|
if( nodes[3]->feature.rect[2].p0 ) |
|
|
|
|
tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight; |
|
|
|
|
tmp[3] = calc_sumf(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight; |
|
|
|
|
if( nodes[4]->feature.rect[2].p0 ) |
|
|
|
|
tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight; |
|
|
|
|
tmp[4] = calc_sumf(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight; |
|
|
|
|
if( nodes[5]->feature.rect[2].p0 ) |
|
|
|
|
tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight; |
|
|
|
|
tmp[5] = calc_sumf(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight; |
|
|
|
|
if( nodes[6]->feature.rect[2].p0 ) |
|
|
|
|
tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight; |
|
|
|
|
tmp[6] = calc_sumf(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight; |
|
|
|
|
if( nodes[7]->feature.rect[2].p0 ) |
|
|
|
|
tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
|
tmp[7] = calc_sumf(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
|
|
|
|
|
|
sum = _mm256_add_ps(sum, _mm256_load_ps(tmp)); |
|
|
|
|
|
|
|
|
|