Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
369 lines
16 KiB
369 lines
16 KiB
/*M/////////////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
|
// |
|
// By downloading, copying, installing or using the software you agree to this license. |
|
// If you do not agree to this license, do not download, install, |
|
// copy or use the software. |
|
// |
|
// |
|
// Intel License Agreement |
|
// For Open Source Computer Vision Library |
|
// |
|
// Copyright (C) 2000, Intel Corporation, all rights reserved. |
|
// Third party copyrights are property of their respective owners. |
|
// |
|
// Redistribution and use in source and binary forms, with or without modification, |
|
// are permitted provided that the following conditions are met: |
|
// |
|
// * Redistribution's of source code must retain the above copyright notice, |
|
// this list of conditions and the following disclaimer. |
|
// |
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
|
// this list of conditions and the following disclaimer in the documentation |
|
// and/or other materials provided with the distribution. |
|
// |
|
// * The name of Intel Corporation may not be used to endorse or promote products |
|
// derived from this software without specific prior written permission. |
|
// |
|
// This software is provided by the copyright holders and contributors "as is" and |
|
// any express or implied warranties, including, but not limited to, the implied |
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
|
// indirect, incidental, special, exemplary, or consequential damages |
|
// (including, but not limited to, procurement of substitute goods or services; |
|
// loss of use, data, or profits; or business interruption) however caused |
|
// and on any theory of liability, whether in contract, strict liability, |
|
// or tort (including negligence or otherwise) arising in any way out of |
|
// the use of this software, even if advised of the possibility of such damage. |
|
// |
|
//M*/ |
|
|
|
/* Haar features calculation */ |
|
|
|
#include "precomp.hpp" |
|
#include "haar.hpp" |
|
|
|
namespace cv_haar_avx |
|
{ |
|
|
|
// AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!! |
|
#if CV_HAAR_USE_AVX |
|
double icvEvalHidHaarClassifierAVX(CvHidHaarClassifier* classifier, |
|
double variance_norm_factor, size_t p_offset) |
|
{ |
|
int CV_DECL_ALIGNED(32) idxV[8] = { 0,0,0,0,0,0,0,0 }; |
|
uchar flags[8] = { 0,0,0,0,0,0,0,0 }; |
|
CvHidHaarTreeNode* nodes[8]; |
|
double res = 0; |
|
uchar exitConditionFlag = 0; |
|
for (;;) |
|
{ |
|
float CV_DECL_ALIGNED(32) tmp[8] = { 0,0,0,0,0,0,0,0 }; |
|
nodes[0] = (classifier + 0)->node + idxV[0]; |
|
nodes[1] = (classifier + 1)->node + idxV[1]; |
|
nodes[2] = (classifier + 2)->node + idxV[2]; |
|
nodes[3] = (classifier + 3)->node + idxV[3]; |
|
nodes[4] = (classifier + 4)->node + idxV[4]; |
|
nodes[5] = (classifier + 5)->node + idxV[5]; |
|
nodes[6] = (classifier + 6)->node + idxV[6]; |
|
nodes[7] = (classifier + 7)->node + idxV[7]; |
|
|
|
__m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor)); |
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
nodes[6]->threshold, |
|
nodes[5]->threshold, |
|
nodes[4]->threshold, |
|
nodes[3]->threshold, |
|
nodes[2]->threshold, |
|
nodes[1]->threshold, |
|
nodes[0]->threshold)); |
|
|
|
__m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[6]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[5]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[4]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[3]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[2]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[1]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
nodes[6]->feature.rect[0].weight, |
|
nodes[5]->feature.rect[0].weight, |
|
nodes[4]->feature.rect[0].weight, |
|
nodes[3]->feature.rect[0].weight, |
|
nodes[2]->feature.rect[0].weight, |
|
nodes[1]->feature.rect[0].weight, |
|
nodes[0]->feature.rect[0].weight); |
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[6]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[5]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[4]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[3]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[2]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[1]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
nodes[6]->feature.rect[1].weight, |
|
nodes[5]->feature.rect[1].weight, |
|
nodes[4]->feature.rect[1].weight, |
|
nodes[3]->feature.rect[1].weight, |
|
nodes[2]->feature.rect[1].weight, |
|
nodes[1]->feature.rect[1].weight, |
|
nodes[0]->feature.rect[1].weight); |
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
if (nodes[0]->feature.rect[2].p0) |
|
tmp[0] = calc_sumf(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight; |
|
if (nodes[1]->feature.rect[2].p0) |
|
tmp[1] = calc_sumf(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight; |
|
if (nodes[2]->feature.rect[2].p0) |
|
tmp[2] = calc_sumf(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight; |
|
if (nodes[3]->feature.rect[2].p0) |
|
tmp[3] = calc_sumf(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight; |
|
if (nodes[4]->feature.rect[2].p0) |
|
tmp[4] = calc_sumf(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight; |
|
if (nodes[5]->feature.rect[2].p0) |
|
tmp[5] = calc_sumf(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight; |
|
if (nodes[6]->feature.rect[2].p0) |
|
tmp[6] = calc_sumf(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight; |
|
if (nodes[7]->feature.rect[2].p0) |
|
tmp[7] = calc_sumf(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
sum = _mm256_add_ps(sum, _mm256_load_ps(tmp)); |
|
|
|
__m256 left = _mm256_set_ps(static_cast<float>(nodes[7]->left), static_cast<float>(nodes[6]->left), |
|
static_cast<float>(nodes[5]->left), static_cast<float>(nodes[4]->left), |
|
static_cast<float>(nodes[3]->left), static_cast<float>(nodes[2]->left), |
|
static_cast<float>(nodes[1]->left), static_cast<float>(nodes[0]->left)); |
|
__m256 right = _mm256_set_ps(static_cast<float>(nodes[7]->right), static_cast<float>(nodes[6]->right), |
|
static_cast<float>(nodes[5]->right), static_cast<float>(nodes[4]->right), |
|
static_cast<float>(nodes[3]->right), static_cast<float>(nodes[2]->right), |
|
static_cast<float>(nodes[1]->right), static_cast<float>(nodes[0]->right)); |
|
|
|
_mm256_store_si256((__m256i*)idxV, _mm256_cvttps_epi32(_mm256_blendv_ps(right, left, _mm256_cmp_ps(sum, t, _CMP_LT_OQ)))); |
|
|
|
for (int i = 0; i < 8; i++) |
|
{ |
|
if (idxV[i] <= 0) |
|
{ |
|
if (!flags[i]) |
|
{ |
|
exitConditionFlag++; |
|
flags[i] = 1; |
|
res += (classifier + i)->alpha[-idxV[i]]; |
|
} |
|
idxV[i] = 0; |
|
} |
|
} |
|
if (exitConditionFlag == 8) |
|
return res; |
|
} |
|
} |
|
|
|
double icvEvalHidHaarStumpClassifierAVX(CvHidHaarClassifier* classifier, |
|
double variance_norm_factor, size_t p_offset) |
|
{ |
|
float CV_DECL_ALIGNED(32) tmp[8] = { 0,0,0,0,0,0,0,0 }; |
|
CvHidHaarTreeNode* nodes[8]; |
|
|
|
nodes[0] = classifier[0].node; |
|
nodes[1] = classifier[1].node; |
|
nodes[2] = classifier[2].node; |
|
nodes[3] = classifier[3].node; |
|
nodes[4] = classifier[4].node; |
|
nodes[5] = classifier[5].node; |
|
nodes[6] = classifier[6].node; |
|
nodes[7] = classifier[7].node; |
|
|
|
__m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor)); |
|
|
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
nodes[6]->threshold, |
|
nodes[5]->threshold, |
|
nodes[4]->threshold, |
|
nodes[3]->threshold, |
|
nodes[2]->threshold, |
|
nodes[1]->threshold, |
|
nodes[0]->threshold)); |
|
|
|
__m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[6]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[5]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[4]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[3]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[2]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[1]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
nodes[6]->feature.rect[0].weight, |
|
nodes[5]->feature.rect[0].weight, |
|
nodes[4]->feature.rect[0].weight, |
|
nodes[3]->feature.rect[0].weight, |
|
nodes[2]->feature.rect[0].weight, |
|
nodes[1]->feature.rect[0].weight, |
|
nodes[0]->feature.rect[0].weight); |
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[6]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[5]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[4]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[3]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[2]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[1]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
nodes[6]->feature.rect[1].weight, |
|
nodes[5]->feature.rect[1].weight, |
|
nodes[4]->feature.rect[1].weight, |
|
nodes[3]->feature.rect[1].weight, |
|
nodes[2]->feature.rect[1].weight, |
|
nodes[1]->feature.rect[1].weight, |
|
nodes[0]->feature.rect[1].weight); |
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
if (nodes[0]->feature.rect[2].p0) |
|
tmp[0] = calc_sumf(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight; |
|
if (nodes[1]->feature.rect[2].p0) |
|
tmp[1] = calc_sumf(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight; |
|
if (nodes[2]->feature.rect[2].p0) |
|
tmp[2] = calc_sumf(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight; |
|
if (nodes[3]->feature.rect[2].p0) |
|
tmp[3] = calc_sumf(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight; |
|
if (nodes[4]->feature.rect[2].p0) |
|
tmp[4] = calc_sumf(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight; |
|
if (nodes[5]->feature.rect[2].p0) |
|
tmp[5] = calc_sumf(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight; |
|
if (nodes[6]->feature.rect[2].p0) |
|
tmp[6] = calc_sumf(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight; |
|
if (nodes[7]->feature.rect[2].p0) |
|
tmp[7] = calc_sumf(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight; |
|
|
|
sum = _mm256_add_ps(sum, _mm256_load_ps(tmp)); |
|
|
|
__m256 alpha0 = _mm256_set_ps(classifier[7].alpha[0], |
|
classifier[6].alpha[0], |
|
classifier[5].alpha[0], |
|
classifier[4].alpha[0], |
|
classifier[3].alpha[0], |
|
classifier[2].alpha[0], |
|
classifier[1].alpha[0], |
|
classifier[0].alpha[0]); |
|
__m256 alpha1 = _mm256_set_ps(classifier[7].alpha[1], |
|
classifier[6].alpha[1], |
|
classifier[5].alpha[1], |
|
classifier[4].alpha[1], |
|
classifier[3].alpha[1], |
|
classifier[2].alpha[1], |
|
classifier[1].alpha[1], |
|
classifier[0].alpha[1]); |
|
|
|
__m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ)); |
|
outBuf = _mm256_hadd_ps(outBuf, outBuf); |
|
outBuf = _mm256_hadd_ps(outBuf, outBuf); |
|
_mm256_store_ps(tmp, outBuf); |
|
return (tmp[0] + tmp[4]); |
|
} |
|
|
|
double icvEvalHidHaarStumpClassifierTwoRectAVX(CvHidHaarClassifier* classifier, |
|
double variance_norm_factor, size_t p_offset) |
|
{ |
|
float CV_DECL_ALIGNED(32) buf[8]; |
|
CvHidHaarTreeNode* nodes[8]; |
|
nodes[0] = classifier[0].node; |
|
nodes[1] = classifier[1].node; |
|
nodes[2] = classifier[2].node; |
|
nodes[3] = classifier[3].node; |
|
nodes[4] = classifier[4].node; |
|
nodes[5] = classifier[5].node; |
|
nodes[6] = classifier[6].node; |
|
nodes[7] = classifier[7].node; |
|
|
|
__m256 t = _mm256_set1_ps(static_cast<float>(variance_norm_factor)); |
|
t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, |
|
nodes[6]->threshold, |
|
nodes[5]->threshold, |
|
nodes[4]->threshold, |
|
nodes[3]->threshold, |
|
nodes[2]->threshold, |
|
nodes[1]->threshold, |
|
nodes[0]->threshold)); |
|
|
|
__m256 offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[6]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[5]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[4]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[3]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[2]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[1]->feature.rect[0], p_offset), |
|
calc_sumf(nodes[0]->feature.rect[0], p_offset)); |
|
|
|
__m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, |
|
nodes[6]->feature.rect[0].weight, |
|
nodes[5]->feature.rect[0].weight, |
|
nodes[4]->feature.rect[0].weight, |
|
nodes[3]->feature.rect[0].weight, |
|
nodes[2]->feature.rect[0].weight, |
|
nodes[1]->feature.rect[0].weight, |
|
nodes[0]->feature.rect[0].weight); |
|
|
|
__m256 sum = _mm256_mul_ps(offset, weight); |
|
|
|
offset = _mm256_set_ps(calc_sumf(nodes[7]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[6]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[5]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[4]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[3]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[2]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[1]->feature.rect[1], p_offset), |
|
calc_sumf(nodes[0]->feature.rect[1], p_offset)); |
|
|
|
weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, |
|
nodes[6]->feature.rect[1].weight, |
|
nodes[5]->feature.rect[1].weight, |
|
nodes[4]->feature.rect[1].weight, |
|
nodes[3]->feature.rect[1].weight, |
|
nodes[2]->feature.rect[1].weight, |
|
nodes[1]->feature.rect[1].weight, |
|
nodes[0]->feature.rect[1].weight); |
|
|
|
sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); |
|
|
|
__m256 alpha0 = _mm256_set_ps(classifier[7].alpha[0], |
|
classifier[6].alpha[0], |
|
classifier[5].alpha[0], |
|
classifier[4].alpha[0], |
|
classifier[3].alpha[0], |
|
classifier[2].alpha[0], |
|
classifier[1].alpha[0], |
|
classifier[0].alpha[0]); |
|
__m256 alpha1 = _mm256_set_ps(classifier[7].alpha[1], |
|
classifier[6].alpha[1], |
|
classifier[5].alpha[1], |
|
classifier[4].alpha[1], |
|
classifier[3].alpha[1], |
|
classifier[2].alpha[1], |
|
classifier[1].alpha[1], |
|
classifier[0].alpha[1]); |
|
|
|
_mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ))); |
|
return (buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7]); |
|
} |
|
|
|
#endif //CV_HAAR_USE_AVX |
|
|
|
} |
|
|
|
/* End of file. */
|
|
|