|
|
@ -1,43 +1,5 @@ |
|
|
|
// License Agreement |
|
|
|
///////////////////////////// OpenCL kernels for face detection ////////////////////////////// |
|
|
|
// For Open Source Computer Vision Library |
|
|
|
////////////////////////////// see the opencv/doc/license.txt /////////////////////////////// |
|
|
|
// |
|
|
|
|
|
|
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. |
|
|
|
|
|
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
|
|
|
|
|
|
|
// Third party copyrights are property of their respective owners. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// @Authors |
|
|
|
|
|
|
|
// Niko Li, newlife20080214@gmail.com |
|
|
|
|
|
|
|
// Wang Weiyan, wangweiyanster@gmail.com |
|
|
|
|
|
|
|
// Jia Haipeng, jiahaipeng95@gmail.com |
|
|
|
|
|
|
|
// Nathan, liujun@multicorewareinc.com |
|
|
|
|
|
|
|
// Peng Xiao, pengxiao@outlook.com |
|
|
|
|
|
|
|
// Erping Pang, erping@multicorewareinc.com |
|
|
|
|
|
|
|
// Vadim Pisarevsky, vadim.pisarevsky@itseez.com |
|
|
|
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification, |
|
|
|
|
|
|
|
// are permitted provided that the following conditions are met: |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice, |
|
|
|
|
|
|
|
// this list of conditions and the following disclaimer. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
|
|
|
|
|
|
|
// this list of conditions and the following disclaimer in the documentation |
|
|
|
|
|
|
|
// and/or other materials provided with the distribution. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products |
|
|
|
|
|
|
|
// derived from this software without specific prior written permission. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// This software is provided by the copyright holders and contributors as is and |
|
|
|
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied |
|
|
|
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
|
|
|
|
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
|
|
|
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages |
|
|
|
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services; |
|
|
|
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused |
|
|
|
|
|
|
|
// and on any theory of liability, whether in contract, strict liability, |
|
|
|
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of |
|
|
|
|
|
|
|
// the use of this software, even if advised of the possibility of such damage. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct __attribute__((aligned(4))) OptFeature |
|
|
|
typedef struct __attribute__((aligned(4))) OptFeature |
|
|
|
{ |
|
|
|
{ |
|
|
@ -46,20 +8,14 @@ typedef struct __attribute__((aligned(4))) OptFeature |
|
|
|
} |
|
|
|
} |
|
|
|
OptFeature; |
|
|
|
OptFeature; |
|
|
|
|
|
|
|
|
|
|
|
typedef struct __attribute__((aligned(4))) DTreeNode |
|
|
|
typedef struct __attribute__((aligned(4))) Stump |
|
|
|
{ |
|
|
|
{ |
|
|
|
int featureIdx __attribute__((aligned (4))); |
|
|
|
int featureIdx __attribute__((aligned (4))); |
|
|
|
float threshold __attribute__((aligned (4))); // for ordered features only |
|
|
|
float threshold __attribute__((aligned (4))); // for ordered features only |
|
|
|
int left __attribute__((aligned (4))); |
|
|
|
float left __attribute__((aligned (4))); |
|
|
|
int right __attribute__((aligned (4))); |
|
|
|
float right __attribute__((aligned (4))); |
|
|
|
} |
|
|
|
} |
|
|
|
DTreeNode; |
|
|
|
Stump; |
|
|
|
|
|
|
|
|
|
|
|
typedef struct __attribute__((aligned (4))) DTree |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int nodeCount __attribute__((aligned (4))); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
DTree; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct __attribute__((aligned (4))) Stage |
|
|
|
typedef struct __attribute__((aligned (4))) Stage |
|
|
|
{ |
|
|
|
{ |
|
|
@ -78,25 +34,23 @@ __kernel void runHaarClassifierStump( |
|
|
|
|
|
|
|
|
|
|
|
int nstages, |
|
|
|
int nstages, |
|
|
|
__global const Stage* stages, |
|
|
|
__global const Stage* stages, |
|
|
|
__global const DTree* trees, |
|
|
|
__global const Stump* stumps, |
|
|
|
__global const DTreeNode* nodes, |
|
|
|
|
|
|
|
__global const float* leaves, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
volatile __global int* facepos, |
|
|
|
volatile __global int* facepos, |
|
|
|
int2 imgsize, int xyscale, float factor, |
|
|
|
int2 imgsize, int xyscale, float factor, |
|
|
|
int4 normrect, int2 windowsize) |
|
|
|
int4 normrect, int2 windowsize, int maxFaces) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int ix = get_global_id(0)*xyscale; |
|
|
|
int ix = get_global_id(0)*xyscale*VECTOR_SIZE; |
|
|
|
int iy = get_global_id(1)*xyscale; |
|
|
|
int iy = get_global_id(1)*xyscale; |
|
|
|
sumstep /= sizeof(int); |
|
|
|
sumstep /= sizeof(int); |
|
|
|
sqsumstep /= sizeof(int); |
|
|
|
sqsumstep /= sizeof(int); |
|
|
|
|
|
|
|
|
|
|
|
if( ix < imgsize.x && iy < imgsize.y ) |
|
|
|
if( ix < imgsize.x && iy < imgsize.y ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int ntrees, nodeOfs = 0, leafOfs = 0; |
|
|
|
int ntrees; |
|
|
|
int stageIdx, i; |
|
|
|
int stageIdx, i; |
|
|
|
float s = 0.f; |
|
|
|
float s = 0.f; |
|
|
|
__global const DTreeNode* node; |
|
|
|
__global const Stump* stump = stumps; |
|
|
|
__global const OptFeature* f; |
|
|
|
__global const OptFeature* f; |
|
|
|
|
|
|
|
|
|
|
|
__global const int* psum = sum + mad24(iy, sumstep, ix); |
|
|
|
__global const int* psum = sum + mad24(iy, sumstep, ix); |
|
|
@ -107,19 +61,17 @@ __kernel void runHaarClassifierStump( |
|
|
|
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; |
|
|
|
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; |
|
|
|
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; |
|
|
|
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; |
|
|
|
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); |
|
|
|
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); |
|
|
|
float4 weight; |
|
|
|
float4 weight, vsval; |
|
|
|
int4 ofs; |
|
|
|
int4 ofs, ofs0, ofs1, ofs2; |
|
|
|
nf = nf > 0 ? nf : 1.f; |
|
|
|
nf = nf > 0 ? nf : 1.f; |
|
|
|
|
|
|
|
|
|
|
|
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) |
|
|
|
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
ntrees = stages[stageIdx].ntrees; |
|
|
|
ntrees = stages[stageIdx].ntrees; |
|
|
|
s = 0.f; |
|
|
|
s = 0.f; |
|
|
|
for( i = 0; i < ntrees; i++, nodeOfs++, leafOfs += 2 ) |
|
|
|
for( i = 0; i < ntrees; i++, stump++ ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
node = nodes + nodeOfs; |
|
|
|
f = optfeatures + stump->featureIdx; |
|
|
|
f = optfeatures + node->featureIdx; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
weight = f->weight; |
|
|
|
weight = f->weight; |
|
|
|
|
|
|
|
|
|
|
|
ofs = f->ofs[0]; |
|
|
|
ofs = f->ofs[0]; |
|
|
@ -131,7 +83,8 @@ __kernel void runHaarClassifierStump( |
|
|
|
ofs = f->ofs[2]; |
|
|
|
ofs = f->ofs[2]; |
|
|
|
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; |
|
|
|
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; |
|
|
|
} |
|
|
|
} |
|
|
|
s += leaves[ sval < node->threshold*nf ? leafOfs : leafOfs + 1 ]; |
|
|
|
|
|
|
|
|
|
|
|
s += (sval < stump->threshold*nf) ? stump->left : stump->right; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if( s < stages[stageIdx].threshold ) |
|
|
|
if( s < stages[stageIdx].threshold ) |
|
|
@ -142,7 +95,84 @@ __kernel void runHaarClassifierStump( |
|
|
|
{ |
|
|
|
{ |
|
|
|
int nfaces = atomic_inc(facepos); |
|
|
|
int nfaces = atomic_inc(facepos); |
|
|
|
//printf("detected face #d!!!!\n", nfaces); |
|
|
|
//printf("detected face #d!!!!\n", nfaces); |
|
|
|
if( nfaces < MAX_FACES ) |
|
|
|
if( nfaces < maxFaces ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
volatile __global int* face = facepos + 1 + nfaces*4; |
|
|
|
|
|
|
|
face[0] = convert_int_rte(ix*factor); |
|
|
|
|
|
|
|
face[1] = convert_int_rte(iy*factor); |
|
|
|
|
|
|
|
face[2] = convert_int_rte(windowsize.x*factor); |
|
|
|
|
|
|
|
face[3] = convert_int_rte(windowsize.y*factor); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if 0 |
|
|
|
|
|
|
|
__kernel void runLBPClassifierStump( |
|
|
|
|
|
|
|
__global const int* sum, |
|
|
|
|
|
|
|
int sumstep, int sumoffset, |
|
|
|
|
|
|
|
__global const int* sqsum, |
|
|
|
|
|
|
|
int sqsumstep, int sqsumoffset, |
|
|
|
|
|
|
|
__global const OptFeature* optfeatures, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int nstages, |
|
|
|
|
|
|
|
__global const Stage* stages, |
|
|
|
|
|
|
|
__global const Stump* stumps, |
|
|
|
|
|
|
|
__global const int* bitsets, |
|
|
|
|
|
|
|
int bitsetSize, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
volatile __global int* facepos, |
|
|
|
|
|
|
|
int2 imgsize, int xyscale, float factor, |
|
|
|
|
|
|
|
int4 normrect, int2 windowsize, int maxFaces) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int ix = get_global_id(0)*xyscale*VECTOR_SIZE; |
|
|
|
|
|
|
|
int iy = get_global_id(1)*xyscale; |
|
|
|
|
|
|
|
sumstep /= sizeof(int); |
|
|
|
|
|
|
|
sqsumstep /= sizeof(int); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if( ix < imgsize.x && iy < imgsize.y ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int ntrees; |
|
|
|
|
|
|
|
int stageIdx, i; |
|
|
|
|
|
|
|
float s = 0.f; |
|
|
|
|
|
|
|
__global const Stump* stump = stumps; |
|
|
|
|
|
|
|
__global const int* bitset = bitsets; |
|
|
|
|
|
|
|
__global const OptFeature* f; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__global const int* psum = sum + mad24(iy, sumstep, ix); |
|
|
|
|
|
|
|
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x); |
|
|
|
|
|
|
|
int normarea = normrect.z * normrect.w; |
|
|
|
|
|
|
|
float invarea = 1.f/normarea; |
|
|
|
|
|
|
|
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] + |
|
|
|
|
|
|
|
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; |
|
|
|
|
|
|
|
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; |
|
|
|
|
|
|
|
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); |
|
|
|
|
|
|
|
float4 weight; |
|
|
|
|
|
|
|
int4 ofs; |
|
|
|
|
|
|
|
nf = nf > 0 ? nf : 1.f; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
ntrees = stages[stageIdx].ntrees; |
|
|
|
|
|
|
|
s = 0.f; |
|
|
|
|
|
|
|
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
f = optfeatures + stump->featureIdx; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
weight = f->weight; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// compute LBP feature to val |
|
|
|
|
|
|
|
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if( s < stages[stageIdx].threshold ) |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if( stageIdx == nstages ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int nfaces = atomic_inc(facepos); |
|
|
|
|
|
|
|
if( nfaces < maxFaces ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
volatile __global int* face = facepos + 1 + nfaces*4; |
|
|
|
volatile __global int* face = facepos + 1 + nfaces*4; |
|
|
|
face[0] = convert_int_rte(ix*factor); |
|
|
|
face[0] = convert_int_rte(ix*factor); |
|
|
@ -153,3 +183,5 @@ __kernel void runHaarClassifierStump( |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|