@ -46,54 +46,69 @@ namespace cv { namespace gpu { namespace device
{
{
namespace lbp
namespace lbp
{
{
__global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes,
__global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
const DevMem2Df leaves, const DevMem2Di subsets,
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects)
const DevMem2D_<uchar4> features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_<int4> objects, float scale, int clWidth, int clHeight)
{
{
unsigned int x = threadIdx.x * step;
int y = threadIdx.x * scale;
unsigned int y = blockIdx.x * step;
int x = blockIdx.x * scale;
int nodeOfs = 0, leafOfs = 0;
::cv::gpu::device::Feature evaluator;
for (int s = 0; s < stages.cols; s++ )
int i = 0;
int current_node = 0;
int current_leave = 0;
LBP evaluator;
for (int s = 0; s < nstages; s++ )
{
{
::cv::gpu::device::Stage stage = stages(0, s);
float sum = 0;
int sum = 0;
Stage stage = stages[s];
for (int w = 0; w < stage.ntrees; w++)
for (int t = 0; t < stage.ntrees; t++)
{
{
::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
ClNode node = nodes[current_node];
uchar4 feature = features(0, node.featureIdx);
uchar4 feature = features[node.featureIdx];
int c = evaluator(y, x, feature, integral);
const int* subsetIdx = subsets + (current_node * subsetSize);
uchar c = evaluator(y, x, feature, integral);
int idx = (subsetIdx[c >> 5] & ( 1 << (c & 31))) ? current_leave : current_leave + 1;
const int subsetIdx = (nodeOfs * subsetSize);
sum += leaves[idx];
int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
current_node += 1;
sum += leaves(0, subsets(0, idx) );
current_leave += 2;
nodeOfs++;
leafOfs += 2;
}
}
i = s;
if (sum < stage.threshold)
if (sum < stage.threshold)
return;
return;
}
}
int4 rect;
int4 rect;
rect.x = roundf(x * scale);
rect.x = roundf(x * scale);
rect.y = roundf(y * scale);
rect.y = roundf(y * scale);
rect.z = roundf(clWidth * scale);
rect.z = roundf(clWidth);
rect.w = roundf(clHeight * scale);
rect.w = roundf(clHeight);
objects(blockIdx.x, threadIdx.x) = rect;
if(i >= 19)
printf( "GPU detected [%d, %d] - [%d, %d]\n", rect.x, rect.y, rect.z, rect.w);
}
}
void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures,
void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
DevMem2D_<int4> objects)
{
{
printf("CascadeClassify");
int blocks = ceilf(workHeight / (float)step);
int blocks = ceilf(workHeight / (float)step);
int threads = ceilf(workWidth / (float)step);
int threads = ceilf(workWidth / (float)step);
DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages);
printf("blocks %d, threads %d\n", blocks, threads);
DevMem2D_<uchar4> features = (DevMem2D_<uchar4>)bfeatures;
DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes);
Stage* stages = (Stage*)(mstages.ptr());
ClNode* nodes = (ClNode*)(mnodes.ptr());
const float* leaves = mleaves.ptr();
const int* subsets = msubsets.ptr();
const uchar4* features = (uchar4*)(mfeatures.ptr());
lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight);
lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, integral,
workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects);
}
}
}
}
}}}
}}}