From 4128d5782fb2d0322e71efe6d9a1eb90be9145dc Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Wed, 4 Jul 2012 04:51:09 +0000 Subject: [PATCH] added hipotesis filtration --- modules/gpu/src/cascadeclassifier.cpp | 18 +++-- modules/gpu/src/cuda/lbp.cu | 84 +++++++++++++++++++++- modules/gpu/src/opencv2/gpu/device/lbp.hpp | 47 +++++++++++- 3 files changed, 139 insertions(+), 10 deletions(-) diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 2057f1aee9..8932667e86 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace device { namespace lbp { - classifyStump(const DevMem2Db mstages, + void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, @@ -289,16 +289,19 @@ namespace cv { namespace gpu { namespace device int subsetSize, DevMem2D_ objects, unsigned int* classified); + + int connectedConmonents(DevMem2D_ candidates, int groupThreshold, float grouping_eps, unsigned int* nclasses); } }}} int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, - double scaleFactor, int minNeighbors, cv::Size maxObjectSize /*, Size minSize=Size()*/) + double scaleFactor, int groupThreshold, cv::Size maxObjectSize /*, Size minSize=Size()*/) { CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); CV_Assert(!empty()); const int defaultObjSearchNum = 100; + const float grouping_eps = 0.2; if( !objects.empty() && objects.depth() == CV_32S) objects.reshape(4, 1); @@ -340,11 +343,14 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat, integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, 
objects, dclassified); } - cudaMemcpy(classified, dclassified, sizeof(int), cudaMemcpyDeviceToHost); - std::cout << *classified << "Results: " << cv::Mat(objects).row(0).colRange(0, *classified) << std::endl; - // TODO: reject levels - return 0; + cudaMemcpy(classified, dclassified, sizeof(int), cudaMemcpyDeviceToHost); + GpuMat candidates(1, *classified, objects.type(), objects.ptr()); + // std::cout << *classified << " Results: " << cv::Mat(candidates) << std::endl; + + if (groupThreshold <= 0 || objects.empty()) + return 0; + return cv::gpu::device::lbp::connectedConmonents(candidates, groupThreshold, grouping_eps, dclassified); } // ============ old fashioned haar cascade ==============================================// diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu index b07ecad0a0..5c273b32b9 100644 --- a/modules/gpu/src/cuda/lbp.cu +++ b/modules/gpu/src/cuda/lbp.cu @@ -41,6 +41,8 @@ //M*/ #include +#include +#include namespace cv { namespace gpu { namespace device { @@ -89,13 +91,83 @@ namespace cv { namespace gpu { namespace device objects(0, res) = rect; } - classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures, + template + __global__ void disjoin(int4* candidates, unsigned int n, int groupThreshold, float grouping_eps, unsigned int* nclasses) + { + using cv::gpu::device::VecTraits; + unsigned int tid = threadIdx.x; + extern __shared__ int sbuff[]; + + int* labels = sbuff; + int* rrects = (int*)(sbuff + n); + + Pr predicate(grouping_eps); + partition(candidates, n, labels, predicate); + + rrects[tid * 4 + 0] = 0; + rrects[tid * 4 + 1] = 0; + rrects[tid * 4 + 2] = 0; + rrects[tid * 4 + 3] = 0; + __syncthreads(); + + int cls = labels[tid]; + atomicAdd((int*)(rrects + cls * 4 + 0), candidates[tid].x); + atomicAdd((int*)(rrects + cls * 4 + 1), candidates[tid].y); + atomicAdd((int*)(rrects + cls * 4 + 2), candidates[tid].z); + 
atomicAdd((int*)(rrects + cls * 4 + 3), candidates[tid].w); + labels[tid] = 0; + __syncthreads(); + + atomicInc((unsigned int*)labels + cls, n); + labels[n - 1] = 0; + + int active = labels[tid]; + if (active) + { + int* r1 = rrects + tid * 4; + float s = 1.f / active; + r1[0] = saturate_cast(r1[0] * s); + r1[1] = saturate_cast(r1[1] * s); + r1[2] = saturate_cast(r1[2] * s); + r1[3] = saturate_cast(r1[3] * s); + + int n1 = active; + __syncthreads(); + unsigned int j = 0; + if( active > groupThreshold ) + { + for (j = 0; j < n; j++) + { + int n2 = labels[j]; + if(!n2 || j == tid || n2 <= groupThreshold ) + continue; + + int* r2 = rrects + j * 4; + + int dx = saturate_cast( r2[2] * grouping_eps ); + int dy = saturate_cast( r2[3] * grouping_eps ); + + if( tid != j && r1[0] >= r2[0] - dx && r1[1] >= r2[1] - dy && + r1[0] + r1[2] <= r2[0] + r2[2] + dx && r1[1] + r1[3] <= r2[1] + r2[3] + dy && + (n2 > max(3, n1) || n1 < 3) ) + break; + } + + if( j == n) + { + // printf("founded gpu %d %d %d %d \n", r1[0], r1[1], r1[2], r1[3]); + candidates[atomicInc((unsigned int*)labels + n -1, n)] = VecTraits::make(r1[0], r1[1], r1[2], r1[3]); + } + } + } + } + + void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures, const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize, DevMem2D_ objects, unsigned int* classified) { int blocks = ceilf(workHeight / (float)step); int threads = ceilf(workWidth / (float)step); - // printf("blocks %d, threads %d\n", blocks, threads); Stage* stages = (Stage*)(mstages.ptr()); ClNode* nodes = (ClNode*)(mnodes.ptr()); @@ -106,5 +178,13 @@ namespace cv { namespace gpu { namespace device lbp_classify_stump<<>>(stages, nstages, nodes, leaves, subsets, features, integral, workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects, classified); } + + 
int connectedConmonents(DevMem2D_ candidates, int groupThreshold, float grouping_eps, unsigned int* nclasses) + { + int threads = candidates.cols; + int smem_amount = threads * sizeof(int) + threads * sizeof(int4); + disjoin<<<1, threads, smem_amount>>>((int4*)candidates.ptr(), candidates.cols, groupThreshold, grouping_eps, nclasses); + return 0; + } } }}} \ No newline at end of file diff --git a/modules/gpu/src/opencv2/gpu/device/lbp.hpp b/modules/gpu/src/opencv2/gpu/device/lbp.hpp index 3296ee3967..2b620b5627 100644 --- a/modules/gpu/src/opencv2/gpu/device/lbp.hpp +++ b/modules/gpu/src/opencv2/gpu/device/lbp.hpp @@ -62,6 +62,50 @@ namespace lbp{ int featureIdx; }; + struct InSameComponint + { + public: + __device__ __forceinline__ InSameComponint(float _eps) : eps(_eps * 0.5) {} + __device__ __forceinline__ InSameComponint(const InSameComponint& other) : eps(other.eps) {} + + __device__ __forceinline__ bool operator()(const int4& r1, const int4& r2) const + { + double delta = eps * (min(r1.z, r2.z) + min(r1.w, r2.w)); + + return abs(r1.x - r2.x) <= delta && abs(r1.y - r2.y) <= delta + && abs(r1.x + r1.z - r2.x - r2.z) <= delta && abs(r1.y + r1.w - r2.y - r2.w) <= delta; + } + float eps; + }; + + template + __device__ __forceinline__ void partition(int4* vec, unsigned int n, int* labels, Pr predicate) + { + unsigned tid = threadIdx.x; + labels[tid] = tid; + __syncthreads(); + + for (unsigned int id = 0; id < n; id++) + { + if (tid != id && predicate(vec[tid], vec[id])) + { + int p = labels[tid]; + int q = labels[id]; + + if (p < q) + { + atomicMin(labels + id, p); + } + else if (p > q) + { + atomicMin(labels + tid, q); + } + } + } + __syncthreads(); + // printf("tid %d label %d\n", tid, labels[tid]); + } + struct LBP { __device__ __forceinline__ LBP(const LBP& other) {(void)other;} @@ -72,7 +116,6 @@ namespace lbp{ { int x_off = 2 * feature.z; int y_off = 2 * feature.w; - // printf("feature: %d %d %d %d\n", (int)feature.x, (int)feature.y, (int)feature.z, 
(int)feature.w); feature.z += feature.x; feature.w += feature.y; @@ -107,7 +150,7 @@ namespace lbp{ anchors[14] = integral(y + y_off + feature.w, x + x_off + feature.x); anchors[15] = integral(y + y_off + feature.w, x + x_off + feature.z); - // calculate feature + // calculate response int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10]; int response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)