// Summary of this patch (a unified diff over four OpenCV gpu-module files; the line breaks of the
// diff are collapsed in this copy):
//   * gpu.hpp               - rewords the comment above the classifier GpuMat members and adds a
//                             "current integral image" comment above `integral`.
//   * cascadeclassifier.cpp - removes the commented-out `threshold` field from DTreeNode, replaces the
//                             CascadeClassify declaration with cascadeClassify(...), and replaces the
//                             placeholder x/y scan loop in detectMultiScale with a call to it.
//   * cuda/lbp.cu           - adds the lbp_classify kernel and turns the host function into
//                             cascadeClassify, which computes `blocks`/`threads` from the work size
//                             and `step`, converts `bstages` to a DevMem2D_ of Stage and launches
//                             the kernel.
//   * device/lbp.hpp        - includes internal_shared.hpp, adds the Stage and ClNode structs, makes
//                             Feature::operator() take no arguments, and adds the missing `;` after
//                             struct Feature and the missing closing namespace braces.
// NOTE(review): lbp_classify is a stub as written: x and y are assigned but never read, `node` is
// loaded but unused, Feature::operator() always returns char(0), `objects` is never written, and the
// stage loop returns on both paths at the end of its first iteration.
// NOTE(review): `sum` is an int accumulating values read from the float `leaves` matrix and is then
// compared with the float stage.threshold - confirm the truncation is intended.
// NOTE(review): the kernel declares `step` as float while cascadeClassify holds it as int.
// NOTE(review): nothing follows the kernel launch in cascadeClassify (no error check is visible), and
// the debug printf is still present.
// NOTE(review): the include targets and some template/launch arguments are missing in this copy
// (`#include`, `DevMem2D_ objects`, `<<<>>>`) - presumably lost in extraction; confirm against the
// original commit before applying.
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index d67e43d078..92f544b221 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1454,12 +1454,14 @@ private: int subsetSize; int nodeStep; - // located on gpu + // gpu representation of classifier GpuMat stage_mat; GpuMat trees_mat; GpuMat nodes_mat; GpuMat leaves_mat; GpuMat subsets_mat; + + // current integral image GpuMat integral; }; diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index b423d58f64..bd0058b914 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -59,7 +59,6 @@ struct Stage struct DTreeNode { int featureIdx; - //float threshold; // for ordered features only int left; int right; DTreeNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {} @@ -271,7 +270,8 @@ namespace cv { namespace gpu { namespace device { namespace lbp { - void CascadeClassify(DevMem2Db image, DevMem2Db objects, double scaleFactor = 1.2, int minNeighbors = 4, cudaStream_t stream = 0); + void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, + const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_ objects, int minNeighbors = 4, cudaStream_t stream = 0); } }}} @@ -308,17 +308,8 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp int step = (factor <= 2.) 
+ 1; int stripCount = 1, stripSize = processingRectSize.height; - int y1 = 0; - int y2 = processingRectSize.height; - - for (int y = y1; y < y2; y += step) - for (int x = 0; x < processingRectSize.width; x+=step) - { - //ToDO: classify - int result = 0; - - } - + cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, + integral, processingRectSize.width, processingRectSize.height, step, subsetSize, objects, minNeighbors); } // TODO: reject levels diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu index 929077c23d..61f67d4d2b 100644 --- a/modules/gpu/src/cuda/lbp.cu +++ b/modules/gpu/src/cuda/lbp.cu @@ -40,15 +40,51 @@ // //M*/ -#include +#include namespace cv { namespace gpu { namespace device { namespace lbp { - void CascadeClassify(DevMem2Db image, DevMem2Db objects, double scaleFactor=1.2, int minNeighbors=4, cudaStream_t stream) + __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, + const DevMem2Db integral, float step, int subsetSize, DevMem2D_ objects) + { + unsigned int x = threadIdx.x; + unsigned int y = blockIdx.x; + int nodeOfs = 0, leafOfs = 0; + ::cv::gpu::device::Feature feature; + + for (int s = 0; s < stages.cols; s++ ) + { + ::cv::gpu::device::Stage stage = stages(0, s); + int sum = 0; + for (int w = 0; w < stage.ntrees; w++) + { + ::cv::gpu::device::ClNode node = nodes(0, nodeOfs); + char c = feature();// TODO: implement it + const int subsetIdx = (nodeOfs * subsetSize); + int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? 
leafOfs : leafOfs + 1); + sum += leaves(0, subsets(0, idx) ); + nodeOfs++; + leafOfs += 2; + } + + if (sum < stage.threshold) + return; // nothing matched + return;//matched + } + + } + + void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, + const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_ objects, int minNeighbors, cudaStream_t stream) { printf("CascadeClassify"); + int blocks = ceilf(workHeight / (float)step); + int threads = ceilf(workWidth / (float)step); + DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages); + + lbp_classify<<>>(stages, trees, nodes, leaves, subsets, integral, step, subsetSize, objects); } } }}} \ No newline at end of file diff --git a/modules/gpu/src/opencv2/gpu/device/lbp.hpp b/modules/gpu/src/opencv2/gpu/device/lbp.hpp index 3b104f6190..ede48bfe86 100644 --- a/modules/gpu/src/opencv2/gpu/device/lbp.hpp +++ b/modules/gpu/src/opencv2/gpu/device/lbp.hpp @@ -43,6 +43,13 @@ #ifndef __OPENCV_GPU_DEVICE_LBP_HPP_ #define __OPENCV_GPU_DEVICE_LBP_HPP_ +#include "internal_shared.hpp" +// #include "opencv2/gpu/device/border_interpolate.hpp" +// #include "opencv2/gpu/device/vec_traits.hpp" +// #include "opencv2/gpu/device/vec_math.hpp" +// #include "opencv2/gpu/device/saturate_cast.hpp" +// #include "opencv2/gpu/device/filters.hpp" + // #define CALC_SUM_(p0, p1, p2, p3, offset) \ // ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset]) @@ -53,16 +60,34 @@ namespace cv { namespace gpu { namespace device { + struct Stage + { + int first; + int ntrees; + float threshold; + __device__ __forceinline__ Stage(int f = 0, int n = 0, float t = 0.f) : first(f), ntrees(n), threshold(t) {} + __device__ __forceinline__ Stage(const Stage& other) : first(other.first), ntrees(other.ntrees), threshold(other.threshold) {} + }; + + struct ClNode + { + int featureIdx; + int left; + int right; + 
__device__ __forceinline__ ClNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {} + __device__ __forceinline__ ClNode(const ClNode& other) : featureIdx(other.featureIdx), left(other.left), right(other.right) {} + }; + struct Feature { __device__ __forceinline__ Feature(const Feature& other) {(void)other;} __device__ __forceinline__ Feature() {} - __device__ __forceinline__ char operator() (volatile int* ptr, int offset) + __device__ __forceinline__ char operator() ()//(volatile int* ptr, int offset) { return char(0); } - } -}// namespaces + }; +} } }// namespaces #endif \ No newline at end of file