From a743eca076c6a6c1b35d7cbe8ff9258dad5b7a6f Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Tue, 26 Jun 2012 12:15:19 +0000 Subject: [PATCH] LBP features: GPU representation --- modules/gpu/include/opencv2/gpu/gpu.hpp | 1 + modules/gpu/src/cascadeclassifier.cpp | 48 ++++++++++++++++------ modules/gpu/src/cuda/lbp.cu | 33 +++++++++------ modules/gpu/src/opencv2/gpu/device/lbp.hpp | 43 ++++++++++++++++++- 4 files changed, 98 insertions(+), 27 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 3bca642953..aa3e36816c 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1460,6 +1460,7 @@ private: GpuMat nodes_mat; GpuMat leaves_mat; GpuMat subsets_mat; + GpuMat features_mat; // current integral image GpuMat integral; diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 9abc4538a4..e959342a6d 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -99,10 +99,9 @@ cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP() void cv::gpu::CascadeClassifier_GPU_LBP::preallocateIntegralBuffer(cv::Size desired) { - integral.create(desired.width + 1, desired.height + 1, CV_32FC1); + integral.create(desired.width + 1, desired.height + 1, CV_32SC1); } - bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const { return stage_mat.empty(); @@ -132,6 +131,8 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml) #define GPU_CC_WEAK_CLASSIFIERS "weakClassifiers" #define GPU_CC_INTERNAL_NODES "internalNodes" #define GPU_CC_LEAF_VALUES "leafValues" +#define GPU_CC_FEATURES "features" +#define GPU_CC_RECT "rect" bool CascadeClassifier_GPU_LBP::read(const FileNode &root) { @@ -225,6 +226,22 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root) cl_leaves.push_back((float)*iIt); } } + fn = root[GPU_CC_FEATURES]; + if( fn.empty() ) + return false; + std::vector features; + features.reserve(fn.size() * 4); + FileNodeIterator f_it = fn.begin(), f_end = fn.end(); + for (; f_it != f_end; ++f_it) + { + FileNode rect = fn[GPU_CC_RECT]; + FileNodeIterator r_it = rect.begin(); + features.push_back(saturate_cast((int)*(r_it++))); + features.push_back(saturate_cast((int)*(r_it++))); + features.push_back(saturate_cast((int)*(r_it++))); + features.push_back(saturate_cast((int)*(r_it++))); + } + // copy data structures on gpu stage_mat = cv::gpu::GpuMat(1, (int)stages.size() * sizeof(Stage), CV_8UC1); stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, &(stages[0]) )); @@ -241,6 +258,9 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root) subsets_mat = cv::gpu::GpuMat(1, (int)subsets.size(), CV_32SC1); stage_mat.upload(cv::Mat(subsets)); + features_mat = cv::gpu::GpuMat(1, (int)features.size(), CV_8UC1); + features_mat.upload(cv::Mat(features)); + return true; } @@ -270,22 +290,25 @@ namespace cv { namespace gpu { namespace device { namespace lbp { - void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, - const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_ objects, int minNeighbors = 4, cudaStream_t stream = 0); + void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db features, + const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_ objects, int minNeighbors = 4, cudaStream_t stream = 0); } }}} int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/) { CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); - CV_Assert(empty()); + CV_Assert(!empty()); const int defaultObjSearchNum = 100; - if( !objects.empty() && objects.depth() == CV_32S) - objects.reshape(4, 1); - else - objects.create(1 , defaultObjSearchNum, CV_32SC4); + // if( !objects.empty() && objects.depth() == CV_32S) + // objects.reshape(4, 1); + // else + // objects.create(1 , defaultObjSearchNum, CV_32SC4); + + // temp solution + objects.create(image.rows, image.cols, CV_32SC4); scaledImageBuffer.create(image.size(), image.type()); @@ -302,14 +325,13 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp // TODO: min max object sizes cheching cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST); //prepare image for evaluation - integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32FC1); + integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1); cv::gpu::integral(scaledImageBuffer, integral); int step = (factor <= 2.) + 1; - int stripCount = 1, stripSize = processingRectSize.height; - cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, - integral, processingRectSize.width, processingRectSize.height, step, subsetSize, objects, minNeighbors); + cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, features_mat, + integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, minNeighbors); } // TODO: reject levels diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu index 61f67d4d2b..888bf3b81f 100644 --- a/modules/gpu/src/cuda/lbp.cu +++ b/modules/gpu/src/cuda/lbp.cu @@ -46,13 +46,14 @@ namespace cv { namespace gpu { namespace device { namespace lbp { - __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, - const DevMem2Db integral, float step, int subsetSize, DevMem2D_ objects) + __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes, + const DevMem2Df leaves, const DevMem2Di subsets, + const DevMem2D_ features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_ objects, float scale, int clWidth, int clHeight) { - unsigned int x = threadIdx.x; - unsigned int y = blockIdx.x; + unsigned int x = threadIdx.x * step; + unsigned int y = blockIdx.x * step; int nodeOfs = 0, leafOfs = 0; - ::cv::gpu::device::Feature feature; + ::cv::gpu::device::Feature evaluator; for (int s = 0; s < stages.cols; s++ ) { @@ -61,7 +62,9 @@ namespace cv { namespace gpu { namespace device for (int w = 0; w < stage.ntrees; w++) { ::cv::gpu::device::ClNode node = nodes(0, nodeOfs); - char c = feature();// TODO: inmplement it + uchar4 feature = features(0, node.featureIdx); + + uchar c = evaluator(y, x, feature, integral); const int subsetIdx = (nodeOfs * subsetSize); int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1); sum += leaves(0, subsets(0, idx) ); @@ -70,21 +73,27 @@ namespace cv { namespace gpu { namespace device } if (sum < stage.threshold) - return; // nothing matched - return;//mathed + return; } - + int4 rect; + rect.x = roundf(x * scale); + rect.y = roundf(y * scale); + rect.z = roundf(clWidth * scale); + rect.w = roundf(clHeight * scale); + objects(blockIdx.x, threadIdx.x) = rect; } - void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, - const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_ objects, int minNeighbors, cudaStream_t stream) + void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures, + const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_ objects, int minNeighbors, cudaStream_t stream) { printf("CascadeClassify"); int blocks = ceilf(workHeight / (float)step); int threads = ceilf(workWidth / (float)step); DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages); + DevMem2D_ features = (DevMem2D_)bfeatures; + DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes); - lbp_classify<<>>(stages, trees, nodes, leaves, subsets, integral, step, subsetSize, objects); + lbp_classify<<>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight); } } }}} \ No newline at end of file diff --git a/modules/gpu/src/opencv2/gpu/device/lbp.hpp b/modules/gpu/src/opencv2/gpu/device/lbp.hpp index ede48bfe86..debda53d40 100644 --- a/modules/gpu/src/opencv2/gpu/device/lbp.hpp +++ b/modules/gpu/src/opencv2/gpu/device/lbp.hpp @@ -82,9 +82,48 @@ namespace cv { namespace gpu { namespace device { { __device__ __forceinline__ Feature(const Feature& other) {(void)other;} __device__ __forceinline__ Feature() {} - __device__ __forceinline__ char operator() ()//(volatile int* ptr, int offset) + + //feature as uchar x, y - left top, z,w - right bottom + __device__ __forceinline__ uchar operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const { - return char(0); + int x_off = 2 * feature.z; + int y_off = 2 * feature.w; + + // load feature key points + int anchors[16]; + anchors[0] = integral(y + feature.y, x + feature.x); + anchors[1] = integral(y + feature.y, x + feature.z); + anchors[2] = integral(y + feature.y, x + x_off + feature.x); + anchors[3] = integral(y + feature.y, x + x_off + feature.z); + + anchors[4] = integral(y + feature.w, x + feature.x); + anchors[5] = integral(y + feature.w, x + feature.z); + anchors[6] = integral(y + feature.w, x + x_off + feature.x); + anchors[7] = integral(y + feature.w, x + x_off + feature.z); + + anchors[8] = integral(y + y_off + feature.y, x + feature.x); + anchors[9] = integral(y + y_off + feature.y, x + feature.z); + anchors[10] = integral(y + y_off + feature.y, x + x_off + feature.x); + anchors[11] = integral(y + y_off + feature.y, x + x_off + feature.z); + + anchors[12] = integral(y + y_off + feature.w, x + feature.x); + anchors[13] = integral(y + y_off + feature.w, x + feature.z); + anchors[14] = integral(y + y_off + feature.w, x + x_off + feature.x); + anchors[15] = integral(y + y_off + feature.w, x + x_off + feature.z); + + // calculate feature + int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10]; + + uchar response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0) + |(( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum )? 64 : 0) + |(( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum )? 32 : 0) + |(( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum )? 16 : 0) + |(( (anchors[10] - anchors[11] - anchors[14] + anchors[15]) >= sum )? 8 : 0) + |(( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum )? 4 : 0) + |(( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum )? 2 : 0) + |(( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum )? 1 : 0); + + return response; } };