LBP features: GPU representation

pull/2/head
Marina Kolpakova 13 years ago
parent 55567582a6
commit a743eca076
4 changed files:

  1. modules/gpu/include/opencv2/gpu/gpu.hpp (1 line changed)
  2. modules/gpu/src/cascadeclassifier.cpp (48 lines changed)
  3. modules/gpu/src/cuda/lbp.cu (33 lines changed)
  4. modules/gpu/src/opencv2/gpu/device/lbp.hpp (43 lines changed)

@@ -1460,6 +1460,7 @@ private:
     GpuMat nodes_mat;
     GpuMat leaves_mat;
     GpuMat subsets_mat;
+    GpuMat features_mat;
     // current integral image
     GpuMat integral;

@@ -99,10 +99,9 @@ cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP()
 void cv::gpu::CascadeClassifier_GPU_LBP::preallocateIntegralBuffer(cv::Size desired)
 {
-    integral.create(desired.width + 1, desired.height + 1, CV_32FC1);
+    integral.create(desired.width + 1, desired.height + 1, CV_32SC1);
 }

 bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const
 {
     return stage_mat.empty();
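
The switch from CV_32FC1 to CV_32SC1 matches what cv::gpu::integral produces for an 8-bit source: a summed-area table of 32-bit integer sums, one row and one column larger than the image. A minimal CPU sketch of that layout (illustrative only, not the GPU implementation) shows why the extra zero border and the integer type are there:

    #include <cstdio>
    #include <vector>

    // Build an (h+1) x (w+1) table of 32-bit sums for an h x w 8-bit image.
    // The extra zero row/column lets any rectangle sum be read with four lookups.
    static std::vector<int> integralImage(const unsigned char* img, int w, int h)
    {
        std::vector<int> sum((w + 1) * (h + 1), 0);
        for (int y = 0; y < h; ++y)
            for (int x = 0; x < w; ++x)
                sum[(y + 1) * (w + 1) + (x + 1)] = img[y * w + x]
                    + sum[y * (w + 1) + (x + 1)]   // above
                    + sum[(y + 1) * (w + 1) + x]   // left
                    - sum[y * (w + 1) + x];        // above-left, counted twice
        return sum;
    }

    // Sum of the half-open rectangle [x0, x1) x [y0, y1).
    static int rectSum(const std::vector<int>& sum, int w, int x0, int y0, int x1, int y1)
    {
        const int W = w + 1;
        return sum[y1 * W + x1] - sum[y0 * W + x1] - sum[y1 * W + x0] + sum[y0 * W + x0];
    }

    int main()
    {
        const unsigned char img[4 * 3] = { 1, 2,  3,  4,
                                           5, 6,  7,  8,
                                           9, 10, 11, 12 };
        std::vector<int> sum = integralImage(img, 4, 3);
        std::printf("2x2 block at (1,1): %d\n", rectSum(sum, 4, 1, 1, 3, 3)); // 6 + 7 + 10 + 11 = 34
        return 0;
    }

Every rectangle sum is then four reads, which is exactly what the LBP evaluator in lbp.hpp relies on.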
@@ -132,6 +131,8 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
 #define GPU_CC_WEAK_CLASSIFIERS "weakClassifiers"
 #define GPU_CC_INTERNAL_NODES "internalNodes"
 #define GPU_CC_LEAF_VALUES "leafValues"
+#define GPU_CC_FEATURES "features"
+#define GPU_CC_RECT "rect"

 bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
 {
@@ -225,6 +226,22 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
             cl_leaves.push_back((float)*iIt);
         }
     }

+    fn = root[GPU_CC_FEATURES];
+    if( fn.empty() )
+        return false;
+    std::vector<char> features;
+    features.reserve(fn.size() * 4);
+    FileNodeIterator f_it = fn.begin(), f_end = fn.end();
+    for (; f_it != f_end; ++f_it)
+    {
+        FileNode rect = fn[GPU_CC_RECT];
+        FileNodeIterator r_it = rect.begin();
+        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+    }
+
     // copy data structures on gpu
     stage_mat = cv::gpu::GpuMat(1, (int)stages.size() * sizeof(Stage), CV_8UC1);
     stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, &(stages[0]) ));
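
The new block reads the cascade's features section and flattens each rect into four bytes so that the whole table can later be uploaded as a single 1-row byte matrix. A standalone sketch of that packing, taking the rect from the feature node the iterator currently points at (the helper name is illustrative; the "rect" key follows the OpenCV LBP cascade XML layout):

    #include <opencv2/core/core.hpp>
    #include <vector>

    // Pack per-feature rects from a cascade FileNode into a flat byte array,
    // four uchars per feature, ready to wrap in a 1-row cv::Mat and upload.
    static bool readLbpFeatures(const cv::FileNode& featuresNode, std::vector<uchar>& packed)
    {
        if (featuresNode.empty())
            return false;

        packed.clear();
        packed.reserve(featuresNode.size() * 4);

        for (cv::FileNodeIterator it = featuresNode.begin(); it != featuresNode.end(); ++it)
        {
            cv::FileNode rect = (*it)["rect"];      // per-feature rect entry
            cv::FileNodeIterator r = rect.begin();
            for (int i = 0; i < 4; ++i, ++r)        // x, y and the cell extents
                packed.push_back(cv::saturate_cast<uchar>((int)*r));
        }
        return true;
    }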
@@ -241,6 +258,9 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
     subsets_mat = cv::gpu::GpuMat(1, (int)subsets.size(), CV_32SC1);
     stage_mat.upload(cv::Mat(subsets));

+    features_mat = cv::gpu::GpuMat(1, (int)features.size(), CV_8UC1);
+    features_mat.upload(cv::Mat(features));
+
     return true;
 }
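
The upload pattern used for all of these tables is the same: wrap the host vector in a 1-row cv::Mat header (no copy) and push it to the device with GpuMat::upload. A minimal sketch, assuming the 2.4-era gpu module headers (the helper name is illustrative):

    #include <opencv2/core/core.hpp>
    #include <opencv2/gpu/gpu.hpp>
    #include <vector>

    // Wrap host bytes in a Mat header and copy them into a 1-row GpuMat.
    static cv::gpu::GpuMat uploadBytes(const std::vector<uchar>& host)
    {
        cv::Mat header(1, (int)host.size(), CV_8UC1, (void*)&host[0]); // wraps host memory, no copy
        cv::gpu::GpuMat device;
        device.upload(header);                                         // host-to-device copy
        return device;
    }

Structured records such as Stage are handled the same way, with the byte count scaled by sizeof(Stage) as in the hunk above.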
@@ -270,22 +290,25 @@ namespace cv { namespace gpu { namespace device
 {
     namespace lbp
     {
-        void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets,
-            const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0);
+        void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db features,
+            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0);
     }
 }}}

 int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/)
 {
     CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
-    CV_Assert(empty());
+    CV_Assert(!empty());

     const int defaultObjSearchNum = 100;
-    if( !objects.empty() && objects.depth() == CV_32S)
-        objects.reshape(4, 1);
-    else
-        objects.create(1 , defaultObjSearchNum, CV_32SC4);
+    // if( !objects.empty() && objects.depth() == CV_32S)
+    //     objects.reshape(4, 1);
+    // else
+    //     objects.create(1 , defaultObjSearchNum, CV_32SC4);
+    // temp solution
+    objects.create(image.rows, image.cols, CV_32SC4);

     scaledImageBuffer.create(image.size(), image.type());
@@ -302,14 +325,13 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
         // TODO: min max object sizes cheching
         cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST);

         //prepare image for evaluation
-        integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32FC1);
+        integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1);
         cv::gpu::integral(scaledImageBuffer, integral);

         int step = (factor <= 2.) + 1;
         int stripCount = 1, stripSize = processingRectSize.height;

-        cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat,
-            integral, processingRectSize.width, processingRectSize.height, step, subsetSize, objects, minNeighbors);
+        cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, features_mat,
+            integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, minNeighbors);
     }

     // TODO: reject levels
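
After the per-scale launches, the objects buffer holds one int4 per accepted window, written by the kernel as (x, y, width, height) scaled by the factor it was given. The buffer is not compacted at this stage, so a host-side readback has to skip untouched cells; a purely illustrative sketch, assuming the buffer was zeroed before the launches:

    #include <opencv2/core/core.hpp>
    #include <opencv2/gpu/gpu.hpp>
    #include <vector>

    // Download the CV_32SC4 objects buffer and keep cells with a non-zero size.
    static std::vector<cv::Rect> collectDetections(const cv::gpu::GpuMat& objects)
    {
        cv::Mat host;
        objects.download(host);   // device-to-host copy

        std::vector<cv::Rect> rects;
        for (int r = 0; r < host.rows; ++r)
            for (int c = 0; c < host.cols; ++c)
            {
                cv::Vec4i v = host.at<cv::Vec4i>(r, c);
                if (v[2] > 0 && v[3] > 0)
                    rects.push_back(cv::Rect(v[0], v[1], v[2], v[3]));
            }
        return rects;
    }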

@@ -46,13 +46,14 @@ namespace cv { namespace gpu { namespace device
 {
     namespace lbp
     {
-        __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets,
-            const DevMem2Db integral, float step, int subsetSize, DevMem2D_<int4> objects)
+        __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes,
+            const DevMem2Df leaves, const DevMem2Di subsets,
+            const DevMem2D_<uchar4> features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_<int4> objects, float scale, int clWidth, int clHeight)
         {
-            unsigned int x = threadIdx.x;
-            unsigned int y = blockIdx.x;
+            unsigned int x = threadIdx.x * step;
+            unsigned int y = blockIdx.x * step;
             int nodeOfs = 0, leafOfs = 0;
-            ::cv::gpu::device::Feature feature;
+            ::cv::gpu::device::Feature evaluator;

             for (int s = 0; s < stages.cols; s++ )
             {
@@ -61,7 +62,9 @@ namespace cv { namespace gpu { namespace device
                 for (int w = 0; w < stage.ntrees; w++)
                 {
                     ::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
-                    char c = feature();// TODO: inmplement it
+                    uchar4 feature = features(0, node.featureIdx);
+                    uchar c = evaluator(y, x, feature, integral);
+
                     const int subsetIdx = (nodeOfs * subsetSize);
                     int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
                     sum += leaves(0, subsets(0, idx) );
@@ -70,21 +73,27 @@ namespace cv { namespace gpu { namespace device
                 }
                 if (sum < stage.threshold)
-                    return; // nothing matched
-                return;//mathed
+                    return;
             }

+            int4 rect;
+            rect.x = roundf(x * scale);
+            rect.y = roundf(y * scale);
+            rect.z = roundf(clWidth * scale);
+            rect.w = roundf(clHeight * scale);
+            objects(blockIdx.x, threadIdx.x) = rect;
         }

-        void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets,
-            const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
+        void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures,
+            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
         {
-            printf("CascadeClassify");
             int blocks = ceilf(workHeight / (float)step);
             int threads = ceilf(workWidth / (float)step);

             DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages);
+            DevMem2D_<uchar4> features = (DevMem2D_<uchar4>)bfeatures;
+            DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes);

-            lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, integral, step, subsetSize, objects);
+            lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight);
         }
     }
 }}}
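
The per-node lookup in the kernel corresponds, on the CPU side of OpenCV's LBP cascades, to testing bit c of a 256-bit subset stored as eight 32-bit ints; the bit decides which of the weak classifier's two leaf values is accumulated into the stage sum. A small CPU sketch of that test (names illustrative):

    #include <cstdio>

    // Select one of a weak classifier's two leaves from an 8-bit LBP code and
    // the node's 256-bit category subset (eight 32-bit ints).
    static float weakResponse(unsigned char c, const int* subset, const float* leaves)
    {
        bool inSubset = (subset[c >> 5] & (1 << (c & 31))) != 0;   // test bit c of 256
        return inSubset ? leaves[0] : leaves[1];
    }

    int main()
    {
        int   subset[8] = { 0 };
        float leaves[2] = { 0.5f, -0.5f };
        unsigned char c = 37;               // some LBP code
        subset[c >> 5] |= (1 << (c & 31));  // mark code 37 as belonging to the subset

        float sum = weakResponse(c, subset, leaves);
        std::printf("stage %s (sum = %.2f)\n", sum >= 0.0f ? "passed" : "rejected", sum);
        return 0;
    }

The launch geometry in the wrapper then simply assigns one block per (stepped) row of windows and one thread per (stepped) column.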

@@ -82,9 +82,48 @@ namespace cv { namespace gpu { namespace device {
     {
         __device__ __forceinline__ Feature(const Feature& other) {(void)other;}
         __device__ __forceinline__ Feature() {}

-        __device__ __forceinline__ char operator() ()//(volatile int* ptr, int offset)
+        //feature as uchar x, y - left top, z,w - right bottom
+        __device__ __forceinline__ uchar operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
         {
-            return char(0);
+            int x_off = 2 * feature.z;
+            int y_off = 2 * feature.w;
+
+            // load feature key points
+            int anchors[16];
+            anchors[ 0] = integral(y + feature.y, x + feature.x);
+            anchors[ 1] = integral(y + feature.y, x + feature.z);
+            anchors[ 2] = integral(y + feature.y, x + x_off + feature.x);
+            anchors[ 3] = integral(y + feature.y, x + x_off + feature.z);
+            anchors[ 4] = integral(y + feature.w, x + feature.x);
+            anchors[ 5] = integral(y + feature.w, x + feature.z);
+            anchors[ 6] = integral(y + feature.w, x + x_off + feature.x);
+            anchors[ 7] = integral(y + feature.w, x + x_off + feature.z);
+            anchors[ 8] = integral(y + y_off + feature.y, x + feature.x);
+            anchors[ 9] = integral(y + y_off + feature.y, x + feature.z);
+            anchors[10] = integral(y + y_off + feature.y, x + x_off + feature.x);
+            anchors[11] = integral(y + y_off + feature.y, x + x_off + feature.z);
+            anchors[12] = integral(y + y_off + feature.w, x + feature.x);
+            anchors[13] = integral(y + y_off + feature.w, x + feature.z);
+            anchors[14] = integral(y + y_off + feature.w, x + x_off + feature.x);
+            anchors[15] = integral(y + y_off + feature.w, x + x_off + feature.z);
+
+            // calculate feature
+            int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10];
+
+            uchar response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum ) ? 128 : 0)
+                           | (( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum ) ?  64 : 0)
+                           | (( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum ) ?  32 : 0)
+                           | (( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum ) ?  16 : 0)
+                           | (( (anchors[10] - anchors[11] - anchors[14] + anchors[15]) >= sum ) ?   8 : 0)
+                           | (( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum ) ?   4 : 0)
+                           | (( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum ) ?   2 : 0)
+                           | (( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum ) ?   1 : 0);
+            return response;
         }
     };
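
For a uniform 3x3 grid of cells, the response assembled above is the usual multi-block LBP code: each of the eight outer cells is compared against the centre cell, with bit 128 for the top-left neighbour and the remaining bits following clockwise around to 1 for the left neighbour. A CPU sketch of that computation over a plain summed-area table (function names and the flat table layout are illustrative):

    #include <vector>

    // Sum of one cell from an (h+1) x (w+1) summed-area table with row stride (w+1).
    static int cellSum(const std::vector<int>& integ, int stride, int x0, int y0, int x1, int y1)
    {
        return integ[y1 * stride + x1] - integ[y0 * stride + x1]
             - integ[y1 * stride + x0] + integ[y0 * stride + x0];
    }

    // Multi-block LBP code for a 3x3 grid of cw x ch cells whose top-left corner is (x, y).
    static unsigned char lbpCode(const std::vector<int>& integ, int stride, int x, int y, int cw, int ch)
    {
        int s[3][3];
        for (int r = 0; r < 3; ++r)
            for (int c = 0; c < 3; ++c)
                s[r][c] = cellSum(integ, stride, x + c * cw, y + r * ch, x + (c + 1) * cw, y + (r + 1) * ch);

        const int center = s[1][1];
        unsigned char code = 0;
        code |= (s[0][0] >= center) ? 128 : 0;  // top-left
        code |= (s[0][1] >= center) ?  64 : 0;  // top
        code |= (s[0][2] >= center) ?  32 : 0;  // top-right
        code |= (s[1][2] >= center) ?  16 : 0;  // right
        code |= (s[2][2] >= center) ?   8 : 0;  // bottom-right
        code |= (s[2][1] >= center) ?   4 : 0;  // bottom
        code |= (s[2][0] >= center) ?   2 : 0;  // bottom-left
        code |= (s[1][0] >= center) ?   1 : 0;  // left
        return code;
    }

The device operator above works from the sixteen grid corners preloaded into anchors[], forming the cell sums as corner differences.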
