update soft cascade interface: - add class Detection in interface, - split sync- and async- versions, - add support for detecting at the specific scale.

pull/158/head
marina.kolpakova 12 years ago
parent 612a258506
commit b52fea7fae
  1. 26
      modules/gpu/include/opencv2/gpu/gpu.hpp
  2. 45
      modules/gpu/src/cuda/isf-sc.cu
  3. 50
      modules/gpu/src/softcascade.cpp

@ -1537,6 +1537,18 @@ public:
class CV_EXPORTS SoftCascade
{
public:
//! One detection record of the soft cascade.
struct CV_EXPORTS Detection
{
// NOTE(review): x, y, w, h presumably give the box origin and size in pixels — confirm against the kernel that fills them.
ushort x;
ushort y;
ushort w;
ushort h;
// NOTE(review): presumably the detector score for this box — confirm.
float confidence;
// NOTE(review): presumably holds one of the enum values declared below — confirm.
int kind;
// PEDESTRIAN is the only value declared here.
enum {PEDESTRIAN = 0};
};
//! An empty cascade will be created.
SoftCascade();
@ -1559,9 +1571,19 @@ public:
//! Param rois is a mask
//! Param objects 4-channel matrix that contains detected rectangles
//! Param rejectfactor used for final object box computing
//! Param stream
virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
int rejectfactor = 1, Stream stream = Stream::Null());
int rejectfactor = 1, int specificScale = -1);
//! detect specific objects in the input frame for all scales computed from minScale and maxScale values.
//! asynchronous version.
//! Param image is input frame for detector. Cascade will be applied to it.
//! Param rois is a mask
//! Param objects 4-channel matrix that contains detected rectangles
//! Param rejectfactor used for final object box computing
//! Param ndet retrieves number of detections
//! Param stream wrapper for CUDA stream
virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
int rejectfactor, GpuMat& ndet, Stream stream);
private:
struct Filds;

@ -105,7 +105,7 @@ namespace icf {
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
const float expected_new_area = farea * relScale * relScale;
float approx = sarea / expected_new_area;
float approx = __fdividef(sarea, expected_new_area);
dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
@ -198,12 +198,13 @@ namespace icf {
// }
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr)
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
const int downscales)
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x;
Level level = levels[blockIdx.z];
Level level = levels[downscales + blockIdx.z];
if(x >= level.workRect.x || y >= level.workRect.y) return;
@ -236,7 +237,7 @@ namespace icf {
dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact);
dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]);
dprintf("%d: computed score: %f\n",threadIdx.x, impact);
#pragma unroll
// scan using shuffle functions
for (int i = 1; i < 32; i *= 2)
{
@ -263,13 +264,13 @@ namespace icf {
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter)
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales)
{
int fw = 160;
int fh = 120;
dim3 block(32, 8);
dim3 grid(fw, fh / 8, 47);
dim3 grid(fw, fh / 8, downscales);
const Level* l = (const Level*)levels.ptr();
const Octave* oct = ((const Octave*)octaves.ptr());
@ -283,8 +284,38 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr);
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
grid = dim3(fw, fh / 8, 47 - downscales);
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales);
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
}
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter)
{
int fw = 160;
int fh = 120;
dim3 block(32, 8);
dim3 grid(fw, fh / 8, 1);
const Level* l = (const Level*)levels.ptr();
const Octave* oct = ((const Octave*)octaves.ptr());
const float* st = (const float*)stages.ptr();
const Node* nd = (const Node*)nodes.ptr();
const float* lf = (const float*)leaves.ptr();
uint* ctr = (uint*)counter.ptr();
Detection* det = (Detection*)objects.ptr();
uint max_det = objects.cols / sizeof(Detection);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
}

@ -49,7 +49,11 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); }
cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu();}
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, int) { throw_nogpu();}
// Stub of the asynchronous overload for the branch preceding `#else`: it only calls throw_nogpu().
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream) { throw_nogpu(); }
#else
@ -60,6 +64,9 @@ namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins);
// Launcher for detection over multiple levels. `downscales` is used by the
// definition to split the work into two kernel launches: one with grid depth
// `downscales` and one that starts at level offset `downscales`.
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter, const int downscales);
// Launcher for detection with a grid depth of one; `scale` is forwarded to the
// kernel as the level offset.
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter);
}
@ -86,6 +93,8 @@ struct cv::gpu::SoftCascade::Filds
int origObjWidth;
int origObjHeight;
int downscales;
GpuMat octaves;
GpuMat stages;
GpuMat nodes;
@ -120,7 +129,6 @@ struct cv::gpu::SoftCascade::Filds
FRAME_WIDTH = 640,
FRAME_HEIGHT = 480,
TOTAL_SCALES = 55,
// CLASSIFIERS = 5,
ORIG_OBJECT_WIDTH = 64,
ORIG_OBJECT_HEIGHT = 128,
HOG_BINS = 6,
@ -132,7 +140,14 @@ struct cv::gpu::SoftCascade::Filds
// Zeroes the detection counter and hands the cascade buffers to device::icf::detect.
// NOTE(review): `stream` is accepted but never used in this body.
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
{
// NOTE(review): GpuMat::step is documented as a row stride in bytes, so
// step * rows already spans the whole buffer; the extra sizeof(int) factor
// looks like it clears past the end of detCounter — confirm. The return code
// of cudaMemset is also not checked.
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
// NOTE(review): the two calls below differ only in the trailing `downscales`
// argument; the declaration of device::icf::detect takes it, so the first
// call looks like a leftover of the previous signature — confirm.
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter);
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales);
}
void detectAtScale(int scale, cv::gpu::GpuMat objects, cudaStream_t stream) const
{
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects,
detCounter);
}
private:
@ -160,7 +175,7 @@ private:
}
};
inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
{
using namespace device::icf;
minScale = mins;
@ -351,6 +366,7 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1);
float scale = minScale;
downscales = 0;
for (int sc = 0; sc < nscales; ++sc)
{
int width = ::std::max(0.0f, frameW - (origObjWidth * scale));
@ -366,7 +382,10 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
if (!width || !height)
break;
else
{
vlevels.push_back(level);
if (octs[fit].scale < 1) ++downscales;
}
if (::fabs(scale - maxScale) < FLT_EPSILON) break;
scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
@ -424,8 +443,11 @@ namespace {
return s;
}
}
//================================== synchronous version ============================================================//
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
GpuMat& objects, const int /*rejectfactor*/, Stream s)
GpuMat& objects, const int /*rejectfactor*/, int specificScale)
{
// only color images are supported
CV_Assert(colored.type() == CV_8UC3);
@ -513,11 +535,21 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
}
#endif
cudaStream_t stream = StreamAccessor::getStream(s);
flds.detect(objects, stream);
if (specificScale == -1)
flds.detect(objects, 0);
else
flds.detectAtScale(specificScale, objects, 0);
cv::Mat out(flds.detCounter);
int ndetections = *(out.data);
// cv::Mat out(flds.detCounter);
// std::cout << out << std::endl;
objects = GpuMat(objects, cv::Rect(0, 0, ndetections * sizeof(Detection), 1));
}
// Asynchronous overload.
// NOTE(review): the body contains only a commented-out stream lookup, so this
// overload performs no detection and leaves every output argument untouched.
// TODO: implement it or report that it is not implemented.
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
{
// cudaStream_t stream = StreamAccessor::getStream(s);
}
#endif
Loading…
Cancel
Save