refactor CUDA CascadeClassifier

pull/3600/head
Vladislav Vinogradov 10 years ago
parent 8257dc3c1e
commit 734212a402
5 changed files:

  1. modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp (99 changed lines)
  2. modules/cudaobjdetect/perf/perf_objdetect.cpp (26 changed lines)
  3. modules/cudaobjdetect/src/cascadeclassifier.cpp (690 changed lines)
  4. modules/cudaobjdetect/test/test_objdetect.cpp (33 changed lines)
  5. samples/gpu/cascadeclassifier.cpp (60 changed lines)

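Summary of the API change, as an editorial aid before the diffs: the stateful CascadeClassifier_CUDA class is replaced by an abstract cuda::CascadeClassifier algorithm with a static factory, and detection results now come back through an OutputArray plus an explicit convert() step instead of a returned count. A minimal before/after sketch of the calling pattern (the cascade file name and input image are placeholders, not part of this commit):

    // before this commit
    cv::cuda::CascadeClassifier_CUDA cascade("haarcascade_frontalface_alt.xml");
    cv::cuda::GpuMat d_img(img), d_objs;
    int n = cascade.detectMultiScale(d_img, d_objs, 1.2, 4);
    cv::Mat h_objs;
    d_objs.colRange(0, n).download(h_objs);          // manual download of n rectangles

    // after this commit
    cv::Ptr<cv::cuda::CascadeClassifier> cascade =
        cv::cuda::CascadeClassifier::create("haarcascade_frontalface_alt.xml");
    cascade->detectMultiScale(d_img, d_objs);        // result count carried by d_objs itself
    std::vector<cv::Rect> faces;
    cascade->convert(d_objs, faces);                 // download + copy into a std::vector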
modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp

@@ -75,7 +75,7 @@ namespace cv { namespace cuda {
     - (Python) An example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/python2/peopledetect.py
  */
-class CV_EXPORTS HOG : public cv::Algorithm
+class CV_EXPORTS HOG : public Algorithm
 {
 public:
     enum
@@ -204,87 +204,84 @@ public:
     - A Nvidea API specific cascade classifier example can be found at
         opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
  */
-class CV_EXPORTS CascadeClassifier_CUDA
+class CV_EXPORTS CascadeClassifier : public Algorithm
 {
 public:
-    CascadeClassifier_CUDA();
     /** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.

     @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
     (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
     type of OpenCV XML cascade supported for LBP.
      */
-    CascadeClassifier_CUDA(const String& filename);
-    ~CascadeClassifier_CUDA();
+    static Ptr<CascadeClassifier> create(const String& filename);
+    /** @overload
+     */
+    static Ptr<CascadeClassifier> create(const FileStorage& file);

-    /** @brief Checks whether the classifier is loaded or not.
-     */
-    bool empty() const;
-    /** @brief Loads the classifier from a file. The previous content is destroyed.
-    @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
-    (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
-    type of OpenCV XML cascade supported for LBP.
-     */
-    bool load(const String& filename);
-    /** @brief Destroys the loaded classifier.
-     */
-    void release();
+    //! Maximum possible object size. Objects larger than that are ignored. Used for
+    //! second signature and supported only for LBP cascades.
+    virtual void setMaxObjectSize(Size maxObjectSize) = 0;
+    virtual Size getMaxObjectSize() const = 0;
+
+    //! Minimum possible object size. Objects smaller than that are ignored.
+    virtual void setMinObjectSize(Size minSize) = 0;
+    virtual Size getMinObjectSize() const = 0;
+
+    //! Parameter specifying how much the image size is reduced at each image scale.
+    virtual void setScaleFactor(double scaleFactor) = 0;
+    virtual double getScaleFactor() const = 0;
+
+    //! Parameter specifying how many neighbors each candidate rectangle should have
+    //! to retain it.
+    virtual void setMinNeighbors(int minNeighbors) = 0;
+    virtual int getMinNeighbors() const = 0;
+
+    virtual void setFindLargestObject(bool findLargestObject) = 0;
+    virtual bool getFindLargestObject() = 0;
+
+    virtual void setMaxNumObjects(int maxNumObjects) = 0;
+    virtual int getMaxNumObjects() const = 0;
+
+    virtual Size getClassifierSize() const = 0;

-    /** @overload */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
     /** @brief Detects objects of different sizes in the input image.

     @param image Matrix of type CV_8U containing an image where objects should be detected.
-    @param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
-    with the default size. If not empty, the function searches not more than N objects, where
-    N = sizeof(objectsBufer's data)/sizeof(cv::Rect).
-    @param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
-    second signature and supported only for LBP cascades.
-    @param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
-    @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
-    to retain it.
-    @param minSize Minimum possible object size. Objects smaller than that are ignored.
+    @param objects Buffer to store detected objects (rectangles).

-    The detected objects are returned as a list of rectangles.
-    The function returns the number of detected objects, so you can retrieve them as in the following
-    example:
+    To get final array of detected objects use CascadeClassifier::convert method.

     @code
-        cuda::CascadeClassifier_CUDA cascade_gpu(...);
+        Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(...);

         Mat image_cpu = imread(...)
         GpuMat image_gpu(image_cpu);

         GpuMat objbuf;
-        int detections_number = cascade_gpu.detectMultiScale( image_gpu,
-                                                              objbuf, 1.2, minNeighbors);
+        cascade_gpu->detectMultiScale(image_gpu, objbuf);

-        Mat obj_host;
-        // download only detected number of rectangles
-        objbuf.colRange(0, detections_number).download(obj_host);
+        std::vector<Rect> faces;
+        cascade_gpu->convert(objbuf, faces);

-        Rect* faces = obj_host.ptr<Rect>();
         for(int i = 0; i < detections_num; ++i)
             cv::rectangle(image_cpu, faces[i], Scalar(255));

         imshow("Faces", image_cpu);
     @endcode

     @sa CascadeClassifier::detectMultiScale
      */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+    virtual void detectMultiScale(InputArray image,
+                                  OutputArray objects,
+                                  Stream& stream = Stream::Null()) = 0;

-    bool findLargestObject;
-    bool visualizeInPlace;
-    Size getClassifierSize() const;
+    /** @brief Converts objects array from internal representation to standard vector.

-private:
-    struct CascadeClassifierImpl;
-    CascadeClassifierImpl* impl;
-    struct HaarCascade;
-    struct LbpCascade;
-    friend class CascadeClassifier_CUDA_LBP;
+    @param gpu_objects Objects array in internal representation.
+    @param objects Resulting array.
+     */
+    virtual void convert(OutputArray gpu_objects,
+                         std::vector<Rect>& objects) = 0;
 };

 //! @}

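The scaleFactor/minNeighbors/size arguments of the old detectMultiScale overloads become Algorithm-style properties on the new interface. A hedged sketch of configuring them before detection (1.2, 4, and 100 are the defaults set by this commit's CascadeClassifierBase constructor; the other values and the file name are illustrative only):

    cv::Ptr<cv::cuda::CascadeClassifier> cascade =
        cv::cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");
    cascade->setScaleFactor(1.2);                  // pyramid step; must stay > 1
    cascade->setMinNeighbors(4);                   // grouping threshold; 0 disables grouping
    cascade->setMinObjectSize(cv::Size(32, 32));   // ignore smaller candidates
    cascade->setMaxNumObjects(100);                // capacity of the internal result buffer
    cascade->setFindLargestObject(true);           // keep only the strongest detection
    cascade->detectMultiScale(d_img, d_objs);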
@ -107,18 +107,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
if (PERF_RUN_CUDA()) if (PERF_RUN_CUDA())
{ {
cv::cuda::CascadeClassifier_CUDA d_cascade; cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second))); cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
const cv::cuda::GpuMat d_img(img); const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat objects_buffer; cv::cuda::GpuMat objects_buffer;
int detections_num = 0;
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer); TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
std::vector<cv::Rect> gpu_rects;
d_cascade->convert(objects_buffer, gpu_rects);
std::vector<cv::Rect> gpu_rects(detections_num);
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
cv::groupRectangles(gpu_rects, 3, 0.2); cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects); SANITY_CHECK(gpu_rects);
} }
@ -146,18 +145,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
if (PERF_RUN_CUDA()) if (PERF_RUN_CUDA())
{ {
cv::cuda::CascadeClassifier_CUDA d_cascade; cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second))); cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
const cv::cuda::GpuMat d_img(img); const cv::cuda::GpuMat d_img(img);
cv::cuda::GpuMat objects_buffer; cv::cuda::GpuMat objects_buffer;
int detections_num = 0;
TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer); TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
std::vector<cv::Rect> gpu_rects;
d_cascade->convert(objects_buffer, gpu_rects);
std::vector<cv::Rect> gpu_rects(detections_num);
cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
cv::groupRectangles(gpu_rects, 3, 0.2); cv::groupRectangles(gpu_rects, 3, 0.2);
SANITY_CHECK(gpu_rects); SANITY_CHECK(gpu_rects);
} }

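The perf tests no longer track a detection count: convert() subsumes the manual colRange().download() sequence. Judging from the implementation later in this commit, it is roughly equivalent to this sketch (variable names are placeholders):

    // rough equivalent of CascadeClassifier::convert(), per the implementation below
    cv::Mat h_objs;
    d_objs.download(h_objs);                        // 1 x N row of cv::Rect
    const cv::Rect* ptr = h_objs.ptr<cv::Rect>();
    std::vector<cv::Rect> faces(ptr, ptr + h_objs.cols);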
modules/cudaobjdetect/src/cascadeclassifier.cpp

@@ -48,160 +48,185 @@ using namespace cv::cuda;

 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA() { throw_no_cuda(); }
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String&) { throw_no_cuda(); }
-cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { throw_no_cuda(); }
-bool cv::cuda::CascadeClassifier_CUDA::empty() const { throw_no_cuda(); return true; }
-bool cv::cuda::CascadeClassifier_CUDA::load(const String&) { throw_no_cuda(); return true; }
-Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const { throw_no_cuda(); return Size();}
-void cv::cuda::CascadeClassifier_CUDA::release() { throw_no_cuda(); }
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size) {throw_no_cuda(); return -1;}
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_no_cuda(); return -1;}
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }

 #else

-struct cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
-{
-public:
-    CascadeClassifierImpl(){}
-    virtual ~CascadeClassifierImpl(){}
-
-    virtual unsigned int process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
-                                 bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, cv::Size maxObjectSize) = 0;
-    virtual cv::Size getClassifierCvSize() const = 0;
-    virtual bool read(const String& classifierAsXml) = 0;
-};
-
-#ifndef HAVE_OPENCV_CUDALEGACY
-
-struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
-{
-public:
-    HaarCascade()
-    {
-        throw_no_cuda();
-    }
-
-    unsigned int process(const GpuMat&, GpuMat&, float, int, bool, bool, cv::Size, cv::Size)
-    {
-        throw_no_cuda();
-        return 0;
-    }
-
-    cv::Size getClassifierCvSize() const
-    {
-        throw_no_cuda();
-        return cv::Size();
-    }
-
-    bool read(const String&)
-    {
-        throw_no_cuda();
-        return false;
-    }
-};
-
-#else
-
-struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
-{
-public:
-    HaarCascade() : lastAllocatedFrameSize(-1, -1)
-    {
-        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
-    }
-
-    bool read(const String& filename)
-    {
-        ncvSafeCall( load(filename) );
-        return true;
-    }
-
-    NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
-                      bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize,
-                      /*out*/unsigned int& numDetections)
-    {
-        calculateMemReqsAndAllocate(src.size());
-
-        NCVMemPtr src_beg;
-        src_beg.ptr = (void*)src.ptr<Ncv8u>();
-        src_beg.memtype = NCVMemoryTypeDevice;
-
-        NCVMemSegment src_seg;
-        src_seg.begin = src_beg;
-        src_seg.size = src.step * src.rows;
-
-        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
-        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
-
-        CV_Assert(objects.rows == 1);
-
-        NCVMemPtr objects_beg;
-        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
-        objects_beg.memtype = NCVMemoryTypeDevice;
-
-        NCVMemSegment objects_seg;
-        objects_seg.begin = objects_beg;
-        objects_seg.size = objects.step * objects.rows;
-        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
-        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
-
-        NcvSize32u roi;
-        roi.width = d_src.width();
-        roi.height = d_src.height();
-
-        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
-
-        Ncv32u flags = 0;
-        flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
-        flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace : 0;
-
-        ncvStat = ncvDetectObjectsMultiScale_device(
-            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
-            *d_haarStages, *d_haarNodes, *d_haarFeatures,
-            winMinSize,
-            minNeighbors,
-            scaleStep, 1,
-            flags,
-            *gpuAllocator, *cpuAllocator, devProp, 0);
-        ncvAssertReturnNcvStat(ncvStat);
-        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
-
-        return NCV_SUCCESS;
-    }
-
-    unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
-                         bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
-    {
-        CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
-
-        const int defaultObjSearchNum = 100;
-        if (objectsBuf.empty())
-        {
-            objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
-        }
-
-        cv::Size ncvMinSize = this->getClassifierCvSize();
-
-        if (ncvMinSize.width < minSize.width && ncvMinSize.height < minSize.height)
-        {
-            ncvMinSize.width = minSize.width;
-            ncvMinSize.height = minSize.height;
-        }
-
-        unsigned int numDetections;
-        ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections));
-
-        return numDetections;
-    }
-
-    cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
-
-private:
-    static void NCVDebugOutputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
-
-    NCVStatus load(const String& classifierFile)
+//
+// CascadeClassifierBase
+//
+
+namespace
+{
+    class CascadeClassifierBase : public cuda::CascadeClassifier
+    {
+    public:
+        CascadeClassifierBase();
+
+        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
+        virtual Size getMaxObjectSize() const { return maxObjectSize_; }
+
+        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
+        virtual Size getMinObjectSize() const { return minObjectSize_; }
+
+        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
+        virtual double getScaleFactor() const { return scaleFactor_; }
+
+        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
+        virtual int getMinNeighbors() const { return minNeighbors_; }
+
+        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
+        virtual bool getFindLargestObject() { return findLargestObject_; }
+
+        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
+        virtual int getMaxNumObjects() const { return maxNumObjects_; }
+
+    protected:
+        Size maxObjectSize_;
+        Size minObjectSize_;
+        double scaleFactor_;
+        int minNeighbors_;
+        bool findLargestObject_;
+        int maxNumObjects_;
+    };
+
+    CascadeClassifierBase::CascadeClassifierBase() :
+        maxObjectSize_(),
+        minObjectSize_(),
+        scaleFactor_(1.2),
+        minNeighbors_(4),
+        findLargestObject_(false),
+        maxNumObjects_(100)
+    {
+    }
+}
+
+//
+// HaarCascade
+//
+
+#ifdef HAVE_OPENCV_CUDALEGACY
+
+namespace
+{
+    class HaarCascade_Impl : public CascadeClassifierBase
+    {
+    public:
+        explicit HaarCascade_Impl(const String& filename);
+
+        virtual Size getClassifierSize() const;
+
+        virtual void detectMultiScale(InputArray image,
+                                      OutputArray objects,
+                                      Stream& stream);
+
+        virtual void convert(OutputArray gpu_objects,
+                             std::vector<Rect>& objects);
+
+    private:
+        NCVStatus load(const String& classifierFile);
+        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
+        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);
+
+        Size lastAllocatedFrameSize;
+
+        Ptr<NCVMemStackAllocator> gpuAllocator;
+        Ptr<NCVMemStackAllocator> cpuAllocator;
+
+        cudaDeviceProp devProp;
+        NCVStatus ncvStat;
+
+        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
+        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
+
+        Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
+        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
+        Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
+
+        HaarClassifierCascadeDescriptor haar;
+
+        Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
+        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
+        Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
+    };
+
+    static void NCVDebugOutputHandler(const String &msg)
+    {
+        CV_Error(Error::GpuApiCallError, msg.c_str());
+    }
+
+    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
+        lastAllocatedFrameSize(-1, -1)
+    {
+        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
+        ncvSafeCall( load(filename) );
+    }
+
+    Size HaarCascade_Impl::getClassifierSize() const
+    {
+        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
+    }
+
+    void HaarCascade_Impl::detectMultiScale(InputArray _image,
+                                            OutputArray _objects,
+                                            Stream& stream)
+    {
+        const GpuMat image = _image.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U);
+        CV_Assert( scaleFactor_ > 1 );
+        CV_Assert( !stream );
+
+        Size ncvMinSize = getClassifierSize();
+        if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
+        {
+            ncvMinSize.width = minObjectSize_.width;
+            ncvMinSize.height = minObjectSize_.height;
+        }
+
+        BufferPool pool(stream);
+        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
+
+        unsigned int numDetections;
+        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );
+
+        if (numDetections > 0)
+        {
+            objectsBuf.colRange(0, numDetections).copyTo(_objects);
+        }
+        else
+        {
+            _objects.release();
+        }
+    }
+
+    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
+    {
+        if (_gpu_objects.empty())
+        {
+            objects.clear();
+            return;
+        }
+
+        Mat gpu_objects;
+        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_objects.getGpuMat().download(gpu_objects);
+        }
+        else
+        {
+            gpu_objects = _gpu_objects.getMat();
+        }
+
+        CV_Assert( gpu_objects.rows == 1 );
+        CV_Assert( gpu_objects.type() == DataType<Rect>::type );
+
+        Rect* ptr = gpu_objects.ptr<Rect>();
+        objects.assign(ptr, ptr + gpu_objects.cols);
+    }
+
+    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
     {
         int devId = cv::cuda::getDevice();
         ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
@@ -246,7 +271,7 @@
         return NCV_SUCCESS;
     }

-    NCVStatus calculateMemReqsAndAllocate(const Size& frameSize)
+    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
     {
         if (lastAllocatedFrameSize == frameSize)
         {
@@ -289,88 +314,62 @@
         return NCV_SUCCESS;
     }

-    cudaDeviceProp devProp;
-    NCVStatus ncvStat;
-
-    Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
-    Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
-
-    Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
-    Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
-    Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;
-
-    HaarClassifierCascadeDescriptor haar;
-    Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
-    Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
-    Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
-
-    Size lastAllocatedFrameSize;
-
-    Ptr<NCVMemStackAllocator> gpuAllocator;
-    Ptr<NCVMemStackAllocator> cpuAllocator;
-
-    virtual ~HaarCascade(){}
-};
-
-#endif
-
-cv::Size operator -(const cv::Size& a, const cv::Size& b)
-{
-    return cv::Size(a.width - b.width, a.height - b.height);
-}
-
-cv::Size operator +(const cv::Size& a, const int& i)
-{
-    return cv::Size(a.width + i, a.height + i);
-}
-
-cv::Size operator *(const cv::Size& a, const float& f)
-{
-    return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
-}
-
-cv::Size operator /(const cv::Size& a, const float& f)
-{
-    return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
-}
-
-bool operator <=(const cv::Size& a, const cv::Size& b)
-{
-    return a.width <= b.width && a.height <= b.width;
-}
-
-struct PyrLavel
-{
-    PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
-    {
-        do
-        {
-            order = _order;
-            scale = pow(_scale, order);
-            sFrame = frame / scale;
-            workArea = sFrame - window + 1;
-            sWindow = window * scale;
-            _order++;
-        } while (sWindow <= minObjectSize);
-    }
-
-    bool isFeasible(cv::Size maxObj)
-    {
-        return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
-    }
-
-    PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
-    {
-        return PyrLavel(order + 1, factor, frame, window, minObjectSize);
-    }
-
-    int order;
-    float scale;
-    cv::Size sFrame;
-    cv::Size workArea;
-    cv::Size sWindow;
-};
+    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
+    {
+        calculateMemReqsAndAllocate(src.size());
+
+        NCVMemPtr src_beg;
+        src_beg.ptr = (void*)src.ptr<Ncv8u>();
+        src_beg.memtype = NCVMemoryTypeDevice;
+
+        NCVMemSegment src_seg;
+        src_seg.begin = src_beg;
+        src_seg.size = src.step * src.rows;
+
+        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
+        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
+
+        CV_Assert(objects.rows == 1);
+
+        NCVMemPtr objects_beg;
+        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
+        objects_beg.memtype = NCVMemoryTypeDevice;
+
+        NCVMemSegment objects_seg;
+        objects_seg.begin = objects_beg;
+        objects_seg.size = objects.step * objects.rows;
+        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
+        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
+
+        NcvSize32u roi;
+        roi.width = d_src.width();
+        roi.height = d_src.height();
+
+        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
+
+        Ncv32u flags = 0;
+        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;
+
+        ncvStat = ncvDetectObjectsMultiScale_device(
+            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
+            *d_haarStages, *d_haarNodes, *d_haarFeatures,
+            winMinSize,
+            minNeighbors_,
+            scaleFactor_, 1,
+            flags,
+            *gpuAllocator, *cpuAllocator, devProp, 0);
+        ncvAssertReturnNcvStat(ncvStat);
+        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
+
+        return NCV_SUCCESS;
+    }
+}
+
+#endif
+
+//
+// LbpCascade
+//

 namespace cv { namespace cuda { namespace device
 {
@@ -394,42 +393,154 @@ namespace cv { namespace cuda { namespace device
                               unsigned int* classified,
                               PtrStepSzi integral);

-        void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
+        void connectedConmonents(PtrStepSz<int4> candidates,
+                                 int ncandidates,
+                                 PtrStepSz<int4> objects,
+                                 int groupThreshold,
+                                 float grouping_eps,
+                                 unsigned int* nclasses);
     }
 }}}

-struct cv::cuda::CascadeClassifier_CUDA::LbpCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
-{
-public:
-    struct Stage
-    {
-        int first;
-        int ntrees;
-        float threshold;
-    };
-
-    LbpCascade(){}
-    virtual ~LbpCascade(){}
-
-    virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool /*findLargestObject*/,
-                                 bool /*visualizeInPlace*/, cv::Size minObjectSize, cv::Size maxObjectSize)
-    {
-        CV_Assert(scaleFactor > 1 && image.depth() == CV_8U);
-
-        // const int defaultObjSearchNum = 100;
-        const float grouping_eps = 0.2f;
-
-        if( !objects.empty() && objects.depth() == CV_32S)
-            objects.reshape(4, 1);
-        else
-            objects.create(1 , image.cols >> 4, CV_32SC4);
+namespace
+{
+    cv::Size operator -(const cv::Size& a, const cv::Size& b)
+    {
+        return cv::Size(a.width - b.width, a.height - b.height);
+    }
+
+    cv::Size operator +(const cv::Size& a, const int& i)
+    {
+        return cv::Size(a.width + i, a.height + i);
+    }
+
+    cv::Size operator *(const cv::Size& a, const float& f)
+    {
+        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
+    }
+
+    cv::Size operator /(const cv::Size& a, const float& f)
+    {
+        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
+    }
+
+    bool operator <=(const cv::Size& a, const cv::Size& b)
+    {
+        return a.width <= b.width && a.height <= b.width;
+    }
+
+    struct PyrLavel
+    {
+        PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
+        {
+            do
+            {
+                order = _order;
+                scale = pow(_scale, order);
+                sFrame = frame / scale;
+                workArea = sFrame - window + 1;
+                sWindow = window * scale;
+                _order++;
+            } while (sWindow <= minObjectSize);
+        }
+
+        bool isFeasible(cv::Size maxObj)
+        {
+            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
+        }
+
+        PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
+        {
+            return PyrLavel(order + 1, factor, frame, window, minObjectSize);
+        }
+
+        int order;
+        float scale;
+        cv::Size sFrame;
+        cv::Size workArea;
+        cv::Size sWindow;
+    };
+
+    class LbpCascade_Impl : public CascadeClassifierBase
+    {
+    public:
+        explicit LbpCascade_Impl(const FileStorage& file);
+
+        virtual Size getClassifierSize() const { return NxM; }
+
+        virtual void detectMultiScale(InputArray image,
+                                      OutputArray objects,
+                                      Stream& stream);
+
+        virtual void convert(OutputArray gpu_objects,
+                             std::vector<Rect>& objects);
+
+    private:
+        bool load(const FileNode &root);
+        void allocateBuffers(cv::Size frame);
+
+    private:
+        struct Stage
+        {
+            int first;
+            int ntrees;
+            float threshold;
+        };
+
+        enum stage { BOOST = 0 };
+        enum feature { LBP = 1, HAAR = 2 };
+
+        static const stage stageType = BOOST;
+        static const feature featureType = LBP;
+
+        cv::Size NxM;
+        bool isStumps;
+        int ncategories;
+        int subsetSize;
+        int nodeStep;
+
+        // gpu representation of classifier
+        GpuMat stage_mat;
+        GpuMat trees_mat;
+        GpuMat nodes_mat;
+        GpuMat leaves_mat;
+        GpuMat subsets_mat;
+        GpuMat features_mat;
+
+        GpuMat integral;
+        GpuMat integralBuffer;
+        GpuMat resuzeBuffer;
+
+        GpuMat candidates;
+        static const int integralFactor = 4;
+    };
+
+    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
+    {
+        load(file.getFirstTopLevelNode());
+    }
+
+    void LbpCascade_Impl::detectMultiScale(InputArray _image,
+                                           OutputArray _objects,
+                                           Stream& stream)
+    {
+        const GpuMat image = _image.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U);
+        CV_Assert( scaleFactor_ > 1 );
+        CV_Assert( !stream );
+
+        const float grouping_eps = 0.2f;
+
+        BufferPool pool(stream);
+        GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

         // used for debug
         // candidates.setTo(cv::Scalar::all(0));
         // objects.setTo(cv::Scalar::all(0));

-        if (maxObjectSize == cv::Size())
-            maxObjectSize = image.size();
+        if (maxObjectSize_ == cv::Size())
+            maxObjectSize_ = image.size();

         allocateBuffers(image.size());
@@ -437,9 +548,9 @@
         GpuMat dclassified(1, 1, CV_32S);
         cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

-        PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize);
+        PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);

-        while (level.isFeasible(maxObjectSize))
+        while (level.isFeasible(maxObjectSize_))
         {
             int acc = level.sFrame.width + 1;
             float iniScale = level.scale;
@@ -449,7 +560,7 @@
             int total = 0, prev = 0;

-            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
+            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
             {
                 // create sutable matrix headers
                 GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
@@ -465,7 +576,7 @@
                 total += totalWidth * (level.workArea.height / step);

                 // go to next pyramide level
-                level = level.next(scaleFactor, image.size(), NxM, minObjectSize);
+                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
                 area = level.workArea;

                 step = (1 + (level.scale <= 2.f));
@@ -473,60 +584,55 @@
                 acc += level.sFrame.width + 1;
             }

-            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
+            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                 leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
         }

-        if (groupThreshold <= 0 || objects.empty())
-            return 0;
+        if (minNeighbors_ <= 0 || objects.empty())
+            return;

         cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-        device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
+        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());

         cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
         cudaSafeCall( cudaDeviceSynchronize() );

-        return classified;
-    }
-
-    virtual cv::Size getClassifierCvSize() const { return NxM; }
-
-    bool read(const String& classifierAsXml)
-    {
-        FileStorage fs(classifierAsXml, FileStorage::READ);
-        return fs.isOpened() ? read(fs.getFirstTopLevelNode()) : false;
-    }
-
-private:
-    void allocateBuffers(cv::Size frame)
-    {
-        if (frame == cv::Size())
-            return;
-
-        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
-        {
-            resuzeBuffer.create(frame, CV_8UC1);
-
-            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
-
-#ifdef HAVE_OPENCV_CUDALEGACY
-            NcvSize32u roiSize;
-            roiSize.width = frame.width;
-            roiSize.height = frame.height;
-
-            cudaDeviceProp prop;
-            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
-
-            Ncv32u bufSize;
-            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
-            integralBuffer.create(1, bufSize, CV_8UC1);
-#endif
-
-            candidates.create(1 , frame.width >> 1, CV_32SC4);
-        }
-    }
-
-    bool read(const FileNode &root)
+        if (classified > 0)
+        {
+            objects.colRange(0, classified).copyTo(_objects);
+        }
+        else
+        {
+            _objects.release();
+        }
+    }
+
+    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
+    {
+        if (_gpu_objects.empty())
+        {
+            objects.clear();
+            return;
+        }
+
+        Mat gpu_objects;
+        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_objects.getGpuMat().download(gpu_objects);
+        }
+        else
+        {
+            gpu_objects = _gpu_objects.getMat();
+        }
+
+        CV_Assert( gpu_objects.rows == 1 );
+        CV_Assert( gpu_objects.type() == DataType<Rect>::type );
+
+        Rect* ptr = gpu_objects.ptr<Rect>();
+        objects.assign(ptr, ptr + gpu_objects.cols);
+    }
+
+    bool LbpCascade_Impl::load(const FileNode &root)
     {
         const char *CUDA_CC_STAGE_TYPE = "stageType";
         const char *CUDA_CC_FEATURE_TYPE = "featureType";
@@ -667,92 +773,90 @@
         return true;
     }

-    enum stage { BOOST = 0 };
-    enum feature { LBP = 1, HAAR = 2 };
-    static const stage stageType = BOOST;
-    static const feature featureType = LBP;
-
-    cv::Size NxM;
-    bool isStumps;
-    int ncategories;
-    int subsetSize;
-    int nodeStep;
-
-    // gpu representation of classifier
-    GpuMat stage_mat;
-    GpuMat trees_mat;
-    GpuMat nodes_mat;
-    GpuMat leaves_mat;
-    GpuMat subsets_mat;
-    GpuMat features_mat;
-
-    GpuMat integral;
-    GpuMat integralBuffer;
-    GpuMat resuzeBuffer;
-
-    GpuMat candidates;
-    static const int integralFactor = 4;
-};
-
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA()
-    : findLargestObject(false), visualizeInPlace(false), impl(0) {}
-
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String& filename)
-    : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
-
-cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { release(); }
-
-void cv::cuda::CascadeClassifier_CUDA::release() { if (impl) { delete impl; impl = 0; } }
-
-bool cv::cuda::CascadeClassifier_CUDA::empty() const { return impl == 0; }
-
-Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const
-{
-    return this->empty() ? Size() : impl->getClassifierCvSize();
-}
-
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
-{
-    CV_Assert( !this->empty());
-    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size());
-}
-
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
-{
-    CV_Assert( !this->empty());
-    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize);
-}
-
-bool cv::cuda::CascadeClassifier_CUDA::load(const String& filename)
+    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
+    {
+        if (frame == cv::Size())
+            return;
+
+        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
+        {
+            resuzeBuffer.create(frame, CV_8UC1);
+
+            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
+
+        #ifdef HAVE_OPENCV_CUDALEGACY
+            NcvSize32u roiSize;
+            roiSize.width = frame.width;
+            roiSize.height = frame.height;
+
+            cudaDeviceProp prop;
+            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
+
+            Ncv32u bufSize;
+            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
+            integralBuffer.create(1, bufSize, CV_8UC1);
+        #endif
+
+            candidates.create(1 , frame.width >> 1, CV_32SC4);
+        }
+    }
+}
+
+//
+// create
+//
+
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
 {
-    release();
-
     String fext = filename.substr(filename.find_last_of(".") + 1);
     fext = fext.toLowerCase();

     if (fext == "nvbin")
     {
-        impl = new HaarCascade();
-        return impl->read(filename);
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
     }

     FileStorage fs(filename, FileStorage::READ);

     if (!fs.isOpened())
     {
-        impl = new HaarCascade();
-        return impl->read(filename);
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
     }

     const char *CUDA_CC_LBP = "LBP";

     String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
     if (featureTypeStr == CUDA_CC_LBP)
-        impl = new LbpCascade();
+    {
+        return makePtr<LbpCascade_Impl>(fs);
+    }
     else
-        impl = new HaarCascade();
+    {
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
+    }

-    impl->read(filename);
-    return !this->empty();
+    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
+    return Ptr<cuda::CascadeClassifier>();
+}
+
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
+{
+    return makePtr<LbpCascade_Impl>(file);
 }

 #endif

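Note that only LBP cascades can be constructed from an already-open FileStorage: create(const FileStorage&) goes straight to LbpCascade_Impl with no format dispatch. A short sketch of that overload (the file name is a placeholder):

    cv::FileStorage fs("lbpcascade_frontalface.xml", cv::FileStorage::READ);
    CV_Assert(fs.isOpened());
    cv::Ptr<cv::cuda::CascadeClassifier> cascade =
        cv::cuda::CascadeClassifier::create(fs);    // LbpCascade_Impl only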
modules/cudaobjdetect/test/test_objdetect.cpp

@@ -287,9 +287,15 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::cuda::DeviceInfo, int)
 CUDA_TEST_P(LBP_Read_classifier, Accuracy)
 {
-    cv::cuda::CascadeClassifier_CUDA classifier;
     std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
-    ASSERT_TRUE(classifier.load(classifierXmlPath));
+
+    cv::Ptr<cv::cuda::CascadeClassifier> d_cascade;
+
+    ASSERT_NO_THROW(
+        d_cascade = cv::cuda::CascadeClassifier::create(classifierXmlPath);
+    );
+
+    ASSERT_FALSE(d_cascade.empty());
 }

 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_Read_classifier,
@@ -329,29 +335,28 @@ CUDA_TEST_P(LBP_classify, Accuracy)
     for (; it != rects.end(); ++it)
         cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0));

-    cv::cuda::CascadeClassifier_CUDA gpuClassifier;
-    ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
+    cv::Ptr<cv::cuda::CascadeClassifier> gpuClassifier =
+        cv::cuda::CascadeClassifier::create(classifierXmlPath);

-    cv::cuda::GpuMat gpu_rects;
     cv::cuda::GpuMat tested(grey);
-    int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
+    cv::cuda::GpuMat gpu_rects_buf;
+    gpuClassifier->detectMultiScale(tested, gpu_rects_buf);
+
+    std::vector<cv::Rect> gpu_rects;
+    gpuClassifier->convert(gpu_rects_buf, gpu_rects);

 #if defined (LOG_CASCADE_STATISTIC)
-    cv::Mat downloaded(gpu_rects);
-    const cv::Rect* faces = downloaded.ptr<cv::Rect>();
-    for (int i = 0; i < count; i++)
+    for (size_t i = 0; i < gpu_rects.size(); i++)
     {
-        cv::Rect r = faces[i];
+        cv::Rect r = gpu_rects[i];
         std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
     }
-#endif

-#if defined (LOG_CASCADE_STATISTIC)
-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
 #endif
-    (void)count;
 }

 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_classify,

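convert() accepts both device and host arrays (its implementation branches on _InputArray::CUDA_GPU_MAT), so the tests can hand it the raw detection buffer without an explicit download. A sketch of both call shapes, under that reading of the implementation:

    std::vector<cv::Rect> rects;
    gpuClassifier->convert(gpu_rects_buf, rects);   // GpuMat: downloaded internally

    cv::Mat host_buf;
    gpu_rects_buf.download(host_buf);
    gpuClassifier->convert(host_buf, rects);        // Mat: used as-is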
samples/gpu/cascadeclassifier.cpp

@@ -173,13 +173,9 @@ int main(int argc, const char *argv[])
         }
     }

-    CascadeClassifier_CUDA cascade_gpu;
-    if (!cascade_gpu.load(cascadeName))
-    {
-        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
-    }
+    Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(cascadeName);

-    CascadeClassifier cascade_cpu;
+    cv::CascadeClassifier cascade_cpu;
     if (!cascade_cpu.load(cascadeName))
     {
         return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
@@ -206,8 +202,8 @@ int main(int argc, const char *argv[])
     namedWindow("result", 1);

-    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
-    vector<Rect> facesBuf_cpu;
+    Mat frame, frame_cpu, gray_cpu, resized_cpu, frameDisp;
+    vector<Rect> faces;
     GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
@@ -218,7 +214,6 @@ int main(int argc, const char *argv[])
     bool filterRects = true;
     bool helpScreen = false;
-    int detections_num;

     for (;;)
     {
         if (isInputCamera || isInputVideo)
@@ -241,40 +236,26 @@ int main(int argc, const char *argv[])
         if (useGPU)
         {
-            //cascade_gpu.visualizeInPlace = true;
-            cascade_gpu.findLargestObject = findLargestObject;
-
-            detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu, 1.2,
-                                                          (filterRects || findLargestObject) ? 4 : 0);
-            facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
+            cascade_gpu->setFindLargestObject(findLargestObject);
+            cascade_gpu->setScaleFactor(1.2);
+            cascade_gpu->setMinNeighbors((filterRects || findLargestObject) ? 4 : 0);
+
+            cascade_gpu->detectMultiScale(resized_gpu, facesBuf_gpu);
+            cascade_gpu->convert(facesBuf_gpu, faces);
         }
         else
         {
-            Size minSize = cascade_gpu.getClassifierSize();
-            cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
+            Size minSize = cascade_gpu->getClassifierSize();
+            cascade_cpu.detectMultiScale(resized_cpu, faces, 1.2,
                                          (filterRects || findLargestObject) ? 4 : 0,
                                          (findLargestObject ? CASCADE_FIND_BIGGEST_OBJECT : 0)
                                             | CASCADE_SCALE_IMAGE,
                                          minSize);
-            detections_num = (int)facesBuf_cpu.size();
         }

-        if (!useGPU && detections_num)
-        {
-            for (int i = 0; i < detections_num; ++i)
-            {
-                rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
-            }
-        }
-
-        if (useGPU)
-        {
-            resized_gpu.download(resized_cpu);
-
-            for (int i = 0; i < detections_num; ++i)
-            {
-                rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i], Scalar(255));
-            }
-        }
+        for (size_t i = 0; i < faces.size(); ++i)
+        {
+            rectangle(resized_cpu, faces[i], Scalar(255));
+        }

         tm.stop();
@@ -283,16 +264,15 @@ int main(int argc, const char *argv[])
         //print detections to console
         cout << setfill(' ') << setprecision(2);
-        cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
-        if ((filterRects || findLargestObject) && detections_num > 0)
+        cout << setw(6) << fixed << fps << " FPS, " << faces.size() << " det";
+        if ((filterRects || findLargestObject) && !faces.empty())
         {
-            Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
-            for (int i = 0; i < min(detections_num, 2); ++i)
+            for (size_t i = 0; i < faces.size(); ++i)
             {
-                cout << ", [" << setw(4) << faceRects[i].x
-                     << ", " << setw(4) << faceRects[i].y
-                     << ", " << setw(4) << faceRects[i].width
-                     << ", " << setw(4) << faceRects[i].height << "]";
+                cout << ", [" << setw(4) << faces[i].x
+                     << ", " << setw(4) << faces[i].y
+                     << ", " << setw(4) << faces[i].width
+                     << ", " << setw(4) << faces[i].height << "]";
             }
         }
         cout << endl;
