Only for VS2008 now. Sample for it. new NPP_staging for VS2008 onlypull/13383/head
parent
31e582e314
commit
1a94186195
17 changed files with 6066 additions and 185 deletions
Binary file not shown.
@ -0,0 +1,362 @@ |
|||||||
|
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||||
|
// |
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||||
|
// |
||||||
|
// By downloading, copying, installing or using the software you agree to this license. |
||||||
|
// If you do not agree to this license, do not download, install, |
||||||
|
// copy or use the software. |
||||||
|
// |
||||||
|
// |
||||||
|
// License Agreement |
||||||
|
// For Open Source Computer Vision Library |
||||||
|
// |
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved. |
||||||
|
// Third party copyrights are property of their respective owners. |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without modification, |
||||||
|
// are permitted provided that the following conditions are met: |
||||||
|
// |
||||||
|
// * Redistribution's of source code must retain the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer. |
||||||
|
// |
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer in the documentation |
||||||
|
// and/or other materials provided with the distribution. |
||||||
|
// |
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products |
||||||
|
// derived from this software without specific prior written permission. |
||||||
|
// |
||||||
|
// This software is provided by the copyright holders and contributors "as is" and |
||||||
|
// any express or implied warranties, including, but not limited to, the implied |
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||||
|
// indirect, incidental, special, exemplary, or consequential damages |
||||||
|
// (including, but not limited to, procurement of substitute goods or services; |
||||||
|
// loss of use, data, or profits; or business interruption) however caused |
||||||
|
// and on any theory of liability, whether in contract, strict liability, |
||||||
|
// or tort (including negligence or otherwise) arising in any way out of |
||||||
|
// the use of this software, even if advised of the possibility of such damage. |
||||||
|
// |
||||||
|
//M*/ |
||||||
|
|
||||||
|
#include <cstdio> |
||||||
|
#include <cuda_runtime.h> |
||||||
|
|
||||||
|
#define CV_NO_BACKWARD_COMPATIBILITY |
||||||
|
|
||||||
|
#include "opencv2/opencv.hpp" |
||||||
|
|
||||||
|
#include "NCVHaarObjectDetection.hpp" |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
const Size preferredVideoFrameSize(640, 480); |
||||||
|
|
||||||
|
string preferredClassifier = "haarcascade_frontalface_alt.xml"; |
||||||
|
string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed"; |
||||||
|
|
||||||
|
|
||||||
|
void printSyntax(void) |
||||||
|
{ |
||||||
|
printf("Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n"); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...) |
||||||
|
{ |
||||||
|
int fontFace = CV_FONT_HERSHEY_PLAIN; |
||||||
|
double fontScale = 1; |
||||||
|
|
||||||
|
int baseline; |
||||||
|
Size textSize = cv::getTextSize("T", fontFace, fontScale, 1, &baseline); |
||||||
|
|
||||||
|
va_list arg_ptr; |
||||||
|
va_start(arg_ptr, format); |
||||||
|
int len = _vscprintf(format, arg_ptr) + 1; |
||||||
|
|
||||||
|
vector<char> strBuf(len); |
||||||
|
vsprintf_s(&strBuf[0], len, format, arg_ptr); |
||||||
|
|
||||||
|
Point org(1, 3 * textSize.height * (lineOffsY + 1) / 2); |
||||||
|
putText(img, &strBuf[0], org, fontFace, fontScale, color); |
||||||
|
va_end(arg_ptr); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus process(Mat *srcdst, |
||||||
|
Ncv32u width, Ncv32u height, |
||||||
|
NcvBool bShowAllHypotheses, NcvBool bLargestFace, |
||||||
|
HaarClassifierCascadeDescriptor &haar, |
||||||
|
NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes, |
||||||
|
NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages, |
||||||
|
INCVMemAllocator &gpuAllocator, |
||||||
|
INCVMemAllocator &cpuAllocator, |
||||||
|
cudaDeviceProp &devProp) |
||||||
|
{ |
||||||
|
ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR); |
||||||
|
|
||||||
|
NCVStatus ncvStat; |
||||||
|
|
||||||
|
NCV_SET_SKIP_COND(gpuAllocator.isCounting()); |
||||||
|
|
||||||
|
NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height); |
||||||
|
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height); |
||||||
|
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100); |
||||||
|
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
|
||||||
|
Mat h_src_hdr(Size(width, height), CV_8U, h_src.ptr(), h_src.stride()); |
||||||
|
|
||||||
|
NCV_SKIP_COND_BEGIN |
||||||
|
|
||||||
|
(*srcdst).copyTo(h_src_hdr); |
||||||
|
|
||||||
|
ncvStat = h_src.copySolid(d_src, 0); |
||||||
|
ncvAssertReturnNcvStat(ncvStat); |
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); |
||||||
|
|
||||||
|
NCV_SKIP_COND_END |
||||||
|
|
||||||
|
NcvSize32u roi; |
||||||
|
roi.width = d_src.width(); |
||||||
|
roi.height = d_src.height(); |
||||||
|
|
||||||
|
Ncv32u numDetections; |
||||||
|
ncvStat = ncvDetectObjectsMultiScale_device( |
||||||
|
d_src, roi, d_rects, numDetections, haar, h_haarStages, |
||||||
|
d_haarStages, d_haarNodes, d_haarFeatures, |
||||||
|
haar.ClassifierSize, |
||||||
|
bShowAllHypotheses ? 0 : 4, |
||||||
|
1.2f, 1, |
||||||
|
(bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace, |
||||||
|
gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0); |
||||||
|
ncvAssertReturnNcvStat(ncvStat); |
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); |
||||||
|
|
||||||
|
NCV_SKIP_COND_BEGIN |
||||||
|
|
||||||
|
ncvStat = d_src.copySolid(h_src, 0); |
||||||
|
ncvAssertReturnNcvStat(ncvStat); |
||||||
|
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR); |
||||||
|
|
||||||
|
h_src_hdr.copyTo(*srcdst); |
||||||
|
|
||||||
|
NCV_SKIP_COND_END |
||||||
|
|
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
int main( int argc, const char** argv ) |
||||||
|
{ |
||||||
|
NCVStatus ncvStat; |
||||||
|
|
||||||
|
printf("NVIDIA Computer Vision SDK\n"); |
||||||
|
printf("Face Detection in video and live feed\n"); |
||||||
|
printf("=========================================\n"); |
||||||
|
printf(" Esc - Quit\n"); |
||||||
|
printf(" Space - Switch between NCV and OpenCV\n"); |
||||||
|
printf(" L - Switch between FullSearch and LargestFace modes\n"); |
||||||
|
printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n"); |
||||||
|
|
||||||
|
if (argc != 4 && argc != 1) |
||||||
|
return printSyntax(), -1; |
||||||
|
|
||||||
|
VideoCapture capture; |
||||||
|
Size frameSize; |
||||||
|
|
||||||
|
if (argc == 1 || strcmp(argv[1], "-c") == 0) |
||||||
|
{ |
||||||
|
// Camera input is specified |
||||||
|
int camIdx = (argc == 3) ? atoi(argv[2]) : 0; |
||||||
|
if(!capture.open(camIdx)) |
||||||
|
return printf("Error opening camera\n"), -1; |
||||||
|
|
||||||
|
capture.set(CV_CAP_PROP_FRAME_WIDTH, preferredVideoFrameSize.width); |
||||||
|
capture.set(CV_CAP_PROP_FRAME_HEIGHT, preferredVideoFrameSize.height); |
||||||
|
capture.set(CV_CAP_PROP_FPS, 25); |
||||||
|
frameSize = preferredVideoFrameSize; |
||||||
|
} |
||||||
|
else if (strcmp(argv[1], "-v") == 0) |
||||||
|
{ |
||||||
|
// Video file input (avi) |
||||||
|
if(!capture.open(argv[2])) |
||||||
|
return printf("Error opening video file\n"), -1; |
||||||
|
|
||||||
|
frameSize.width = (int)capture.get(CV_CAP_PROP_FRAME_WIDTH); |
||||||
|
frameSize.height = (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT); |
||||||
|
} |
||||||
|
else |
||||||
|
return printSyntax(), -1; |
||||||
|
|
||||||
|
NcvBool bUseOpenCV = true; |
||||||
|
NcvBool bLargestFace = true; |
||||||
|
NcvBool bShowAllHypotheses = false; |
||||||
|
|
||||||
|
string classifierFile = (argc == 1) ? preferredClassifier : argv[3]; |
||||||
|
|
||||||
|
CascadeClassifier classifierOpenCV; |
||||||
|
if (!classifierOpenCV.load(classifierFile)) |
||||||
|
return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1; |
||||||
|
|
||||||
|
int devId; |
||||||
|
ncvAssertCUDAReturn(cudaGetDevice(&devId), -1); |
||||||
|
cudaDeviceProp devProp; |
||||||
|
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1); |
||||||
|
printf("Using GPU %d %s, arch=%d.%d\n", devId, devProp.name, devProp.major, devProp.minor); |
||||||
|
|
||||||
|
//============================================================================== |
||||||
|
// |
||||||
|
// Load the classifier from file (assuming its size is about 1 mb) |
||||||
|
// using a simple allocator |
||||||
|
// |
||||||
|
//============================================================================== |
||||||
|
|
||||||
|
NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice); |
||||||
|
ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1); |
||||||
|
NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned); |
||||||
|
ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1); |
||||||
|
|
||||||
|
Ncv32u haarNumStages, haarNumNodes, haarNumFeatures; |
||||||
|
ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1); |
||||||
|
|
||||||
|
NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages); |
||||||
|
ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1); |
||||||
|
NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes); |
||||||
|
ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1); |
||||||
|
NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures); |
||||||
|
ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1); |
||||||
|
|
||||||
|
HaarClassifierCascadeDescriptor haar; |
||||||
|
ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, h_haarStages, h_haarNodes, h_haarFeatures); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1); |
||||||
|
|
||||||
|
NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages); |
||||||
|
ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1); |
||||||
|
NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes); |
||||||
|
ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1); |
||||||
|
NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures); |
||||||
|
ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1); |
||||||
|
|
||||||
|
ncvStat = h_haarStages.copySolid(d_haarStages, 0); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1); |
||||||
|
ncvStat = h_haarNodes.copySolid(d_haarNodes, 0); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1); |
||||||
|
ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1); |
||||||
|
|
||||||
|
//============================================================================== |
||||||
|
// |
||||||
|
// Calculate memory requirements and create real allocators |
||||||
|
// |
||||||
|
//============================================================================== |
||||||
|
|
||||||
|
NCVMemStackAllocator gpuCounter(devProp.textureAlignment); |
||||||
|
ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1); |
||||||
|
NCVMemStackAllocator cpuCounter(devProp.textureAlignment); |
||||||
|
ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1); |
||||||
|
|
||||||
|
ncvStat = process(NULL, frameSize.width, frameSize.height, |
||||||
|
false, false, haar, |
||||||
|
d_haarStages, d_haarNodes, |
||||||
|
d_haarFeatures, h_haarStages, |
||||||
|
gpuCounter, cpuCounter, devProp); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1); |
||||||
|
|
||||||
|
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment); |
||||||
|
ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1); |
||||||
|
NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment); |
||||||
|
ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1); |
||||||
|
|
||||||
|
printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height); |
||||||
|
|
||||||
|
//============================================================================== |
||||||
|
// |
||||||
|
// Main processing loop |
||||||
|
// |
||||||
|
//============================================================================== |
||||||
|
|
||||||
|
namedWindow(wndTitle, 1); |
||||||
|
|
||||||
|
Mat frame, gray, frameDisp; |
||||||
|
|
||||||
|
for(;;) |
||||||
|
{ |
||||||
|
// For camera and video file, capture the next image |
||||||
|
capture >> frame; |
||||||
|
if (frame.empty()) |
||||||
|
break; |
||||||
|
|
||||||
|
cvtColor(frame, gray, CV_BGR2GRAY); |
||||||
|
|
||||||
|
// process |
||||||
|
NcvSize32u minSize = haar.ClassifierSize; |
||||||
|
if (bLargestFace) |
||||||
|
{ |
||||||
|
Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width; |
||||||
|
Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height; |
||||||
|
Ncv32u ratioSmallest = std::min(ratioX, ratioY); |
||||||
|
ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f); |
||||||
|
minSize.width *= ratioSmallest; |
||||||
|
minSize.height *= ratioSmallest; |
||||||
|
} |
||||||
|
|
||||||
|
NcvTimer timer = ncvStartTimer(); |
||||||
|
|
||||||
|
if (!bUseOpenCV) |
||||||
|
{ |
||||||
|
ncvStat = process(&gray, frameSize.width, frameSize.height, |
||||||
|
bShowAllHypotheses, bLargestFace, haar, |
||||||
|
d_haarStages, d_haarNodes, |
||||||
|
d_haarFeatures, h_haarStages, |
||||||
|
gpuAllocator, cpuAllocator, devProp); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
vector<Rect> rectsOpenCV; |
||||||
|
|
||||||
|
classifierOpenCV.detectMultiScale( |
||||||
|
gray, |
||||||
|
rectsOpenCV, |
||||||
|
1.2f, |
||||||
|
bShowAllHypotheses && !bLargestFace ? 0 : 4, |
||||||
|
(bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE, |
||||||
|
Size(minSize.width, minSize.height)); |
||||||
|
|
||||||
|
for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt) |
||||||
|
rectangle(gray, rectsOpenCV[rt], Scalar(255)); |
||||||
|
} |
||||||
|
|
||||||
|
Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer); |
||||||
|
|
||||||
|
cvtColor(gray, frameDisp, CV_GRAY2BGR); |
||||||
|
|
||||||
|
imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":""); |
||||||
|
imagePrintf(frameDisp, 1, CV_RGB(255, 0,0), "L - Switch FullSearch%s / LargestFace%s modes", bLargestFace?"":" (ON)", bLargestFace?" (ON)":""); |
||||||
|
imagePrintf(frameDisp, 2, CV_RGB(255, 0,0), "U - Toggle unfiltered hypotheses visualization in FullSearch %s", bShowAllHypotheses?"(ON)":"(OFF)"); |
||||||
|
imagePrintf(frameDisp, 3, CV_RGB(118,185,0), " Running at %f FPS on %s", 1000.0f / avgTime, bUseOpenCV?"CPU":"GPU"); |
||||||
|
|
||||||
|
cv::imshow(wndTitle, frameDisp); |
||||||
|
|
||||||
|
switch (cvWaitKey(1)) |
||||||
|
{ |
||||||
|
case ' ': |
||||||
|
bUseOpenCV = !bUseOpenCV; |
||||||
|
break; |
||||||
|
case 'L':case 'l': |
||||||
|
bLargestFace = !bLargestFace; |
||||||
|
break; |
||||||
|
case 'U':case 'u': |
||||||
|
bShowAllHypotheses = !bShowAllHypotheses; |
||||||
|
break; |
||||||
|
case 27: |
||||||
|
return 0; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return 0; |
||||||
|
} |
@ -0,0 +1,571 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <precomp.hpp> |
||||||
|
|
||||||
|
|
||||||
|
#if !defined (HAVE_CUDA) |
||||||
|
|
||||||
|
|
||||||
|
#else /* !defined (HAVE_CUDA) */ |
||||||
|
|
||||||
|
|
||||||
|
#include <stdarg.h> |
||||||
|
#include "NCV.hpp" |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Error handling helpers
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
static void stdioDebugOutput(const char *msg) |
||||||
|
{ |
||||||
|
printf("%s", msg); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput; |
||||||
|
|
||||||
|
|
||||||
|
void ncvDebugOutput(const char *msg, ...) |
||||||
|
{ |
||||||
|
const int K_DEBUG_STRING_MAXLEN = 1024; |
||||||
|
char buffer[K_DEBUG_STRING_MAXLEN]; |
||||||
|
va_list args; |
||||||
|
va_start(args, msg); |
||||||
|
vsnprintf_s(buffer, K_DEBUG_STRING_MAXLEN, K_DEBUG_STRING_MAXLEN-1, msg, args); |
||||||
|
va_end (args); |
||||||
|
debugOutputHandler(buffer); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func) |
||||||
|
{ |
||||||
|
debugOutputHandler = func; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Memory wrappers and helpers
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
NCVStatus GPUAlignmentValue(Ncv32u &alignment) |
||||||
|
{ |
||||||
|
int curDev; |
||||||
|
cudaDeviceProp curProp; |
||||||
|
ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR); |
||||||
|
ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR); |
||||||
|
alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);
|
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Ncv32u alignUp(Ncv32u what, Ncv32u alignment) |
||||||
|
{ |
||||||
|
Ncv32u alignMask = alignment-1; |
||||||
|
Ncv32u inverseAlignMask = ~alignMask; |
||||||
|
Ncv32u res = (what + alignMask) & inverseAlignMask; |
||||||
|
return res; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
void NCVMemPtr::clear() |
||||||
|
{ |
||||||
|
ptr = NULL; |
||||||
|
memtype = NCVMemoryTypeNone; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
void NCVMemSegment::clear() |
||||||
|
{ |
||||||
|
begin.clear(); |
||||||
|
size = 0; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, size_t sz, cudaStream_t cuStream) |
||||||
|
{ |
||||||
|
NCVStatus ncvStat; |
||||||
|
switch (dstType) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
switch (srcType) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
memcpy(dst, src, sz); |
||||||
|
ncvStat = NCV_SUCCESS; |
||||||
|
break; |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
if (cuStream != 0) |
||||||
|
{ |
||||||
|
ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR); |
||||||
|
} |
||||||
|
ncvStat = NCV_SUCCESS; |
||||||
|
break; |
||||||
|
default: |
||||||
|
ncvStat = NCV_MEM_RESIDENCE_ERROR; |
||||||
|
} |
||||||
|
break; |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
switch (srcType) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
if (cuStream != 0) |
||||||
|
{ |
||||||
|
ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR); |
||||||
|
} |
||||||
|
ncvStat = NCV_SUCCESS; |
||||||
|
break; |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
if (cuStream != 0) |
||||||
|
{ |
||||||
|
ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR); |
||||||
|
} |
||||||
|
ncvStat = NCV_SUCCESS; |
||||||
|
break; |
||||||
|
default: |
||||||
|
ncvStat = NCV_MEM_RESIDENCE_ERROR; |
||||||
|
} |
||||||
|
break; |
||||||
|
default: |
||||||
|
ncvStat = NCV_MEM_RESIDENCE_ERROR; |
||||||
|
} |
||||||
|
|
||||||
|
return ncvStat; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
//===================================================================
|
||||||
|
//
|
||||||
|
// NCVMemStackAllocator class members implementation
|
||||||
|
//
|
||||||
|
//===================================================================
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment) |
||||||
|
: |
||||||
|
currentSize(0), |
||||||
|
_maxSize(0), |
||||||
|
allocBegin(NULL), |
||||||
|
begin(NULL), |
||||||
|
_memType(NCVMemoryTypeNone), |
||||||
|
_alignment(alignment) |
||||||
|
{ |
||||||
|
NcvBool bProperAlignment = (alignment & (alignment-1)) == 0; |
||||||
|
ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2"); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment) |
||||||
|
: |
||||||
|
currentSize(0), |
||||||
|
_maxSize(0), |
||||||
|
allocBegin(NULL), |
||||||
|
_memType(memT), |
||||||
|
_alignment(alignment) |
||||||
|
{ |
||||||
|
NcvBool bProperAlignment = (alignment & (alignment-1)) == 0; |
||||||
|
ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2"); |
||||||
|
|
||||||
|
allocBegin = NULL; |
||||||
|
|
||||||
|
switch (memT) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), ); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), ); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
allocBegin = (Ncv8u *)malloc(capacity); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (capacity == 0) |
||||||
|
{ |
||||||
|
allocBegin = (Ncv8u *)(0x1); |
||||||
|
} |
||||||
|
|
||||||
|
if (!isCounting()) |
||||||
|
{ |
||||||
|
begin = allocBegin; |
||||||
|
end = begin + capacity; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemStackAllocator::~NCVMemStackAllocator() |
||||||
|
{ |
||||||
|
if (allocBegin != NULL) |
||||||
|
{ |
||||||
|
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction"); |
||||||
|
switch (_memType) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
ncvAssertCUDAReturn(cudaFree(allocBegin), ); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), ); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
free(allocBegin); |
||||||
|
break; |
||||||
|
} |
||||||
|
allocBegin = NULL; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size) |
||||||
|
{ |
||||||
|
seg.clear(); |
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
|
||||||
|
size = alignUp(size, this->_alignment); |
||||||
|
this->currentSize += size; |
||||||
|
this->_maxSize = std::max(this->_maxSize, this->currentSize); |
||||||
|
|
||||||
|
if (!isCounting()) |
||||||
|
{ |
||||||
|
size_t availSize = end - begin; |
||||||
|
ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY); |
||||||
|
} |
||||||
|
|
||||||
|
seg.begin.ptr = begin; |
||||||
|
seg.begin.memtype = this->_memType; |
||||||
|
seg.size = size; |
||||||
|
begin += size; |
||||||
|
|
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg) |
||||||
|
{ |
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC); |
||||||
|
ncvAssertReturn(seg.begin.ptr != NULL || isCounting(), NCV_ALLOCATOR_BAD_DEALLOC); |
||||||
|
ncvAssertReturn(seg.begin.ptr == begin - seg.size, NCV_ALLOCATOR_DEALLOC_ORDER); |
||||||
|
|
||||||
|
currentSize -= seg.size; |
||||||
|
begin -= seg.size; |
||||||
|
|
||||||
|
seg.clear(); |
||||||
|
|
||||||
|
ncvAssertReturn(allocBegin <= begin, NCV_ALLOCATOR_BAD_DEALLOC); |
||||||
|
|
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemStackAllocator::isInitialized(void) const |
||||||
|
{ |
||||||
|
return ((this->_alignment & (this->_alignment-1)) == 0) && isCounting() || this->allocBegin != NULL; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemStackAllocator::isCounting(void) const |
||||||
|
{ |
||||||
|
return this->_memType == NCVMemoryTypeNone; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemoryType NCVMemStackAllocator::memType(void) const |
||||||
|
{ |
||||||
|
return this->_memType; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Ncv32u NCVMemStackAllocator::alignment(void) const |
||||||
|
{ |
||||||
|
return this->_alignment; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
size_t NCVMemStackAllocator::maxSize(void) const |
||||||
|
{ |
||||||
|
return this->_maxSize; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
//===================================================================
|
||||||
|
//
|
||||||
|
// NCVMemNativeAllocator class members implementation
|
||||||
|
//
|
||||||
|
//===================================================================
|
||||||
|
|
||||||
|
|
||||||
|
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT) |
||||||
|
: |
||||||
|
currentSize(0), |
||||||
|
_maxSize(0), |
||||||
|
_memType(memT) |
||||||
|
{ |
||||||
|
ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", ); |
||||||
|
ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", ); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemNativeAllocator::~NCVMemNativeAllocator() |
||||||
|
{ |
||||||
|
ncvAssertPrintCheck(currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak"); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size) |
||||||
|
{ |
||||||
|
seg.clear(); |
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
|
||||||
|
switch (this->_memType) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
seg.begin.ptr = (Ncv8u *)malloc(size); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
this->currentSize += alignUp(size, this->_alignment); |
||||||
|
this->_maxSize = std::max(this->_maxSize, this->currentSize); |
||||||
|
|
||||||
|
seg.begin.memtype = this->_memType; |
||||||
|
seg.size = size; |
||||||
|
|
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg) |
||||||
|
{ |
||||||
|
ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC); |
||||||
|
ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC); |
||||||
|
ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC); |
||||||
|
|
||||||
|
ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC); |
||||||
|
currentSize -= alignUp(seg.size, this->_alignment); |
||||||
|
|
||||||
|
switch (this->_memType) |
||||||
|
{ |
||||||
|
case NCVMemoryTypeDevice: |
||||||
|
ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPinned: |
||||||
|
ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR); |
||||||
|
break; |
||||||
|
case NCVMemoryTypeHostPageable: |
||||||
|
free(seg.begin.ptr); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
seg.clear(); |
||||||
|
|
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemNativeAllocator::isInitialized(void) const |
||||||
|
{ |
||||||
|
return (this->_alignment != 0); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool NCVMemNativeAllocator::isCounting(void) const |
||||||
|
{ |
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemoryType NCVMemNativeAllocator::memType(void) const |
||||||
|
{ |
||||||
|
return this->_memType; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Ncv32u NCVMemNativeAllocator::alignment(void) const |
||||||
|
{ |
||||||
|
return this->_alignment; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
size_t NCVMemNativeAllocator::maxSize(void) const |
||||||
|
{ |
||||||
|
return this->_maxSize; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
//===================================================================
|
||||||
|
//
|
||||||
|
// Time and timer routines
|
||||||
|
//
|
||||||
|
//===================================================================
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct _NcvTimeMoment NcvTimeMoment; |
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64) |
||||||
|
|
||||||
|
#include <Windows.h> |
||||||
|
|
||||||
|
typedef struct _NcvTimeMoment |
||||||
|
{ |
||||||
|
LONGLONG moment, freq; |
||||||
|
} NcvTimeMoment; |
||||||
|
|
||||||
|
|
||||||
|
static void _ncvQueryMoment(NcvTimeMoment *t) |
||||||
|
{ |
||||||
|
QueryPerformanceFrequency((LARGE_INTEGER *)&(t->freq)); |
||||||
|
QueryPerformanceCounter((LARGE_INTEGER *)&(t->moment)); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentToMicroseconds(NcvTimeMoment *t) |
||||||
|
{ |
||||||
|
return 1000000.0 * t->moment / t->freq; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2) |
||||||
|
{ |
||||||
|
return 1000000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2) |
||||||
|
{ |
||||||
|
return 1000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq); |
||||||
|
} |
||||||
|
|
||||||
|
#elif defined(__unix__) |
||||||
|
|
||||||
|
#include <sys/time.h> |
||||||
|
|
||||||
|
typedef struct _NcvTimeMoment |
||||||
|
{ |
||||||
|
struct timeval tv;
|
||||||
|
struct timezone tz; |
||||||
|
} NcvTimeMoment; |
||||||
|
|
||||||
|
|
||||||
|
void _ncvQueryMoment(NcvTimeMoment *t) |
||||||
|
{ |
||||||
|
gettimeofday(& t->tv, & t->tz); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentToMicroseconds(NcvTimeMoment *t) |
||||||
|
{ |
||||||
|
return 1000000.0 * t->tv.tv_sec + (double)t->tv.tv_usec; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2) |
||||||
|
{ |
||||||
|
return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000000 + (double)t2->tv.tv_usec - (double)t1->tv.tv_usec); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
#endif //#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
|
||||||
|
|
||||||
|
struct _NcvTimer |
||||||
|
{ |
||||||
|
NcvTimeMoment t1, t2; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
NcvTimer ncvStartTimer(void) |
||||||
|
{ |
||||||
|
struct _NcvTimer *t; |
||||||
|
t = (struct _NcvTimer *)malloc(sizeof(struct _NcvTimer)); |
||||||
|
_ncvQueryMoment(&t->t1); |
||||||
|
return t; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double ncvEndQueryTimerUs(NcvTimer t) |
||||||
|
{ |
||||||
|
double res; |
||||||
|
_ncvQueryMoment(&t->t2); |
||||||
|
res = _ncvMomentsDiffToMicroseconds(&t->t1, &t->t2); |
||||||
|
free(t); |
||||||
|
return res; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
double ncvEndQueryTimerMs(NcvTimer t) |
||||||
|
{ |
||||||
|
double res; |
||||||
|
_ncvQueryMoment(&t->t2); |
||||||
|
res = _ncvMomentsDiffToMilliseconds(&t->t1, &t->t2); |
||||||
|
free(t); |
||||||
|
return res; |
||||||
|
} |
||||||
|
|
||||||
|
#endif /* !defined (HAVE_CUDA) */ |
@ -0,0 +1,837 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef _ncv_hpp_ |
||||||
|
#define _ncv_hpp_ |
||||||
|
|
||||||
|
#include <cuda_runtime.h> |
||||||
|
#include "npp_staging.h" |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Alignment macros
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
#if !defined(__align__) && !defined(__CUDACC__) |
||||||
|
#if defined(_WIN32) || defined(_WIN64) |
||||||
|
#define __align__(n) __declspec(align(n)) |
||||||
|
#elif defined(__unix__) |
||||||
|
#define __align__(n) __attribute__((__aligned__(n))) |
||||||
|
#endif |
||||||
|
#endif |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Integral and compound types of guaranteed size
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
typedef bool NcvBool; |
||||||
|
typedef long long Ncv64s; |
||||||
|
typedef unsigned long long Ncv64u; |
||||||
|
typedef int Ncv32s; |
||||||
|
typedef unsigned int Ncv32u; |
||||||
|
typedef short Ncv16s; |
||||||
|
typedef unsigned short Ncv16u; |
||||||
|
typedef char Ncv8s; |
||||||
|
typedef unsigned char Ncv8u; |
||||||
|
typedef float Ncv32f; |
||||||
|
typedef double Ncv64f; |
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{ |
||||||
|
Ncv8u x; |
||||||
|
Ncv8u y; |
||||||
|
Ncv8u width; |
||||||
|
Ncv8u height; |
||||||
|
} NcvRect8u; |
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{ |
||||||
|
Ncv32s x; ///< x-coordinate of upper left corner.
|
||||||
|
Ncv32s y; ///< y-coordinate of upper left corner.
|
||||||
|
Ncv32s width; ///< Rectangle width.
|
||||||
|
Ncv32s height; ///< Rectangle height.
|
||||||
|
} NcvRect32s; |
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{ |
||||||
|
Ncv32u x; ///< x-coordinate of upper left corner.
|
||||||
|
Ncv32u y; ///< y-coordinate of upper left corner.
|
||||||
|
Ncv32u width; ///< Rectangle width.
|
||||||
|
Ncv32u height; ///< Rectangle height.
|
||||||
|
} NcvRect32u; |
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{ |
||||||
|
Ncv32s width; ///< Rectangle width.
|
||||||
|
Ncv32s height; ///< Rectangle height.
|
||||||
|
} NcvSize32s; |
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{ |
||||||
|
Ncv32u width; ///< Rectangle width.
|
||||||
|
Ncv32u height; ///< Rectangle height.
|
||||||
|
} NcvSize32u; |
||||||
|
|
||||||
|
|
||||||
|
NPPST_CT_ASSERT(sizeof(NcvBool) <= 4); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv64s) == 8); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv64u) == 8); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv32s) == 4); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv32u) == 4); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv16s) == 2); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv16u) == 2); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv8s) == 1); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv8u) == 1); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv32f) == 4); |
||||||
|
NPPST_CT_ASSERT(sizeof(Ncv64f) == 8); |
||||||
|
NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u)); |
||||||
|
NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s)); |
||||||
|
NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u)); |
||||||
|
NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u)); |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Persistent constants
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
const Ncv32u K_WARP_SIZE = 32; |
||||||
|
const Ncv32u K_LOG2_WARP_SIZE = 5; |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Error handling
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
#define NCV_CT_PREP_STRINGIZE_AUX(x) #x |
||||||
|
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x) |
||||||
|
|
||||||
|
|
||||||
|
void ncvDebugOutput(const char *msg, ...); |
||||||
|
|
||||||
|
|
||||||
|
typedef void NCVDebugOutputHandler(const char* msg); |
||||||
|
|
||||||
|
|
||||||
|
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func); |
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertPrintCheck(pred, msg) \ |
||||||
|
((pred) ? true : (ncvDebugOutput("\n%s\n", \
|
||||||
|
"NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \
|
||||||
|
), false)) |
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertPrintReturn(pred, msg, err) \ |
||||||
|
if (ncvAssertPrintCheck(pred, msg)) ; else return err |
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertReturn(pred, err) \ |
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (!(pred)) \
|
||||||
|
{ \
|
||||||
|
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||||
|
return err; \
|
||||||
|
} \
|
||||||
|
} while (0) |
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertReturnNcvStat(ncvOp) \ |
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
NCVStatus _ncvStat = ncvOp; \
|
||||||
|
if (NCV_SUCCESS != _ncvStat) \
|
||||||
|
{ \
|
||||||
|
ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||||
|
return _ncvStat; \
|
||||||
|
} \
|
||||||
|
} while (0) |
||||||
|
|
||||||
|
|
||||||
|
#define ncvAssertCUDAReturn(cudacall, errCode) \ |
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
cudaError_t resCall = cudacall; \
|
||||||
|
cudaError_t resGLE = cudaGetLastError(); \
|
||||||
|
if (cudaSuccess != resCall || cudaSuccess != resGLE) \
|
||||||
|
{ \
|
||||||
|
ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \
|
||||||
|
return errCode; \
|
||||||
|
} \
|
||||||
|
} while (0) |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return-codes for status notification, errors and warnings |
||||||
|
*/ |
||||||
|
enum NCVStatus |
||||||
|
{ |
||||||
|
NCV_SUCCESS, |
||||||
|
|
||||||
|
NCV_CUDA_ERROR, |
||||||
|
NCV_NPP_ERROR, |
||||||
|
NCV_FILE_ERROR, |
||||||
|
|
||||||
|
NCV_NULL_PTR, |
||||||
|
NCV_INCONSISTENT_INPUT, |
||||||
|
NCV_TEXTURE_BIND_ERROR, |
||||||
|
NCV_DIMENSIONS_INVALID, |
||||||
|
|
||||||
|
NCV_INVALID_ROI, |
||||||
|
NCV_INVALID_STEP, |
||||||
|
NCV_INVALID_SCALE, |
||||||
|
|
||||||
|
NCV_ALLOCATOR_NOT_INITIALIZED, |
||||||
|
NCV_ALLOCATOR_BAD_ALLOC, |
||||||
|
NCV_ALLOCATOR_BAD_DEALLOC, |
||||||
|
NCV_ALLOCATOR_INSUFFICIENT_CAPACITY, |
||||||
|
NCV_ALLOCATOR_DEALLOC_ORDER, |
||||||
|
NCV_ALLOCATOR_BAD_REUSE, |
||||||
|
|
||||||
|
NCV_MEM_COPY_ERROR, |
||||||
|
NCV_MEM_RESIDENCE_ERROR, |
||||||
|
NCV_MEM_INSUFFICIENT_CAPACITY, |
||||||
|
|
||||||
|
NCV_HAAR_INVALID_PIXEL_STEP, |
||||||
|
NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER, |
||||||
|
NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE, |
||||||
|
NCV_HAAR_TOO_LARGE_FEATURES, |
||||||
|
NCV_HAAR_XML_LOADING_EXCEPTION, |
||||||
|
|
||||||
|
NCV_NOIMPL_HAAR_TILTED_FEATURES, |
||||||
|
|
||||||
|
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW, |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
#define NCV_SET_SKIP_COND(x) \ |
||||||
|
bool __ncv_skip_cond = x |
||||||
|
|
||||||
|
|
||||||
|
#define NCV_RESET_SKIP_COND(x) \ |
||||||
|
__ncv_skip_cond = x |
||||||
|
|
||||||
|
|
||||||
|
#define NCV_SKIP_COND_BEGIN \ |
||||||
|
if (!__ncv_skip_cond) { |
||||||
|
|
||||||
|
|
||||||
|
#define NCV_SKIP_COND_END \ |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Timer
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct _NcvTimer *NcvTimer; |
||||||
|
|
||||||
|
NcvTimer ncvStartTimer(void); |
||||||
|
|
||||||
|
double ncvEndQueryTimerUs(NcvTimer t); |
||||||
|
|
||||||
|
double ncvEndQueryTimerMs(NcvTimer t); |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Memory management classes template compound types
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alignment of GPU memory chunks in bytes |
||||||
|
*/ |
||||||
|
NCVStatus GPUAlignmentValue(Ncv32u &alignment); |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the aligned top bound value |
||||||
|
*/ |
||||||
|
Ncv32u alignUp(Ncv32u what, Ncv32u alignment); |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemoryType |
||||||
|
*/ |
||||||
|
enum NCVMemoryType |
||||||
|
{ |
||||||
|
NCVMemoryTypeNone, |
||||||
|
NCVMemoryTypeHostPageable, |
||||||
|
NCVMemoryTypeHostPinned, |
||||||
|
NCVMemoryTypeDevice |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemPtr |
||||||
|
*/ |
||||||
|
struct NCVMemPtr |
||||||
|
{ |
||||||
|
void *ptr; |
||||||
|
NCVMemoryType memtype; |
||||||
|
void clear(); |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemSegment |
||||||
|
*/ |
||||||
|
struct NCVMemSegment |
||||||
|
{ |
||||||
|
NCVMemPtr begin; |
||||||
|
size_t size; |
||||||
|
void clear(); |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* INCVMemAllocator (Interface) |
||||||
|
*/ |
||||||
|
class INCVMemAllocator |
||||||
|
{ |
||||||
|
public: |
||||||
|
virtual ~INCVMemAllocator() = 0; |
||||||
|
|
||||||
|
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0; |
||||||
|
virtual NCVStatus dealloc(NCVMemSegment &seg) = 0; |
||||||
|
|
||||||
|
virtual NcvBool isInitialized(void) const = 0; |
||||||
|
virtual NcvBool isCounting(void) const = 0; |
||||||
|
|
||||||
|
virtual NCVMemoryType memType(void) const = 0; |
||||||
|
virtual Ncv32u alignment(void) const = 0; |
||||||
|
virtual size_t maxSize(void) const = 0; |
||||||
|
}; |
||||||
|
|
||||||
|
inline INCVMemAllocator::~INCVMemAllocator() {} |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemStackAllocator |
||||||
|
*/ |
||||||
|
class NCVMemStackAllocator : public INCVMemAllocator |
||||||
|
{ |
||||||
|
NCVMemStackAllocator(); |
||||||
|
NCVMemStackAllocator(const NCVMemStackAllocator &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
explicit NCVMemStackAllocator(Ncv32u alignment); |
||||||
|
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment); |
||||||
|
virtual ~NCVMemStackAllocator(); |
||||||
|
|
||||||
|
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size); |
||||||
|
virtual NCVStatus dealloc(NCVMemSegment &seg); |
||||||
|
|
||||||
|
virtual NcvBool isInitialized(void) const; |
||||||
|
virtual NcvBool isCounting(void) const; |
||||||
|
|
||||||
|
virtual NCVMemoryType memType(void) const; |
||||||
|
virtual Ncv32u alignment(void) const; |
||||||
|
virtual size_t maxSize(void) const; |
||||||
|
|
||||||
|
private: |
||||||
|
|
||||||
|
NCVMemoryType _memType; |
||||||
|
Ncv32u _alignment; |
||||||
|
Ncv8u *allocBegin; |
||||||
|
Ncv8u *begin; |
||||||
|
Ncv8u *end; |
||||||
|
size_t currentSize; |
||||||
|
size_t _maxSize; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMemNativeAllocator |
||||||
|
*/ |
||||||
|
class NCVMemNativeAllocator : public INCVMemAllocator |
||||||
|
{ |
||||||
|
public: |
||||||
|
|
||||||
|
NCVMemNativeAllocator(NCVMemoryType memT); |
||||||
|
virtual ~NCVMemNativeAllocator(); |
||||||
|
|
||||||
|
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size); |
||||||
|
virtual NCVStatus dealloc(NCVMemSegment &seg); |
||||||
|
|
||||||
|
virtual NcvBool isInitialized(void) const; |
||||||
|
virtual NcvBool isCounting(void) const; |
||||||
|
|
||||||
|
virtual NCVMemoryType memType(void) const; |
||||||
|
virtual Ncv32u alignment(void) const; |
||||||
|
virtual size_t maxSize(void) const; |
||||||
|
|
||||||
|
private: |
||||||
|
|
||||||
|
NCVMemNativeAllocator(); |
||||||
|
NCVMemNativeAllocator(const NCVMemNativeAllocator &); |
||||||
|
|
||||||
|
NCVMemoryType _memType; |
||||||
|
Ncv32u _alignment; |
||||||
|
size_t currentSize; |
||||||
|
size_t _maxSize; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy dispatcher |
||||||
|
*/ |
||||||
|
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, |
||||||
|
const void *src, NCVMemoryType srcType, |
||||||
|
size_t sz, cudaStream_t cuStream); |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVVector (1D) |
||||||
|
*/ |
||||||
|
template <class T> |
||||||
|
class NCVVector |
||||||
|
{ |
||||||
|
NCVVector(const NCVVector &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
NCVVector() |
||||||
|
{ |
||||||
|
clear(); |
||||||
|
} |
||||||
|
|
||||||
|
virtual ~NCVVector() {} |
||||||
|
|
||||||
|
void clear() |
||||||
|
{ |
||||||
|
_ptr = NULL; |
||||||
|
_length = 0; |
||||||
|
_memtype = NCVMemoryTypeNone; |
||||||
|
} |
||||||
|
|
||||||
|
NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0) |
||||||
|
{ |
||||||
|
if (howMuch == 0) |
||||||
|
{ |
||||||
|
ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR); |
||||||
|
howMuch = this->_length * sizeof(T); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
ncvAssertReturn(dst._length * sizeof(T) >= howMuch &&
|
||||||
|
this->_length * sizeof(T) >= howMuch && |
||||||
|
howMuch > 0, NCV_MEM_COPY_ERROR); |
||||||
|
} |
||||||
|
ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
|
||||||
|
(dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR); |
||||||
|
|
||||||
|
NCVStatus ncvStat = NCV_SUCCESS; |
||||||
|
if (this->_memtype != NCVMemoryTypeNone) |
||||||
|
{ |
||||||
|
ncvStat = memSegCopyHelper(dst._ptr, dst._memtype, |
||||||
|
this->_ptr, this->_memtype, |
||||||
|
howMuch, cuStream); |
||||||
|
} |
||||||
|
|
||||||
|
return ncvStat; |
||||||
|
} |
||||||
|
|
||||||
|
T *ptr() const {return this->_ptr;} |
||||||
|
size_t length() const {return this->_length;} |
||||||
|
NCVMemoryType memType() const {return this->_memtype;} |
||||||
|
|
||||||
|
protected: |
||||||
|
|
||||||
|
T *_ptr; |
||||||
|
size_t _length; |
||||||
|
NCVMemoryType _memtype; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVVectorAlloc |
||||||
|
*/ |
||||||
|
template <class T> |
||||||
|
class NCVVectorAlloc : public NCVVector<T> |
||||||
|
{ |
||||||
|
NCVVectorAlloc(); |
||||||
|
NCVVectorAlloc(const NCVVectorAlloc &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length) |
||||||
|
: |
||||||
|
allocator(allocator) |
||||||
|
{ |
||||||
|
NCVStatus ncvStat; |
||||||
|
|
||||||
|
this->clear(); |
||||||
|
this->allocatedMem.clear(); |
||||||
|
|
||||||
|
ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T)); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", ); |
||||||
|
|
||||||
|
this->_ptr = (T *)this->allocatedMem.begin.ptr; |
||||||
|
this->_length = length; |
||||||
|
this->_memtype = this->allocatedMem.begin.memtype; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
~NCVVectorAlloc() |
||||||
|
{ |
||||||
|
NCVStatus ncvStat; |
||||||
|
|
||||||
|
ncvStat = allocator.dealloc(this->allocatedMem); |
||||||
|
ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed"); |
||||||
|
|
||||||
|
this->clear(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemAllocated() const |
||||||
|
{ |
||||||
|
return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting()); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Ncv32u getAllocatorsAlignment() const |
||||||
|
{ |
||||||
|
return allocator.alignment(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemSegment getSegment() const |
||||||
|
{ |
||||||
|
return allocatedMem; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
|
||||||
|
INCVMemAllocator &allocator; |
||||||
|
NCVMemSegment allocatedMem; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVVectorReuse |
||||||
|
*/ |
||||||
|
template <class T> |
||||||
|
class NCVVectorReuse : public NCVVector<T> |
||||||
|
{ |
||||||
|
NCVVectorReuse(); |
||||||
|
NCVVectorReuse(const NCVVectorReuse &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
explicit NCVVectorReuse(const NCVMemSegment &memSegment) |
||||||
|
{ |
||||||
|
this->bReused = false; |
||||||
|
this->clear(); |
||||||
|
|
||||||
|
this->_length = memSegment.size / sizeof(T); |
||||||
|
this->_ptr = (T *)memSegment.begin.ptr; |
||||||
|
this->_memtype = memSegment.begin.memtype; |
||||||
|
|
||||||
|
this->bReused = true; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length) |
||||||
|
{ |
||||||
|
this->bReused = false; |
||||||
|
this->clear(); |
||||||
|
|
||||||
|
ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size, \
|
||||||
|
"NCVVectorReuse ctor:: memory binding failed due to size mismatch", ); |
||||||
|
|
||||||
|
this->_length = length; |
||||||
|
this->_ptr = (T *)memSegment.begin.ptr; |
||||||
|
this->_memtype = memSegment.begin.memtype; |
||||||
|
|
||||||
|
this->bReused = true; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemReused() const |
||||||
|
{ |
||||||
|
return this->bReused; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
|
||||||
|
NcvBool bReused; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMatrix (2D) |
||||||
|
*/ |
||||||
|
template <class T> |
||||||
|
class NCVMatrix |
||||||
|
{ |
||||||
|
NCVMatrix(const NCVMatrix &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
NCVMatrix() |
||||||
|
{ |
||||||
|
clear(); |
||||||
|
} |
||||||
|
|
||||||
|
virtual ~NCVMatrix() {} |
||||||
|
|
||||||
|
|
||||||
|
void clear() |
||||||
|
{ |
||||||
|
_ptr = NULL; |
||||||
|
_pitch = 0; |
||||||
|
_width = 0; |
||||||
|
_height = 0; |
||||||
|
_memtype = NCVMemoryTypeNone; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Ncv32u stride() const |
||||||
|
{ |
||||||
|
return _pitch / sizeof(T); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0) |
||||||
|
{ |
||||||
|
if (howMuch == 0) |
||||||
|
{ |
||||||
|
ncvAssertReturn(dst._pitch == this->_pitch && |
||||||
|
dst._height == this->_height, NCV_MEM_COPY_ERROR); |
||||||
|
howMuch = this->_pitch * this->_height; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
ncvAssertReturn(dst._pitch * dst._height >= howMuch &&
|
||||||
|
this->_pitch * this->_height >= howMuch && |
||||||
|
howMuch > 0, NCV_MEM_COPY_ERROR); |
||||||
|
} |
||||||
|
ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
|
||||||
|
(dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR); |
||||||
|
|
||||||
|
NCVStatus ncvStat = NCV_SUCCESS; |
||||||
|
if (this->_memtype != NCVMemoryTypeNone) |
||||||
|
{ |
||||||
|
ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
|
||||||
|
this->_ptr, this->_memtype,
|
||||||
|
howMuch, cuStream); |
||||||
|
} |
||||||
|
|
||||||
|
return ncvStat; |
||||||
|
} |
||||||
|
|
||||||
|
T *ptr() const {return this->_ptr;} |
||||||
|
Ncv32u width() const {return this->_width;} |
||||||
|
Ncv32u height() const {return this->_height;} |
||||||
|
Ncv32u pitch() const {return this->_pitch;} |
||||||
|
NCVMemoryType memType() const {return this->_memtype;} |
||||||
|
|
||||||
|
protected: |
||||||
|
|
||||||
|
T *_ptr; |
||||||
|
Ncv32u _width; |
||||||
|
Ncv32u _height; |
||||||
|
Ncv32u _pitch; |
||||||
|
NCVMemoryType _memtype; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMatrixAlloc |
||||||
|
*/ |
||||||
|
template <class T> |
||||||
|
class NCVMatrixAlloc : public NCVMatrix<T> |
||||||
|
{ |
||||||
|
NCVMatrixAlloc(); |
||||||
|
NCVMatrixAlloc(const NCVMatrixAlloc &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0) |
||||||
|
: |
||||||
|
allocator(allocator) |
||||||
|
{ |
||||||
|
NCVStatus ncvStat; |
||||||
|
|
||||||
|
this->clear(); |
||||||
|
this->allocatedMem.clear(); |
||||||
|
|
||||||
|
Ncv32u widthBytes = width * sizeof(T); |
||||||
|
Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment()); |
||||||
|
|
||||||
|
if (pitch != 0) |
||||||
|
{ |
||||||
|
ncvAssertPrintReturn(pitch >= pitchBytes && |
||||||
|
(pitch & (allocator.alignment() - 1)) == 0, |
||||||
|
"NCVMatrixAlloc ctor:: incorrect pitch passed", ); |
||||||
|
pitchBytes = pitch; |
||||||
|
} |
||||||
|
|
||||||
|
Ncv32u requiredAllocSize = pitchBytes * height; |
||||||
|
|
||||||
|
ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize); |
||||||
|
ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", ); |
||||||
|
|
||||||
|
this->_ptr = (T *)this->allocatedMem.begin.ptr; |
||||||
|
this->_width = width; |
||||||
|
this->_height = height; |
||||||
|
this->_pitch = pitchBytes; |
||||||
|
this->_memtype = this->allocatedMem.begin.memtype; |
||||||
|
} |
||||||
|
|
||||||
|
~NCVMatrixAlloc() |
||||||
|
{ |
||||||
|
NCVStatus ncvStat; |
||||||
|
|
||||||
|
ncvStat = allocator.dealloc(this->allocatedMem); |
||||||
|
ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed"); |
||||||
|
|
||||||
|
this->clear(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemAllocated() const |
||||||
|
{ |
||||||
|
return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting()); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
Ncv32u getAllocatorsAlignment() const |
||||||
|
{ |
||||||
|
return allocator.alignment(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NCVMemSegment getSegment() const |
||||||
|
{ |
||||||
|
return allocatedMem; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
|
||||||
|
INCVMemAllocator &allocator; |
||||||
|
NCVMemSegment allocatedMem; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NCVMatrixReuse |
||||||
|
*/ |
||||||
|
template <class T> |
||||||
|
class NCVMatrixReuse : public NCVMatrix<T> |
||||||
|
{ |
||||||
|
NCVMatrixReuse(); |
||||||
|
NCVMatrixReuse(const NCVMatrixReuse &); |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false) |
||||||
|
{ |
||||||
|
this->bReused = false; |
||||||
|
this->clear(); |
||||||
|
|
||||||
|
Ncv32u widthBytes = width * sizeof(T); |
||||||
|
Ncv32u pitchBytes = alignUp(widthBytes, alignment); |
||||||
|
|
||||||
|
if (pitch != 0) |
||||||
|
{ |
||||||
|
if (!bSkipPitchCheck) |
||||||
|
{ |
||||||
|
ncvAssertPrintReturn(pitch >= pitchBytes && |
||||||
|
(pitch & (alignment - 1)) == 0, |
||||||
|
"NCVMatrixReuse ctor:: incorrect pitch passed", ); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", ); |
||||||
|
} |
||||||
|
pitchBytes = pitch; |
||||||
|
} |
||||||
|
|
||||||
|
ncvAssertPrintReturn(pitchBytes * height <= memSegment.size, \
|
||||||
|
"NCVMatrixReuse ctor:: memory binding failed due to size mismatch", ); |
||||||
|
|
||||||
|
this->_width = width; |
||||||
|
this->_height = height; |
||||||
|
this->_pitch = pitchBytes; |
||||||
|
this->_ptr = (T *)memSegment.begin.ptr; |
||||||
|
this->_memtype = memSegment.begin.memtype; |
||||||
|
|
||||||
|
this->bReused = true; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
NcvBool isMemReused() const |
||||||
|
{ |
||||||
|
return this->bReused; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
|
||||||
|
NcvBool bReused; |
||||||
|
}; |
||||||
|
|
||||||
|
#endif // _ncv_hpp_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,501 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
|
||||||
|
//
|
||||||
|
// The algorithm and code are explained in the upcoming GPU Computing Gems
|
||||||
|
// chapter in detail:
|
||||||
|
//
|
||||||
|
// Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
|
||||||
|
// PDF URL placeholder
|
||||||
|
// email: aobukhov@nvidia.com, devsupport@nvidia.com
|
||||||
|
//
|
||||||
|
// Credits for help with the code to:
|
||||||
|
// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
|
||||||
|
//
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef _ncvhaarobjectdetection_hpp_ |
||||||
|
#define _ncvhaarobjectdetection_hpp_ |
||||||
|
|
||||||
|
#include <string> |
||||||
|
#include "NCV.hpp" |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Guaranteed size cross-platform classifier structures
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarFeature64 |
||||||
|
{ |
||||||
|
uint2 _ui2; |
||||||
|
|
||||||
|
#define HaarFeature64_CreateCheck_MaxRectField 0xFF |
||||||
|
|
||||||
|
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight) |
||||||
|
{ |
||||||
|
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES); |
||||||
|
((NcvRect8u*)&(this->_ui2.x))->x = rectX; |
||||||
|
((NcvRect8u*)&(this->_ui2.x))->y = rectY; |
||||||
|
((NcvRect8u*)&(this->_ui2.x))->width = rectWidth; |
||||||
|
((NcvRect8u*)&(this->_ui2.x))->height = rectHeight; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus setWeight(Ncv32f weight) |
||||||
|
{ |
||||||
|
((Ncv32f*)&(this->_ui2.y))[0] = weight; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight) |
||||||
|
{ |
||||||
|
NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x); |
||||||
|
*rectX = tmpRect.x; |
||||||
|
*rectY = tmpRect.y; |
||||||
|
*rectWidth = tmpRect.width; |
||||||
|
*rectHeight = tmpRect.height; |
||||||
|
} |
||||||
|
|
||||||
|
__device__ __host__ Ncv32f getWeight(void) |
||||||
|
{ |
||||||
|
return *(Ncv32f*)(&this->_ui2.y); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
struct HaarFeatureDescriptor32 |
||||||
|
{ |
||||||
|
private: |
||||||
|
|
||||||
|
#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted 0x80000000 |
||||||
|
#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures 0x7F |
||||||
|
#define HaarFeatureDescriptor32_NumFeatures_Shift 24 |
||||||
|
#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset 0x00FFFFFF |
||||||
|
|
||||||
|
Ncv32u desc; |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
__host__ NCVStatus create(NcvBool bTilted, Ncv32u numFeatures, Ncv32u offsetFeatures) |
||||||
|
{ |
||||||
|
if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures) |
||||||
|
{ |
||||||
|
return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER; |
||||||
|
} |
||||||
|
if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset) |
||||||
|
{ |
||||||
|
return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE; |
||||||
|
} |
||||||
|
this->desc = 0; |
||||||
|
this->desc |= (bTilted ? HaarFeatureDescriptor32_Interpret_MaskFlagTilted : 0); |
||||||
|
this->desc |= (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift); |
||||||
|
this->desc |= offsetFeatures; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__device__ __host__ NcvBool isTilted(void) |
||||||
|
{ |
||||||
|
return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted) != 0; |
||||||
|
} |
||||||
|
|
||||||
|
__device__ __host__ Ncv32u getNumFeatures(void) |
||||||
|
{ |
||||||
|
return (this->desc & ~HaarFeatureDescriptor32_Interpret_MaskFlagTilted) >> HaarFeatureDescriptor32_NumFeatures_Shift; |
||||||
|
} |
||||||
|
|
||||||
|
__device__ __host__ Ncv32u getFeaturesOffset(void) |
||||||
|
{ |
||||||
|
return this->desc & HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
struct HaarClassifierNodeDescriptor32 |
||||||
|
{ |
||||||
|
uint1 _ui1; |
||||||
|
|
||||||
|
#define HaarClassifierNodeDescriptor32_Interpret_MaskSwitch (1 << 30) |
||||||
|
|
||||||
|
__host__ NCVStatus create(Ncv32f leafValue) |
||||||
|
{ |
||||||
|
if ((*(Ncv32u *)&leafValue) & HaarClassifierNodeDescriptor32_Interpret_MaskSwitch) |
||||||
|
{ |
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION; |
||||||
|
} |
||||||
|
*(Ncv32f *)&this->_ui1 = leafValue; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus create(Ncv32u offsetHaarClassifierNode) |
||||||
|
{ |
||||||
|
if (offsetHaarClassifierNode >= HaarClassifierNodeDescriptor32_Interpret_MaskSwitch) |
||||||
|
{ |
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION; |
||||||
|
} |
||||||
|
this->_ui1.x = offsetHaarClassifierNode; |
||||||
|
this->_ui1.x |= HaarClassifierNodeDescriptor32_Interpret_MaskSwitch; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__device__ __host__ NcvBool isLeaf(void) |
||||||
|
{ |
||||||
|
return !(this->_ui1.x & HaarClassifierNodeDescriptor32_Interpret_MaskSwitch); |
||||||
|
} |
||||||
|
|
||||||
|
__host__ Ncv32f getLeafValueHost(void) |
||||||
|
{ |
||||||
|
return *(Ncv32f *)&this->_ui1.x; |
||||||
|
} |
||||||
|
|
||||||
|
#ifdef __CUDACC__ |
||||||
|
__device__ Ncv32f getLeafValue(void) |
||||||
|
{ |
||||||
|
return __int_as_float(this->_ui1.x); |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
__device__ __host__ Ncv32u getNextNodeOffset(void) |
||||||
|
{ |
||||||
|
return (this->_ui1.x & ~HaarClassifierNodeDescriptor32_Interpret_MaskSwitch); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
struct HaarClassifierNode128 |
||||||
|
{ |
||||||
|
uint4 _ui4; |
||||||
|
|
||||||
|
__host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f) |
||||||
|
{ |
||||||
|
this->_ui4.x = *(Ncv32u *)&f; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus setThreshold(Ncv32f t) |
||||||
|
{ |
||||||
|
this->_ui4.y = *(Ncv32u *)&t; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl) |
||||||
|
{ |
||||||
|
this->_ui4.z = *(Ncv32u *)&nl; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr) |
||||||
|
{ |
||||||
|
this->_ui4.w = *(Ncv32u *)&nr; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void) |
||||||
|
{ |
||||||
|
return *(HaarFeatureDescriptor32 *)&this->_ui4.x; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ Ncv32f getThreshold(void) |
||||||
|
{ |
||||||
|
return *(Ncv32f*)&this->_ui4.y; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void) |
||||||
|
{ |
||||||
|
return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void) |
||||||
|
{ |
||||||
|
return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
struct HaarStage64 |
||||||
|
{ |
||||||
|
#define HaarStage64_Interpret_MaskRootNodes 0x0000FFFF |
||||||
|
#define HaarStage64_Interpret_MaskRootNodeOffset 0xFFFF0000 |
||||||
|
#define HaarStage64_Interpret_ShiftRootNodeOffset 16 |
||||||
|
|
||||||
|
uint2 _ui2; |
||||||
|
|
||||||
|
__host__ NCVStatus setStageThreshold(Ncv32f t) |
||||||
|
{ |
||||||
|
this->_ui2.x = *(Ncv32u *)&t; |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val) |
||||||
|
{ |
||||||
|
if (val > (HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset)) |
||||||
|
{ |
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION; |
||||||
|
} |
||||||
|
this->_ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes); |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ NCVStatus setNumClassifierRootNodes(Ncv32u val) |
||||||
|
{ |
||||||
|
if (val > HaarStage64_Interpret_MaskRootNodes) |
||||||
|
{ |
||||||
|
return NCV_HAAR_XML_LOADING_EXCEPTION; |
||||||
|
} |
||||||
|
this->_ui2.y = val | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodeOffset); |
||||||
|
return NCV_SUCCESS; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ Ncv32f getStageThreshold(void) |
||||||
|
{ |
||||||
|
return *(Ncv32f*)&this->_ui2.x; |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void) |
||||||
|
{ |
||||||
|
return (this->_ui2.y >> HaarStage64_Interpret_ShiftRootNodeOffset); |
||||||
|
} |
||||||
|
|
||||||
|
__host__ __device__ Ncv32u getNumClassifierRootNodes(void) |
||||||
|
{ |
||||||
|
return (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8); |
||||||
|
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4); |
||||||
|
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4); |
||||||
|
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16); |
||||||
|
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8); |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Classifier cascade descriptor
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
struct HaarClassifierCascadeDescriptor |
||||||
|
{ |
||||||
|
Ncv32u NumStages; |
||||||
|
Ncv32u NumClassifierRootNodes; |
||||||
|
Ncv32u NumClassifierTotalNodes; |
||||||
|
Ncv32u NumFeatures; |
||||||
|
NcvSize32u ClassifierSize; |
||||||
|
NcvBool bNeedsTiltedII; |
||||||
|
NcvBool bHasStumpsOnly; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
//
|
||||||
|
// Functional interface
|
||||||
|
//
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
enum
|
||||||
|
{ |
||||||
|
NCVPipeObjDet_Default = 0x000, |
||||||
|
NCVPipeObjDet_UseFairImageScaling = 0x001, |
||||||
|
NCVPipeObjDet_FindLargestObject = 0x002, |
||||||
|
NCVPipeObjDet_VisualizeInPlace = 0x004, |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg, |
||||||
|
NcvSize32u srcRoi, |
||||||
|
NCVVector<NcvRect32u> &d_dstRects, |
||||||
|
Ncv32u &dstNumRects, |
||||||
|
|
||||||
|
HaarClassifierCascadeDescriptor &haar, |
||||||
|
NCVVector<HaarStage64> &h_HaarStages, |
||||||
|
NCVVector<HaarStage64> &d_HaarStages, |
||||||
|
NCVVector<HaarClassifierNode128> &d_HaarNodes, |
||||||
|
NCVVector<HaarFeature64> &d_HaarFeatures, |
||||||
|
|
||||||
|
NcvSize32u minObjSize, |
||||||
|
Ncv32u minNeighbors, //default 4
|
||||||
|
Ncv32f scaleStep, //default 1.2f
|
||||||
|
Ncv32u pixelStep, //default 1
|
||||||
|
Ncv32u flags, //default NCVPipeObjDet_Default
|
||||||
|
|
||||||
|
INCVMemAllocator &gpuAllocator, |
||||||
|
INCVMemAllocator &cpuAllocator, |
||||||
|
Ncv32u devPropMajor, |
||||||
|
Ncv32u devPropMinor, |
||||||
|
cudaStream_t cuStream); |
||||||
|
|
||||||
|
|
||||||
|
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF |
||||||
|
#define HAAR_STDDEV_BORDER 1 |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage, |
||||||
|
NCVMatrix<Ncv32f> &d_weights, |
||||||
|
NCVMatrixAlloc<Ncv32u> &d_pixelMask, |
||||||
|
Ncv32u &numDetections, |
||||||
|
HaarClassifierCascadeDescriptor &haar, |
||||||
|
NCVVector<HaarStage64> &h_HaarStages, |
||||||
|
NCVVector<HaarStage64> &d_HaarStages, |
||||||
|
NCVVector<HaarClassifierNode128> &d_HaarNodes, |
||||||
|
NCVVector<HaarFeature64> &d_HaarFeatures, |
||||||
|
NcvBool bMaskElements, |
||||||
|
NcvSize32u anchorsRoi, |
||||||
|
Ncv32u pixelStep, |
||||||
|
Ncv32f scaleArea, |
||||||
|
INCVMemAllocator &gpuAllocator, |
||||||
|
INCVMemAllocator &cpuAllocator, |
||||||
|
Ncv32u devPropMajor, |
||||||
|
Ncv32u devPropMinor, |
||||||
|
cudaStream_t cuStream); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage, |
||||||
|
NCVMatrix<Ncv32f> &h_weights, |
||||||
|
NCVMatrixAlloc<Ncv32u> &h_pixelMask, |
||||||
|
Ncv32u &numDetections, |
||||||
|
HaarClassifierCascadeDescriptor &haar, |
||||||
|
NCVVector<HaarStage64> &h_HaarStages, |
||||||
|
NCVVector<HaarClassifierNode128> &h_HaarNodes, |
||||||
|
NCVVector<HaarFeature64> &h_HaarFeatures, |
||||||
|
NcvBool bMaskElements, |
||||||
|
NcvSize32u anchorsRoi, |
||||||
|
Ncv32u pixelStep, |
||||||
|
Ncv32f scaleArea); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, |
||||||
|
Ncv32u dstStride, |
||||||
|
Ncv32u dstWidth, |
||||||
|
Ncv32u dstHeight, |
||||||
|
NcvRect32u *d_rects, |
||||||
|
Ncv32u numRects, |
||||||
|
Ncv8u color, |
||||||
|
cudaStream_t cuStream); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, |
||||||
|
Ncv32u dstStride, |
||||||
|
Ncv32u dstWidth, |
||||||
|
Ncv32u dstHeight, |
||||||
|
NcvRect32u *d_rects, |
||||||
|
Ncv32u numRects, |
||||||
|
Ncv32u color, |
||||||
|
cudaStream_t cuStream); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, |
||||||
|
Ncv32u dstStride, |
||||||
|
Ncv32u dstWidth, |
||||||
|
Ncv32u dstHeight, |
||||||
|
NcvRect32u *h_rects, |
||||||
|
Ncv32u numRects, |
||||||
|
Ncv8u color); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, |
||||||
|
Ncv32u dstStride, |
||||||
|
Ncv32u dstWidth, |
||||||
|
Ncv32u dstHeight, |
||||||
|
NcvRect32u *h_rects, |
||||||
|
Ncv32u numRects, |
||||||
|
Ncv32u color); |
||||||
|
|
||||||
|
|
||||||
|
#define RECT_SIMILARITY_PROPORTION 0.2f |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask, |
||||||
|
Ncv32u numPixelMaskDetections, |
||||||
|
NCVVector<NcvRect32u> &hypotheses, |
||||||
|
Ncv32u &totalDetections, |
||||||
|
Ncv32u totalMaxDetections, |
||||||
|
Ncv32u rectWidth, |
||||||
|
Ncv32u rectHeight, |
||||||
|
Ncv32f curScale, |
||||||
|
cudaStream_t cuStream); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask, |
||||||
|
Ncv32u numPixelMaskDetections, |
||||||
|
NCVVector<NcvRect32u> &hypotheses, |
||||||
|
Ncv32u &totalDetections, |
||||||
|
Ncv32u totalMaxDetections, |
||||||
|
Ncv32u rectWidth, |
||||||
|
Ncv32u rectHeight, |
||||||
|
Ncv32f curScale); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses, |
||||||
|
Ncv32u &numHypotheses, |
||||||
|
Ncv32u minNeighbors, |
||||||
|
Ncv32f intersectEps, |
||||||
|
NCVVector<Ncv32u> *hypothesesWeights); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages, |
||||||
|
Ncv32u &numNodes, Ncv32u &numFeatures); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename, |
||||||
|
HaarClassifierCascadeDescriptor &haar, |
||||||
|
NCVVector<HaarStage64> &h_HaarStages, |
||||||
|
NCVVector<HaarClassifierNode128> &h_HaarNodes, |
||||||
|
NCVVector<HaarFeature64> &h_HaarFeatures); |
||||||
|
|
||||||
|
|
||||||
|
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename, |
||||||
|
HaarClassifierCascadeDescriptor haar, |
||||||
|
NCVVector<HaarStage64> &h_HaarStages, |
||||||
|
NCVVector<HaarClassifierNode128> &h_HaarNodes, |
||||||
|
NCVVector<HaarFeature64> &h_HaarFeatures); |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // _ncvhaarobjectdetection_hpp_
|
@ -0,0 +1,174 @@ |
|||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// The Loki Library
|
||||||
|
// Copyright (c) 2001 by Andrei Alexandrescu
|
||||||
|
// This code accompanies the book:
|
||||||
|
// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
|
||||||
|
// Patterns Applied". Copyright (c) 2001. Addison-Wesley.
|
||||||
|
// Permission to use, copy, modify, distribute and sell this software for any
|
||||||
|
// purpose is hereby granted without fee, provided that the above copyright
|
||||||
|
// notice appear in all copies and that both that copyright notice and this
|
||||||
|
// permission notice appear in supporting documentation.
|
||||||
|
// The author or Addison-Welsey Longman make no representations about the
|
||||||
|
// suitability of this software for any purpose. It is provided "as is"
|
||||||
|
// without express or implied warranty.
|
||||||
|
// http://loki-lib.sourceforge.net/index.php?n=Main.License
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef _ncvruntimetemplates_hpp_ |
||||||
|
#define _ncvruntimetemplates_hpp_ |
||||||
|
|
||||||
|
#include <stdarg.h> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
|
||||||
|
namespace Loki |
||||||
|
{ |
||||||
|
//==============================================================================
|
||||||
|
// class NullType
|
||||||
|
// Used as a placeholder for "no type here"
|
||||||
|
// Useful as an end marker in typelists
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
class NullType {}; |
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
// class template Typelist
|
||||||
|
// The building block of typelists of any length
|
||||||
|
// Use it through the LOKI_TYPELIST_NN macros
|
||||||
|
// Defines nested types:
|
||||||
|
// Head (first element, a non-typelist type by convention)
|
||||||
|
// Tail (second element, can be another typelist)
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
template <class T, class U> |
||||||
|
struct Typelist |
||||||
|
{ |
||||||
|
typedef T Head; |
||||||
|
typedef U Tail; |
||||||
|
}; |
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
// class template Int2Type
|
||||||
|
// Converts each integral constant into a unique type
|
||||||
|
// Invocation: Int2Type<v> where v is a compile-time constant integral
|
||||||
|
// Defines 'value', an enum that evaluates to v
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
template <int v> |
||||||
|
struct Int2Type |
||||||
|
{ |
||||||
|
enum { value = v }; |
||||||
|
}; |
||||||
|
|
||||||
|
namespace TL |
||||||
|
{ |
||||||
|
//==============================================================================
|
||||||
|
// class template TypeAt
|
||||||
|
// Finds the type at a given index in a typelist
|
||||||
|
// Invocation (TList is a typelist and index is a compile-time integral
|
||||||
|
// constant):
|
||||||
|
// TypeAt<TList, index>::Result
|
||||||
|
// returns the type in position 'index' in TList
|
||||||
|
// If you pass an out-of-bounds index, the result is a compile-time error
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
template <class TList, unsigned int index> struct TypeAt; |
||||||
|
|
||||||
|
template <class Head, class Tail> |
||||||
|
struct TypeAt<Typelist<Head, Tail>, 0> |
||||||
|
{ |
||||||
|
typedef Head Result; |
||||||
|
}; |
||||||
|
|
||||||
|
template <class Head, class Tail, unsigned int i> |
||||||
|
struct TypeAt<Typelist<Head, Tail>, i> |
||||||
|
{ |
||||||
|
typedef typename TypeAt<Tail, i - 1>::Result Result; |
||||||
|
}; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Runtime boolean template instance dispatcher
|
||||||
|
// Cyril Crassin <cyril.crassin@icare3d.org>
|
||||||
|
// NVIDIA, 2010
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
namespace NCVRuntimeTemplateBool |
||||||
|
{ |
||||||
|
//This struct is used to transform a list of parameters into template arguments
|
||||||
|
//The idea is to build a typelist containing the arguments
|
||||||
|
//and to pass this typelist to a user defined functor
|
||||||
|
template<typename TList, int NumArguments, class Func> |
||||||
|
struct KernelCaller |
||||||
|
{ |
||||||
|
//Convenience function used by the user
|
||||||
|
//Takes a variable argument list, transforms it into a list
|
||||||
|
static void call(Func &functor, int dummy, ...) |
||||||
|
{ |
||||||
|
//Vector used to collect arguments
|
||||||
|
std::vector<int> templateParamList; |
||||||
|
|
||||||
|
//Variable argument list manipulation
|
||||||
|
va_list listPointer; |
||||||
|
va_start(listPointer, dummy); |
||||||
|
//Collect parameters into the list
|
||||||
|
for(int i=0; i<NumArguments; i++) |
||||||
|
{ |
||||||
|
int val = va_arg(listPointer, int); |
||||||
|
templateParamList.push_back(val); |
||||||
|
} |
||||||
|
va_end(listPointer); |
||||||
|
|
||||||
|
//Call the actual typelist building function
|
||||||
|
call(functor, templateParamList); |
||||||
|
} |
||||||
|
|
||||||
|
//Actual function called recursively to build a typelist based
|
||||||
|
//on a list of values
|
||||||
|
static void call( Func &functor, std::vector<int> &templateParamList) |
||||||
|
{ |
||||||
|
//Get current parameter value in the list
|
||||||
|
int val = templateParamList[templateParamList.size() - 1]; |
||||||
|
templateParamList.pop_back(); |
||||||
|
|
||||||
|
//Select the compile time value to add into the typelist
|
||||||
|
//depending on the runtime variable and make recursive call.
|
||||||
|
//Both versions are really instantiated
|
||||||
|
if(val) |
||||||
|
{ |
||||||
|
KernelCaller< |
||||||
|
Loki::Typelist<typename Loki::Int2Type<true>, TList >, |
||||||
|
NumArguments-1, Func > |
||||||
|
::call(functor, templateParamList); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
KernelCaller<
|
||||||
|
Loki::Typelist<typename Loki::Int2Type<false>, TList >, |
||||||
|
NumArguments-1, Func > |
||||||
|
::call(functor, templateParamList); |
||||||
|
} |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
//Specialization for 0 value left in the list
|
||||||
|
//-> actual kernel functor call
|
||||||
|
template<class TList, class Func> |
||||||
|
struct KernelCaller<TList, 0, Func> |
||||||
|
{ |
||||||
|
static void call(Func &functor) |
||||||
|
{ |
||||||
|
//Call to the functor's kernel call method
|
||||||
|
functor.call(TList()); //TList instantiated to get the method template parameter resolved
|
||||||
|
} |
||||||
|
|
||||||
|
static void call(Func &functor, std::vector<int> &templateParams) |
||||||
|
{ |
||||||
|
functor.call(TList()); |
||||||
|
} |
||||||
|
}; |
||||||
|
} |
||||||
|
|
||||||
|
#endif //_ncvruntimetemplates_hpp_
|
@ -0,0 +1,193 @@ |
|||||||
|
// WARNING: this sample is under construction! Use it on your own risk.
|
||||||
|
|
||||||
|
#include <opencv2/contrib/contrib.hpp> |
||||||
|
#include <opencv2/objdetect/objdetect.hpp> |
||||||
|
#include <opencv2/highgui/highgui.hpp> |
||||||
|
#include <opencv2/imgproc/imgproc.hpp> |
||||||
|
#include <opencv2/gpu/gpu.hpp> |
||||||
|
|
||||||
|
#include <iostream> |
||||||
|
#include <iomanip> |
||||||
|
#include <stdio.h> |
||||||
|
|
||||||
|
using namespace std; |
||||||
|
using namespace cv; |
||||||
|
using namespace cv::gpu; |
||||||
|
|
||||||
|
void help() |
||||||
|
{ |
||||||
|
cout << "Usage: ./cascadeclassifier <cascade_file> <image_or_video_or_cameraid>\n"
|
||||||
|
"Using OpenCV version " << CV_VERSION << endl << endl; |
||||||
|
} |
||||||
|
|
||||||
|
void DetectAndDraw(Mat& img, CascadeClassifier_GPU& cascade); |
||||||
|
|
||||||
|
String cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml"; |
||||||
|
String nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml"; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<class T> void convertAndReseize(const T& src, T& gray, T& resized, double scale = 2.0) |
||||||
|
{ |
||||||
|
if (src.channels() == 3) |
||||||
|
cvtColor( src, gray, CV_BGR2GRAY ); |
||||||
|
else |
||||||
|
gray = src; |
||||||
|
|
||||||
|
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale)); |
||||||
|
if (scale != 1) |
||||||
|
resize(gray, resized, sz); |
||||||
|
else |
||||||
|
resized = gray; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main( int argc, const char** argv ) |
||||||
|
{
|
||||||
|
if (argc != 3) |
||||||
|
return help(), -1; |
||||||
|
|
||||||
|
if (cv::gpu::getCudaEnabledDeviceCount() == 0) |
||||||
|
return cerr << "No GPU found or the library is compiled without GPU support" << endl, -1; |
||||||
|
|
||||||
|
VideoCapture capture; |
||||||
|
|
||||||
|
string cascadeName = argv[1]; |
||||||
|
string inputName = argv[2]; |
||||||
|
|
||||||
|
cv::gpu::CascadeClassifier_GPU cascade_gpu; |
||||||
|
if( !cascade_gpu.load( cascadeName ) ) |
||||||
|
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1; |
||||||
|
|
||||||
|
cv::CascadeClassifier cascade_cpu; |
||||||
|
if( !cascade_cpu.load( cascadeName ) ) |
||||||
|
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1; |
||||||
|
|
||||||
|
Mat image = imread( inputName); |
||||||
|
if( image.empty() ) |
||||||
|
if (!capture.open(inputName)) |
||||||
|
{ |
||||||
|
int camid = 0; |
||||||
|
sscanf(inputName.c_str(), "%d", &camid); |
||||||
|
if(!capture.open(camid)) |
||||||
|
cout << "Can't open source" << endl; |
||||||
|
} |
||||||
|
|
||||||
|
namedWindow( "result", 1 );
|
||||||
|
Size fontSz = cv::getTextSize("T[]", FONT_HERSHEY_SIMPLEX, 1.0, 2, 0); |
||||||
|
|
||||||
|
Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp; |
||||||
|
vector<Rect> facesBuf_cpu; |
||||||
|
|
||||||
|
GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
|
||||||
|
|
||||||
|
/* parameters */ |
||||||
|
bool useGPU = true; |
||||||
|
double scale_factor = 2; |
||||||
|
|
||||||
|
bool visualizeInPlace = false;
|
||||||
|
bool findLargestObject = false;
|
||||||
|
|
||||||
|
printf("\t<space> - toggle GPU/CPU\n"); |
||||||
|
printf("\tL - toggle lagest faces\n"); |
||||||
|
printf("\tV - toggle visualisation in-place (for GPU only)\n"); |
||||||
|
printf("\t1/q - inc/dec scale\n"); |
||||||
|
|
||||||
|
int detections_num; |
||||||
|
for(;;) |
||||||
|
{
|
||||||
|
if( capture.isOpened() ) |
||||||
|
{ |
||||||
|
capture >> frame;
|
||||||
|
if( frame.empty()) |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
(image.empty() ? frame : image).copyTo(frame_cpu); |
||||||
|
frame_gpu.upload( image.empty() ? frame : image); |
||||||
|
|
||||||
|
convertAndReseize(frame_gpu, gray_gpu, resized_gpu, scale_factor); |
||||||
|
convertAndReseize(frame_cpu, gray_cpu, resized_cpu, scale_factor); |
||||||
|
|
||||||
|
cv::TickMeter tm; |
||||||
|
tm.start();
|
||||||
|
|
||||||
|
if (useGPU) |
||||||
|
{ |
||||||
|
cascade_gpu.visualizeInPlace = visualizeInPlace;
|
||||||
|
cascade_gpu.findLargestObject = findLargestObject;
|
||||||
|
|
||||||
|
detections_num = cascade_gpu.detectMultiScale( resized_gpu, facesBuf_gpu );
|
||||||
|
facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded); |
||||||
|
|
||||||
|
} |
||||||
|
else /* so use CPU */ |
||||||
|
{
|
||||||
|
Size minSize = cascade_gpu.getClassifierSize(); |
||||||
|
if (findLargestObject) |
||||||
|
{
|
||||||
|
float ratio = (float)std::min(frame.cols / minSize.width, frame.rows / minSize.height); |
||||||
|
ratio = std::max(ratio / 2.5f, 1.f); |
||||||
|
minSize = Size(cvRound(minSize.width * ratio), cvRound(minSize.height * ratio));
|
||||||
|
} |
||||||
|
|
||||||
|
cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2, 4, (findLargestObject ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE, minSize);
|
||||||
|
detections_num = (int)facesBuf_cpu.size(); |
||||||
|
} |
||||||
|
|
||||||
|
tm.stop(); |
||||||
|
printf( "detection time = %g ms\n", tm.getTimeMilli() ); |
||||||
|
|
||||||
|
if (useGPU) |
||||||
|
resized_gpu.download(resized_cpu); |
||||||
|
|
||||||
|
if (!visualizeInPlace || !useGPU) |
||||||
|
if (detections_num) |
||||||
|
{ |
||||||
|
Rect* faces = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
|
||||||
|
for(int i = 0; i < detections_num; ++i)
|
||||||
|
cv::rectangle(resized_cpu, faces[i], Scalar(255));
|
||||||
|
} |
||||||
|
|
||||||
|
Point text_pos(5, 25); |
||||||
|
int offs = fontSz.height + 5; |
||||||
|
Scalar color = CV_RGB(255, 0, 0); |
||||||
|
|
||||||
|
|
||||||
|
cv::cvtColor(resized_cpu, frameDisp, CV_GRAY2BGR); |
||||||
|
|
||||||
|
char buf[4096]; |
||||||
|
sprintf(buf, "%s, FPS = %0.3g", useGPU ? "GPU" : "CPU", 1.0/tm.getTimeSec());
|
||||||
|
putText(frameDisp, buf, text_pos, FONT_HERSHEY_SIMPLEX, 1.0, color, 2); |
||||||
|
sprintf(buf, "scale = %0.3g, [%d*scale x %d*scale]", scale_factor, frame.cols, frame.rows);
|
||||||
|
putText(frameDisp, buf, text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2); |
||||||
|
putText(frameDisp, "Hotkeys: space, 1, Q, L, V, Esc", text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2); |
||||||
|
|
||||||
|
if (findLargestObject) |
||||||
|
putText(frameDisp, "FindLargestObject", text_pos+=Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2); |
||||||
|
|
||||||
|
if (visualizeInPlace && useGPU) |
||||||
|
putText(frameDisp, "VisualizeInPlace", text_pos+Point(0,offs), FONT_HERSHEY_SIMPLEX, 1.0, color, 2); |
||||||
|
|
||||||
|
cv::imshow( "result", frameDisp); |
||||||
|
|
||||||
|
int key = waitKey( 5 ); |
||||||
|
if( key == 27) |
||||||
|
break; |
||||||
|
|
||||||
|
switch (key) |
||||||
|
{ |
||||||
|
case (int)' ': useGPU = !useGPU; printf("Using %s\n", useGPU ? "GPU" : "CPU");break; |
||||||
|
case (int)'v': case (int)'V': visualizeInPlace = !visualizeInPlace; printf("VisualizeInPlace = %d\n", visualizeInPlace); break; |
||||||
|
case (int)'l': case (int)'L': findLargestObject = !findLargestObject; printf("FindLargestObject = %d\n", findLargestObject); break; |
||||||
|
case (int)'1': scale_factor*=1.05; printf("Scale factor = %g\n", scale_factor); break; |
||||||
|
case (int)'q': case (int)'Q':scale_factor/=1.05; printf("Scale factor = %g\n", scale_factor); break; |
||||||
|
} |
||||||
|
|
||||||
|
}
|
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in new issue