Merge pull request #1046 from SpecLad:merge-2.4

pull/1047/merge
Andrey Pavlenko 12 years ago committed by OpenCV Buildbot
commit 89086bdb8d
  1. 2
      cmake/OpenCVDetectOpenCL.cmake
  2. 4
      modules/calib3d/include/opencv2/calib3d.hpp
  3. 231
      modules/core/include/opencv2/core/cuda/limits.hpp
  4. 3
      modules/core/include/opencv2/core/mat.hpp
  5. 24
      modules/core/include/opencv2/core/private.hpp
  6. 30
      modules/core/src/matrix.cpp
  7. 27
      modules/core/src/parallel.cpp
  8. 2
      modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
  9. 4
      modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
  10. 12
      modules/highgui/include/opencv2/highgui/cap_ios.h
  11. 86
      modules/highgui/src/cap_ios_abstract_camera.mm
  12. 54
      modules/highgui/src/cap_ios_video_camera.mm
  13. 89
      modules/highgui/src/window_QT.cpp
  14. 1
      modules/highgui/src/window_QT.h
  15. 14
      modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java
  16. 73
      modules/nonfree/test/test_features2d.cpp
  17. 8
      modules/ocl/include/opencv2/ocl.hpp
  18. 2
      modules/ocl/include/opencv2/ocl/private/util.hpp
  19. 2
      modules/ocl/perf/main.cpp
  20. 91
      modules/ocl/perf/perf_calib3d.cpp
  21. 16
      modules/ocl/perf/perf_filters.cpp
  22. 76
      modules/ocl/perf/perf_hog.cpp
  23. 46
      modules/ocl/perf/perf_imgproc.cpp
  24. 62
      modules/ocl/perf/perf_moments.cpp
  25. 14
      modules/ocl/perf/precomp.cpp
  26. 471
      modules/ocl/src/hog.cpp
  27. 31
      modules/ocl/src/matrix_operations.cpp
  28. 4
      modules/ocl/src/mcwutil.cpp
  29. 520
      modules/ocl/src/opencl/objdetect_hog.cl
  30. 180
      modules/ocl/test/test_haar.cpp
  31. 46
      modules/ocl/test/test_imgproc.cpp
  32. 216
      modules/ocl/test/test_objdetect.cpp
  33. 44
      modules/ocl/test/test_pyramids.cpp
  34. 90
      modules/ocl/test/test_pyrup.cpp
  35. 102
      modules/ocl/test/utility.cpp
  36. 11
      modules/ocl/test/utility.hpp
  37. 39
      modules/ts/misc/testlog_parser.py
  38. 167
      modules/ts/misc/xls-report.py
  39. 72
      modules/ts/src/ts_func.cpp
  40. 127
      samples/ocl/facedetect.cpp
  41. 334
      samples/ocl/hog.cpp
  42. 48
      samples/ocl/pyrlk_optical_flow.cpp
  43. 235
      samples/ocl/squares.cpp
  44. 311
      samples/ocl/stereo_match.cpp
  45. 175
      samples/ocl/surf_matcher.cpp
  46. 264
      samples/ocl/tvl1_optical_flow.cpp

@ -44,7 +44,7 @@ if(OPENCL_FOUND)
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
if(WIN64)
if(WIN32 AND X86_64)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
elseif(WIN32)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)

@ -278,8 +278,8 @@ CV_EXPORTS int recoverPose( InputArray E, InputArray points1, InputArray points2
//! finds coordinates of epipolar lines corresponding to the specified points
CV_EXPORTS void computeCorrespondEpilines( InputArray points, int whichImage,
InputArray F, OutputArray lines );
CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage,
InputArray F, OutputArray lines );
CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2,
InputArray projPoints1, InputArray projPoints2,

@ -43,193 +43,80 @@
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__
#include <limits>
#include <limits.h>
#include <float.h>
#include "common.hpp"
namespace cv { namespace gpu { namespace cudev
{
template<class T> struct numeric_limits
{
typedef T type;
__device__ __forceinline__ static type min() { return type(); };
__device__ __forceinline__ static type max() { return type(); };
__device__ __forceinline__ static type epsilon() { return type(); }
__device__ __forceinline__ static type round_error() { return type(); }
__device__ __forceinline__ static type denorm_min() { return type(); }
__device__ __forceinline__ static type infinity() { return type(); }
__device__ __forceinline__ static type quiet_NaN() { return type(); }
__device__ __forceinline__ static type signaling_NaN() { return T(); }
static const bool is_signed;
};
template<> struct numeric_limits<bool>
{
typedef bool type;
__device__ __forceinline__ static type min() { return false; };
__device__ __forceinline__ static type max() { return true; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template <class T> struct numeric_limits;
template<> struct numeric_limits<char>
{
typedef char type;
__device__ __forceinline__ static type min() { return CHAR_MIN; };
__device__ __forceinline__ static type max() { return CHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (char)-1 == -1;
};
template<> struct numeric_limits<signed char>
{
typedef char type;
__device__ __forceinline__ static type min() { return SCHAR_MIN; };
__device__ __forceinline__ static type max() { return SCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (signed char)-1 == -1;
};
template<> struct numeric_limits<unsigned char>
{
typedef unsigned char type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template <> struct numeric_limits<bool>
{
__device__ __forceinline__ static bool min() { return false; }
__device__ __forceinline__ static bool max() { return true; }
static const bool is_signed = false;
};
template<> struct numeric_limits<short>
{
typedef short type;
__device__ __forceinline__ static type min() { return SHRT_MIN; };
__device__ __forceinline__ static type max() { return SHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template <> struct numeric_limits<signed char>
{
__device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
__device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned short>
{
typedef unsigned short type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return USHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template <> struct numeric_limits<unsigned char>
{
__device__ __forceinline__ static unsigned char min() { return 0; }
__device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
static const bool is_signed = false;
};
template<> struct numeric_limits<int>
{
typedef int type;
__device__ __forceinline__ static type min() { return INT_MIN; };
__device__ __forceinline__ static type max() { return INT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template <> struct numeric_limits<short>
{
__device__ __forceinline__ static short min() { return SHRT_MIN; }
__device__ __forceinline__ static short max() { return SHRT_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned short>
{
__device__ __forceinline__ static unsigned short min() { return 0; }
__device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
static const bool is_signed = false;
};
template<> struct numeric_limits<unsigned int>
{
typedef unsigned int type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UINT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template <> struct numeric_limits<int>
{
__device__ __forceinline__ static int min() { return INT_MIN; }
__device__ __forceinline__ static int max() { return INT_MAX; }
static const bool is_signed = true;
};
template<> struct numeric_limits<long>
{
typedef long type;
__device__ __forceinline__ static type min() { return LONG_MIN; };
__device__ __forceinline__ static type max() { return LONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned int>
{
__device__ __forceinline__ static unsigned int min() { return 0; }
__device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
static const bool is_signed = false;
};
template<> struct numeric_limits<unsigned long>
{
typedef unsigned long type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return ULONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template <> struct numeric_limits<float>
{
__device__ __forceinline__ static float min() { return FLT_MIN; }
__device__ __forceinline__ static float max() { return FLT_MAX; }
__device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
static const bool is_signed = true;
};
template<> struct numeric_limits<float>
{
typedef float type;
__device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
__device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
__device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template <> struct numeric_limits<double>
{
__device__ __forceinline__ static double min() { return DBL_MIN; }
__device__ __forceinline__ static double max() { return DBL_MAX; }
__device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
static const bool is_signed = true;
};
template<> struct numeric_limits<double>
{
typedef double type;
__device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
__device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
}}} // namespace cv { namespace gpu { namespace cudev {
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__

@ -78,7 +78,8 @@ public:
EXPR = 6 << KIND_SHIFT,
OPENGL_BUFFER = 7 << KIND_SHIFT,
CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT
GPU_MAT = 9 << KIND_SHIFT,
OCL_MAT =10 << KIND_SHIFT
};
_InputArray();

@ -71,6 +71,30 @@
# endif
#endif
#ifdef _OPENMP
# define HAVE_OPENMP
#endif
#ifdef __APPLE__
# define HAVE_GCD
#endif
#if defined _MSC_VER && _MSC_VER >= 1600
# define HAVE_CONCURRENCY
#endif
#if defined HAVE_TBB
# define CV_PARALLEL_FRAMEWORK "tbb"
#elif defined HAVE_CSTRIPES
# define CV_PARALLEL_FRAMEWORK "cstripes"
#elif defined HAVE_OPENMP
# define CV_PARALLEL_FRAMEWORK "openmp"
#elif defined HAVE_GCD
# define CV_PARALLEL_FRAMEWORK "gcd"
#elif defined HAVE_CONCURRENCY
# define CV_PARALLEL_FRAMEWORK "ms-concurrency"
#endif
namespace cv
{
#ifdef HAVE_TBB

@ -995,6 +995,11 @@ Mat _InputArray::getMat(int i) const
return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
}
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
@ -1100,6 +1105,11 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
return;
}
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
CV_Assert( k == STD_VECTOR_MAT );
//if( k == STD_VECTOR_MAT )
{
@ -1224,6 +1234,11 @@ Size _InputArray::size(int i) const
return d_mat->size();
}
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
{
@ -1338,6 +1353,11 @@ bool _InputArray::empty() const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->empty();
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
if( k == GPU_MAT )
return ((const gpu::GpuMat*)obj)->empty();
@ -1573,6 +1593,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all
return;
}
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
if( k == NONE )
{
CV_Error(CV_StsNullPtr, "create() called for the missing output array" );
@ -1684,6 +1709,11 @@ void _OutputArray::release() const
return;
}
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
CV_Assert( k == STD_VECTOR_MAT );
//if( k == STD_VECTOR_MAT )
{

@ -61,17 +61,6 @@
#endif
#endif
#ifdef _OPENMP
#define HAVE_OPENMP
#endif
#ifdef __APPLE__
#define HAVE_GCD
#endif
#if defined _MSC_VER && _MSC_VER >= 1600
#define HAVE_CONCURRENCY
#endif
/* IMPORTANT: always use the same order of defines
1. HAVE_TBB - 3rdparty library, should be explicitly enabled
@ -110,10 +99,6 @@
#endif
#endif
#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY
#define HAVE_PARALLEL_FRAMEWORK
#endif
namespace cv
{
ParallelLoopBody::~ParallelLoopBody() {}
@ -121,7 +106,7 @@ namespace cv
namespace
{
#ifdef HAVE_PARALLEL_FRAMEWORK
#ifdef CV_PARALLEL_FRAMEWORK
class ParallelLoopBodyWrapper
{
public:
@ -218,7 +203,7 @@ public:
static SchedPtr pplScheduler;
#endif
#endif // HAVE_PARALLEL_FRAMEWORK
#endif // CV_PARALLEL_FRAMEWORK
} //namespace
@ -226,7 +211,7 @@ static SchedPtr pplScheduler;
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
#ifdef HAVE_PARALLEL_FRAMEWORK
#ifdef CV_PARALLEL_FRAMEWORK
if(numThreads != 0)
{
@ -281,7 +266,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
}
else
#endif // HAVE_PARALLEL_FRAMEWORK
#endif // CV_PARALLEL_FRAMEWORK
{
(void)nstripes;
body(range);
@ -290,7 +275,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
int cv::getNumThreads(void)
{
#ifdef HAVE_PARALLEL_FRAMEWORK
#ifdef CV_PARALLEL_FRAMEWORK
if(numThreads == 0)
return 1;
@ -333,7 +318,7 @@ int cv::getNumThreads(void)
void cv::setNumThreads( int threads )
{
(void)threads;
#ifdef HAVE_PARALLEL_FRAMEWORK
#ifdef CV_PARALLEL_FRAMEWORK
numThreads = threads;
#endif

@ -126,7 +126,7 @@ typedef int Ncv32s;
typedef unsigned int Ncv32u;
typedef short Ncv16s;
typedef unsigned short Ncv16u;
typedef char Ncv8s;
typedef signed char Ncv8s;
typedef unsigned char Ncv8u;
typedef float Ncv32f;
typedef double Ncv64f;

@ -51,7 +51,7 @@ template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
template<> static inline __host__ __device__ Ncv8u _pixMaxVal<Ncv8u>() {return UCHAR_MAX;}
template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
template<> static inline __host__ __device__ Ncv8s _pixMaxVal<Ncv8s>() {return CHAR_MAX;}
template<> static inline __host__ __device__ Ncv8s _pixMaxVal<Ncv8s>() {return SCHAR_MAX;}
template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
@ -61,7 +61,7 @@ template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
template<> static inline __host__ __device__ Ncv8u _pixMinVal<Ncv8u>() {return 0;}
template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
template<> static inline __host__ __device__ Ncv8s _pixMinVal<Ncv8s>() {return CHAR_MIN;}
template<> static inline __host__ __device__ Ncv8s _pixMinVal<Ncv8s>() {return SCHAR_MIN;}
template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}

@ -1,6 +1,4 @@
/*
* cap_ios.h
* For iOS video I/O
/* For iOS video I/O
* by Eduard Feicho on 29/07/12
* Copyright 2012. All rights reserved.
*
@ -90,6 +88,12 @@
- (void)createVideoPreviewLayer;
- (void)updateOrientation;
- (void)lockFocus;
- (void)unlockFocus;
- (void)lockExposure;
- (void)unlockExposure;
- (void)lockBalance;
- (void)unlockBalance;
@end
@ -116,6 +120,7 @@
BOOL grayscaleMode;
BOOL recordVideo;
BOOL rotateVideo;
AVAssetWriterInput* recordAssetWriterInput;
AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
AVAssetWriter* recordAssetWriter;
@ -128,6 +133,7 @@
@property (nonatomic, assign) BOOL grayscaleMode;
@property (nonatomic, assign) BOOL recordVideo;
@property (nonatomic, assign) BOOL rotateVideo;
@property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput;
@property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
@property (nonatomic, retain) AVAssetWriter* recordAssetWriter;

@ -2,6 +2,7 @@
* cap_ios_abstract_camera.mm
* For iOS video I/O
* by Eduard Feicho on 29/07/12
* by Alexander Shishkov on 17/07/13
* Copyright 2012. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -405,4 +406,89 @@
}
}
// Switches the default video capture device to AVCaptureFocusModeLocked.
// Does nothing if the device reports that mode as unsupported; logs the
// error if the device cannot be locked for configuration.
- (void)lockFocus;
{
    AVCaptureDevice *captureDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![captureDevice isFocusModeSupported:AVCaptureFocusModeLocked]) {
        return;
    }

    NSError *configError = nil;
    if (![captureDevice lockForConfiguration:&configError]) {
        NSLog(@"unable to lock device for locked focus configuration %@", [configError localizedDescription]);
        return;
    }

    // The mode may only be changed while the configuration lock is held.
    [captureDevice setFocusMode:AVCaptureFocusModeLocked];
    [captureDevice unlockForConfiguration];
}
// Switches the default video capture device to
// AVCaptureFocusModeContinuousAutoFocus. Does nothing if the device reports
// that mode as unsupported; logs the error if the device cannot be locked
// for configuration.
- (void) unlockFocus;
{
    AVCaptureDevice *captureDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![captureDevice isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
        return;
    }

    NSError *configError = nil;
    if (![captureDevice lockForConfiguration:&configError]) {
        NSLog(@"unable to lock device for autofocus configuration %@", [configError localizedDescription]);
        return;
    }

    // The mode may only be changed while the configuration lock is held.
    [captureDevice setFocusMode:AVCaptureFocusModeContinuousAutoFocus];
    [captureDevice unlockForConfiguration];
}
// Switches the default video capture device to AVCaptureExposureModeLocked.
// Does nothing if the device reports that mode as unsupported; logs the
// error if the device cannot be locked for configuration.
- (void)lockExposure;
{
    AVCaptureDevice *captureDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![captureDevice isExposureModeSupported:AVCaptureExposureModeLocked]) {
        return;
    }

    NSError *configError = nil;
    if (![captureDevice lockForConfiguration:&configError]) {
        NSLog(@"unable to lock device for locked exposure configuration %@", [configError localizedDescription]);
        return;
    }

    // The mode may only be changed while the configuration lock is held.
    [captureDevice setExposureMode:AVCaptureExposureModeLocked];
    [captureDevice unlockForConfiguration];
}
// Switches the default video capture device to
// AVCaptureExposureModeContinuousAutoExposure. Does nothing if the device
// reports that mode as unsupported; logs the error if the device cannot be
// locked for configuration.
- (void) unlockExposure;
{
    AVCaptureDevice *captureDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![captureDevice isExposureModeSupported:AVCaptureExposureModeContinuousAutoExposure]) {
        return;
    }

    NSError *configError = nil;
    if (![captureDevice lockForConfiguration:&configError]) {
        NSLog(@"unable to lock device for autoexposure configuration %@", [configError localizedDescription]);
        return;
    }

    // The mode may only be changed while the configuration lock is held.
    [captureDevice setExposureMode:AVCaptureExposureModeContinuousAutoExposure];
    [captureDevice unlockForConfiguration];
}
// Switches the default video capture device to
// AVCaptureWhiteBalanceModeLocked. Does nothing if the device reports that
// mode as unsupported; logs the error if the device cannot be locked for
// configuration.
- (void)lockBalance;
{
    AVCaptureDevice *captureDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![captureDevice isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeLocked]) {
        return;
    }

    NSError *configError = nil;
    if (![captureDevice lockForConfiguration:&configError]) {
        NSLog(@"unable to lock device for locked white balance configuration %@", [configError localizedDescription]);
        return;
    }

    // The mode may only be changed while the configuration lock is held.
    [captureDevice setWhiteBalanceMode:AVCaptureWhiteBalanceModeLocked];
    [captureDevice unlockForConfiguration];
}
// Switches the default video capture device to
// AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance. Does nothing if the
// device reports that mode as unsupported; logs the error if the device
// cannot be locked for configuration.
- (void) unlockBalance;
{
    AVCaptureDevice *captureDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![captureDevice isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance]) {
        return;
    }

    NSError *configError = nil;
    if (![captureDevice lockForConfiguration:&configError]) {
        NSLog(@"unable to lock device for auto white balance configuration %@", [configError localizedDescription]);
        return;
    }

    // The mode may only be changed while the configuration lock is held.
    [captureDevice setWhiteBalanceMode:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance];
    [captureDevice unlockForConfiguration];
}
@end

@ -2,6 +2,7 @@
* cap_ios_video_camera.mm
* For iOS video I/O
* by Eduard Feicho on 29/07/12
* by Alexander Shishkov on 17/07/13
* Copyright 2012. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -30,7 +31,6 @@
#import "opencv2/highgui/cap_ios.h"
#include "precomp.hpp"
#import <AssetsLibrary/AssetsLibrary.h>
@ -70,6 +70,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
@synthesize videoDataOutput;
@synthesize recordVideo;
@synthesize rotateVideo;
//@synthesize videoFileOutput;
@synthesize recordAssetWriterInput;
@synthesize recordPixelBufferAdaptor;
@ -85,6 +86,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
if (self) {
self.useAVCaptureVideoPreviewLayer = NO;
self.recordVideo = NO;
self.rotateVideo = NO;
}
return self;
}
@ -269,13 +271,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
}
#pragma mark - Private Interface
- (void)createVideoDataOutput;
{
// Make a video data output
@ -389,6 +386,38 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
[self.parentView.layer addSublayer:self.customPreviewLayer];
}
// Draws `image` into a newly created kCVPixelFormatType_32ARGB pixel buffer
// of the same width and height and returns that buffer.
//
// The returned buffer still carries the reference obtained from
// CVPixelBufferCreate; this method does not release it, so the caller is
// responsible for balancing it (CVPixelBufferRelease) when done.
//
// Asserts (NSParameterAssert) that the pixel buffer could be created.
- (CVPixelBufferRef) pixelBufferFromCGImage: (CGImageRef) image
{
    CGSize frameSize = CGSizeMake(CGImageGetWidth(image), CGImageGetHeight(image));
    NSDictionary *options = [NSDictionary dictionaryWithObjectsAndKeys:
                             [NSNumber numberWithBool:NO], kCVPixelBufferCGImageCompatibilityKey,
                             [NSNumber numberWithBool:NO], kCVPixelBufferCGBitmapContextCompatibilityKey,
                             nil];

    // CFBridgingRetain hands back an owned (+1) reference. Keep it in a
    // variable so it can be released once the buffer has been created;
    // passing it inline leaked one dictionary per call.
    CFDictionaryRef pixelBufferAttributes = (CFDictionaryRef) CFBridgingRetain(options);
    CVPixelBufferRef pxbuffer = NULL;
    CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, frameSize.width,
                                          frameSize.height, kCVPixelFormatType_32ARGB, pixelBufferAttributes,
                                          &pxbuffer);
    CFRelease(pixelBufferAttributes);
    NSParameterAssert(status == kCVReturnSuccess && pxbuffer != NULL);

    CVPixelBufferLockBaseAddress(pxbuffer, 0);
    void *pxdata = CVPixelBufferGetBaseAddress(pxbuffer);

    CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB();
    // Use the buffer's real row stride: rows may be padded beyond 4*width,
    // in which case a hard-coded stride would skew the drawn image.
    CGContextRef context = CGBitmapContextCreate(pxdata, frameSize.width,
                                                 frameSize.height, 8, CVPixelBufferGetBytesPerRow(pxbuffer), rgbColorSpace,
                                                 kCGImageAlphaPremultipliedFirst);

    CGContextDrawImage(context, CGRectMake(0, 0, CGImageGetWidth(image),
                                           CGImageGetHeight(image)), image);
    CGColorSpaceRelease(rgbColorSpace);
    CGContextRelease(context);

    CVPixelBufferUnlockBaseAddress(pxbuffer, 0);

    return pxbuffer;
}
#pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate
@ -522,7 +551,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
}
if (self.recordAssetWriterInput.readyForMoreMediaData) {
if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer
CVImageBufferRef pixelBuffer = [self pixelBufferFromCGImage:dstImage];
if (! [self.recordPixelBufferAdaptor appendPixelBuffer:pixelBuffer
withPresentationTime:lastSampleTime] ) {
NSLog(@"Video Writing Error");
}
@ -543,9 +573,12 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
- (void)updateOrientation;
{
NSLog(@"rotate..");
self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
[self layoutPreviewLayer];
if (self.rotateVideo == YES)
{
NSLog(@"rotate..");
self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
[self layoutPreviewLayer];
}
}
@ -583,3 +616,4 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
}
@end

@ -2473,35 +2473,33 @@ void DefaultViewPort::saveView()
if (!fileName.isEmpty()) //save the picture
{
QString extension = fileName.right(3);
// (no need anymore) create the image resized to receive the 'screenshot'
// image2Draw_qt_resized = QImage(viewport()->width(), viewport()->height(),QImage::Format_RGB888);
QPainter saveimage(&image2Draw_qt_resized);
this->render(&saveimage);
// Create a new pixmap to render the viewport into
QPixmap viewportPixmap(viewport()->size());
viewport()->render(&viewportPixmap);
// Save it..
if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0)
{
image2Draw_qt_resized.save(fileName, "PNG");
viewportPixmap.save(fileName, "PNG");
return;
}
if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0)
{
image2Draw_qt_resized.save(fileName, "JPG");
viewportPixmap.save(fileName, "JPG");
return;
}
if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0)
{
image2Draw_qt_resized.save(fileName, "BMP");
viewportPixmap.save(fileName, "BMP");
return;
}
if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0)
{
image2Draw_qt_resized.save(fileName, "JPEG");
viewportPixmap.save(fileName, "JPEG");
return;
}
@ -2651,17 +2649,16 @@ void DefaultViewPort::paintEvent(QPaintEvent* evnt)
//Now disable matrixWorld for overlay display
myPainter.setWorldMatrixEnabled(false);
//overlay pixel values if zoomed in far enough
if (param_matrixWorld.m11()*ratioX >= threshold_zoom_img_region &&
param_matrixWorld.m11()*ratioY >= threshold_zoom_img_region)
{
drawImgRegion(&myPainter);
}
//in mode zoom/panning
if (param_matrixWorld.m11() > 1)
{
if (param_matrixWorld.m11() >= threshold_zoom_img_region)
{
if (centralWidget->param_flags == CV_WINDOW_NORMAL)
startDisplayInfo("WARNING: The values displayed are the resized image's values. If you want the original image's values, use CV_WINDOW_AUTOSIZE", 1000);
drawImgRegion(&myPainter);
}
drawViewOverview(&myPainter);
}
@ -2887,22 +2884,24 @@ void DefaultViewPort::drawStatusBar()
//accept only CV_8UC1 and CV_8UC3 images for now
void DefaultViewPort::drawImgRegion(QPainter *painter)
{
if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3)
return;
qreal offsetX = param_matrixWorld.dx()/param_matrixWorld.m11();
double pixel_width = param_matrixWorld.m11()*ratioX;
double pixel_height = param_matrixWorld.m11()*ratioY;
qreal offsetX = param_matrixWorld.dx()/pixel_width;
offsetX = offsetX - floor(offsetX);
qreal offsetY = param_matrixWorld.dy()/param_matrixWorld.m11();
qreal offsetY = param_matrixWorld.dy()/pixel_height;
offsetY = offsetY - floor(offsetY);
QSize view = size();
QVarLengthArray<QLineF, 30> linesX;
for (qreal _x = offsetX*param_matrixWorld.m11(); _x < view.width(); _x += param_matrixWorld.m11() )
for (qreal _x = offsetX*pixel_width; _x < view.width(); _x += pixel_width )
linesX.append(QLineF(_x, 0, _x, view.height()));
QVarLengthArray<QLineF, 30> linesY;
for (qreal _y = offsetY*param_matrixWorld.m11(); _y < view.height(); _y += param_matrixWorld.m11() )
for (qreal _y = offsetY*pixel_height; _y < view.height(); _y += pixel_height )
linesY.append(QLineF(0, _y, view.width(), _y));
@ -2910,27 +2909,25 @@ void DefaultViewPort::drawImgRegion(QPainter *painter)
int original_font_size = f.pointSize();
//change font size
//f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5);
f.setPixelSize(10+(param_matrixWorld.m11()-threshold_zoom_img_region)/5);
f.setPixelSize(10+(pixel_height-threshold_zoom_img_region)/5);
painter->setFont(f);
QString val;
QRgb rgbValue;
QPointF point1;//sorry, I do not know how to name it
QPointF point2;//idem
for (int j=-1;j<height()/param_matrixWorld.m11();j++)//-1 because display the pixels top rows left colums
for (int i=-1;i<width()/param_matrixWorld.m11();i++)//-1
for (int j=-1;j<height()/pixel_height;j++)//-1 because display the pixels top rows left columns
for (int i=-1;i<width()/pixel_width;i++)//-1
{
point1.setX((i+offsetX)*param_matrixWorld.m11());
point1.setY((j+offsetY)*param_matrixWorld.m11());
matrixWorld_inv.map(point1.x(),point1.y(),&point2.rx(),&point2.ry());
point2.rx()= (long) (point2.x() + 0.5);
point2.ry()= (long) (point2.y() + 0.5);
if (point2.x() >= 0 && point2.y() >= 0)
rgbValue = image2Draw_qt_resized.pixel(QPoint(point2.x(),point2.y()));
// Calculate top left of the pixel's position in the viewport (screen space)
QPointF pos_in_view((i+offsetX)*pixel_width, (j+offsetY)*pixel_height);
// Calculate top left of the pixel's position in the image (image space)
QPointF pos_in_image = matrixWorld_inv.map(pos_in_view);// Top left of pixel in view
pos_in_image.rx() = pos_in_image.x()/ratioX;
pos_in_image.ry() = pos_in_image.y()/ratioY;
QPoint point_in_image(pos_in_image.x() + 0.5f,pos_in_image.y() + 0.5f);// Add 0.5 for rounding
QRgb rgbValue;
if (image2Draw_qt.valid(point_in_image))
rgbValue = image2Draw_qt.pixel(point_in_image);
else
rgbValue = qRgb(0,0,0);
@ -2943,29 +2940,29 @@ void DefaultViewPort::drawImgRegion(QPainter *painter)
painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2),
Qt::AlignCenter, val);
*/
QString val;
val = tr("%1").arg(qRed(rgbValue));
painter->setPen(QPen(Qt::red, 1));
painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/3),
painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height/3),
Qt::AlignCenter, val);
val = tr("%1").arg(qGreen(rgbValue));
painter->setPen(QPen(Qt::green, 1));
painter->drawText(QRect(point1.x(),point1.y()+param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3),
painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+pixel_height/3,pixel_width,pixel_height/3),
Qt::AlignCenter, val);
val = tr("%1").arg(qBlue(rgbValue));
painter->setPen(QPen(Qt::blue, 1));
painter->drawText(QRect(point1.x(),point1.y()+2*param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3),
painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+2*pixel_height/3,pixel_width,pixel_height/3),
Qt::AlignCenter, val);
}
if (nbChannelOriginImage==CV_8UC1)
{
val = tr("%1").arg(qRed(rgbValue));
painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()),
QString val = tr("%1").arg(qRed(rgbValue));
painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height),
Qt::AlignCenter, val);
}
}

@ -522,7 +522,6 @@ private:
CvMat* image2Draw_mat;
QImage image2Draw_qt;
QImage image2Draw_qt_resized;
int nbChannelOriginImage;
//for mouse callback

@ -585,4 +585,18 @@ public class Calib3dTest extends OpenCVTestCase {
public void testValidateDisparityMatMatIntIntInt() {
fail("Not yet implemented");
}
/**
 * Checks Calib3d.computeCorrespondEpilines on a single point against a
 * precomputed epipolar line (3 coefficients stored as one CV_32FC3 element).
 */
public void testComputeCorrespondEpilines()
{
    // One input point, (2, 3).
    MatOfPoint2f points = new MatOfPoint2f();
    points.alloc(1);
    points.put(0, 0, 2, 3); //add(new Point(x, y));

    // 3x3 double-precision fundamental matrix, filled row by row.
    Mat fundamentalMatrix = new Mat(3, 3, CvType.CV_64F);
    fundamentalMatrix.put(0, 0, 0, -0.577, 0.288, 0.577, 0, 0.288, -0.288, -0.288, 0);

    // Expected line coefficients for the point above.
    Mat expectedLines = new Mat(1, 1, CvType.CV_32FC3);
    expectedLines.put(0, 0, -0.70735186, 0.70686162, -0.70588124);

    Mat computedLines = new Mat();
    Calib3d.computeCorrespondEpilines(points, 1, fundamentalMatrix, computedLines);

    assertMatEqual(expectedLines, computedLines, EPS);
}
}

@ -1149,3 +1149,76 @@ protected:
TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); }
TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); }
// Regression test verifying that a FeatureDetector honours the mask argument.
//
// The test image is divided into nStepX x nStepY equally sized tiles. For each
// tile a mask is built that is 255 inside the tile and 0 elsewhere, the
// detector is run with that mask, and every returned keypoint is required to
// lie inside the tile. The first keypoint found outside fails the test.
class FeatureDetectorUsingMaskTest : public cvtest::BaseTest
{
public:
    // featureDetector: detector under test; must not be an empty pointer.
    FeatureDetectorUsingMaskTest(const Ptr<FeatureDetector>& featureDetector) :
        featureDetector_(featureDetector)
    {
        CV_Assert(!featureDetector_.empty());
    }

protected:
    void run(int)
    {
        // Number of tiles along each axis.
        const int nStepX = 2;
        const int nStepY = 2;

        const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png";

        Mat image = imread(imageFilename);
        if(image.empty())
        {
            ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str());
            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
            return;
        }

        Mat mask(image.size(), CV_8U);

        // Integer division: any remainder columns/rows at the right/bottom
        // edge are never part of a tile and therefore always stay masked out.
        const int stepX = image.size().width / nStepX;
        const int stepY = image.size().height / nStepY;

        vector<KeyPoint> keyPoints;
        vector<Point2f> points;
        for(int i=0; i<nStepX; ++i)
            for(int j=0; j<nStepY; ++j)
            {
                // Enable only the current tile in the mask.
                mask.setTo(0);
                Rect whiteArea(i * stepX, j * stepY, stepX, stepY);
                mask(whiteArea).setTo(255);

                featureDetector_->detect(image, keyPoints, mask);
                KeyPoint::convert(keyPoints, points);

                for(size_t k=0; k<points.size(); ++k)
                {
                    if ( !whiteArea.contains(points[k]) )
                    {
                        // Trailing newline keeps this entry on its own log line,
                        // matching the other message emitted by this test.
                        ts->printf(cvtest::TS::LOG, "The feature point is outside of the mask.\n");
                        ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
                        return;
                    }
                }
            }

        ts->set_failed_test_info( cvtest::TS::OK );
    }

    Ptr<FeatureDetector> featureDetector_;
};
// Runs the mask-compliance regression test with the "Feature2D.SIFT" detector.
TEST(Features2d_SIFT_using_mask, regression)
{
    const Ptr<FeatureDetector> siftDetector = Algorithm::create<FeatureDetector>("Feature2D.SIFT");
    FeatureDetectorUsingMaskTest maskTest(siftDetector);
    maskTest.safe_run();
}
// Same mask-compliance regression test for the "Feature2D.SURF" detector.
// The test case name carries the DISABLED_ prefix.
TEST(DISABLED_Features2d_SURF_using_mask, regression)
{
    const Ptr<FeatureDetector> surfDetector = Algorithm::create<FeatureDetector>("Feature2D.SURF");
    FeatureDetectorUsingMaskTest maskTest(surfDetector);
    maskTest.safe_run();
}

@ -245,6 +245,11 @@ namespace cv
operator Mat() const;
void download(cv::Mat &m) const;
//! convert to _InputArray
operator _InputArray();
//! convert to _OutputArray
operator _OutputArray();
//! returns a new oclMatrix header for the specified row
oclMat row(int y) const;
@ -386,6 +391,9 @@ namespace cv
int wholecols;
};
// convert InputArray/OutputArray to oclMat references
CV_EXPORTS oclMat& getOclMatRef(InputArray src);
CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
///////////////////// mat split and merge /////////////////////////////////
//! Compose a multi-channel array from several single-channel arrays

@ -113,7 +113,7 @@ namespace cv
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures
// note:
// 1. there is no memory management. The user needs to explicitly release the resource

@ -52,6 +52,8 @@ int main(int argc, const char *argv[])
cerr << "no device found\n";
return -1;
}
// set this to overwrite binary cache every time the test starts
ocl::setBinaryDiskCache(ocl::CACHE_UPDATE);
int devidx = 0;

@ -15,8 +15,8 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Chunpeng Zhang chunpeng@multicorewareinc.com
//
// Fangfang Bai, fangfang@multicorewareinc.com
// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@ -31,7 +31,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
@ -45,50 +45,57 @@
//M*/
#include "precomp.hpp"
#include <iomanip>
///////////// StereoMatchBM ////////////////////////
PERFTEST(StereoMatchBM)
{
Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
Mat disp,dst;
ocl::oclMat d_left, d_right,d_disp;
int n_disp= 128;
int winSize =19;
#ifdef HAVE_OPENCL
SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
PARAM_TEST_CASE(ColumnSum, cv::Size)
{
cv::Size size;
cv::Mat src;
Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
bm->compute(left_image, right_image, dst);
virtual void SetUp()
{
size = GET_PARAM(0);
}
};
CPU_ON;
bm->compute(left_image, right_image, dst);
CPU_OFF;
TEST_P(ColumnSum, Accuracy)
{
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst;
cv::ocl::oclMat d_src(src);
cv::ocl::columnSum(d_src, d_dst);
cv::Mat dst(d_dst);
for (int j = 0; j < src.cols; ++j)
{
float gold = src.at<float>(0, j);
float res = dst.at<float>(0, j);
ASSERT_NEAR(res, gold, 1e-5);
}
for (int i = 1; i < src.rows; ++i)
{
for (int j = 0; j < src.cols; ++j)
{
float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
float res = dst.at<float>(i, j);
ASSERT_NEAR(res, gold, 1e-5);
}
}
d_left.upload(left_image);
d_right.upload(right_image);
ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
WARMUP_ON;
d_bm(d_left, d_right, d_disp);
WARMUP_OFF;
cv::Mat ocl_mat;
d_disp.download(ocl_mat);
ocl_mat.convertTo(ocl_mat, dst.type());
GPU_ON;
d_bm(d_left, d_right, d_disp);
GPU_OFF;
GPU_FULL_ON;
d_left.upload(left_image);
d_right.upload(right_image);
d_bm(d_left, d_right, d_disp);
d_disp.download(disp);
GPU_FULL_OFF;
TestSystem::instance().setAccurate(-1, 0.);
}
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
#endif

@ -284,6 +284,7 @@ PERFTEST(GaussianBlur)
Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
const int ksize = 7;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
@ -291,29 +292,28 @@ PERFTEST(GaussianBlur)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
gen(src, size, size, all_type[j], 5, 16);
gen(src, size, size, all_type[j], 0, 256);
GaussianBlur(src, dst, Size(9, 9), 0);
GaussianBlur(src, dst, Size(ksize, ksize), 0);
CPU_ON;
GaussianBlur(src, dst, Size(9, 9), 0);
GaussianBlur(src, dst, Size(ksize, ksize), 0);
CPU_OFF;
ocl::oclMat d_src(src);
ocl::oclMat d_dst(src.size(), src.type());
ocl::oclMat d_buf;
ocl::oclMat d_dst;
WARMUP_ON;
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0);
WARMUP_OFF;
GPU_ON;
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0);
d_dst.download(ocl_dst);
GPU_FULL_OFF;

@ -46,11 +46,6 @@
#include "precomp.hpp"
///////////// HOG////////////////////////
// Returns true when r1 and r2 agree in x, y, width and height, each to within
// (strictly less than) `threshold`.
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
{
    if (abs(r1.x - r2.x) >= threshold)
        return false;
    if (abs(r1.y - r2.y) >= threshold)
        return false;
    if (abs(r1.width - r2.width) >= threshold)
        return false;
    return abs(r1.height - r2.height) < threshold;
}
PERFTEST(HOG)
{
@ -61,13 +56,12 @@ PERFTEST(HOG)
throw runtime_error("can't open road.png");
}
cv::HOGDescriptor hog;
hog.setSVMDetector(hog.getDefaultPeopleDetector());
std::vector<cv::Rect> found_locations;
std::vector<cv::Rect> d_found_locations;
SUBTEST << 768 << 'x' << 576 << "; road.png";
SUBTEST << src.cols << 'x' << src.rows << "; road.png";
hog.detectMultiScale(src, found_locations);
@ -84,70 +78,10 @@ PERFTEST(HOG)
ocl_hog.detectMultiScale(d_src, d_found_locations);
WARMUP_OFF;
// Ground-truth rectangular people window
cv::Rect win1_64x128(231, 190, 72, 144);
cv::Rect win2_64x128(621, 156, 97, 194);
cv::Rect win1_48x96(238, 198, 63, 126);
cv::Rect win2_48x96(619, 161, 92, 185);
cv::Rect win3_48x96(488, 136, 56, 112);
// Compare whether ground-truth windows are detected and compare the number of windows detected.
std::vector<int> d_comp(4);
std::vector<int> comp(4);
for(int i = 0; i < (int)d_comp.size(); i++)
{
d_comp[i] = 0;
comp[i] = 0;
}
int threshold = 10;
int val = 32;
d_comp[0] = (int)d_found_locations.size();
comp[0] = (int)found_locations.size();
cv::Size winSize = hog.winSize;
if (winSize == cv::Size(48, 96))
{
for(int i = 0; i < (int)d_found_locations.size(); i++)
{
if (match_rect(d_found_locations[i], win1_48x96, threshold))
d_comp[1] = val;
if (match_rect(d_found_locations[i], win2_48x96, threshold))
d_comp[2] = val;
if (match_rect(d_found_locations[i], win3_48x96, threshold))
d_comp[3] = val;
}
for(int i = 0; i < (int)found_locations.size(); i++)
{
if (match_rect(found_locations[i], win1_48x96, threshold))
comp[1] = val;
if (match_rect(found_locations[i], win2_48x96, threshold))
comp[2] = val;
if (match_rect(found_locations[i], win3_48x96, threshold))
comp[3] = val;
}
}
else if (winSize == cv::Size(64, 128))
{
for(int i = 0; i < (int)d_found_locations.size(); i++)
{
if (match_rect(d_found_locations[i], win1_64x128, threshold))
d_comp[1] = val;
if (match_rect(d_found_locations[i], win2_64x128, threshold))
d_comp[2] = val;
}
for(int i = 0; i < (int)found_locations.size(); i++)
{
if (match_rect(found_locations[i], win1_64x128, threshold))
comp[1] = val;
if (match_rect(found_locations[i], win2_64x128, threshold))
comp[2] = val;
}
}
cv::Mat gpu_rst(d_comp), cpu_rst(comp);
TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
if(d_found_locations.size() == found_locations.size())
TestSystem::instance().setAccurate(1, 0);
else
TestSystem::instance().setAccurate(0, abs((int)found_locations.size() - (int)d_found_locations.size()));
GPU_ON;
ocl_hog.detectMultiScale(d_src, found_locations);

@ -743,12 +743,12 @@ PERFTEST(meanShiftFiltering)
WARMUP_OFF;
GPU_ON;
ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
d_dst.download(ocl_dst);
GPU_FULL_OFF;
@ -969,3 +969,45 @@ PERFTEST(CLAHE)
}
}
}
///////////// columnSum////////////////////////
// Performance test for ocl::columnSum on square CV_32FC1 inputs.
// For each size from Min_Size to Max_Size (multiplied by Multiple each step) it
// computes a CPU reference, runs the OpenCL version, and compares both results.
// NOTE(review): the *_ON / *_OFF macros presumably start/stop the timers of the
// perf framework - confirm against their definitions.
PERFTEST(columnSum)
{
Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; CV_32FC1";
// Fill src with a size x size CV_32FC1 matrix (presumably random values
// in the given 0..256 range - confirm against gen()).
gen(src, size, size, CV_32FC1, 0, 256);
CPU_ON;
// CPU reference: running sum down each column.
dst.create(src.size(), src.type());
// First row is copied unchanged.
for (int j = 0; j < src.cols; j++)
dst.at<float>(0, j) = src.at<float>(0, j);
// Each following row adds the source value to the sum of the row above.
for (int i = 1; i < src.rows; ++i)
for (int j = 0; j < src.cols; ++j)
dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
CPU_OFF;
d_src.upload(src);
// First OpenCL call, bracketed separately from the measured run below.
WARMUP_ON;
ocl::columnSum(d_src, d_dst);
WARMUP_OFF;
// OpenCL call only, with the input already uploaded.
GPU_ON;
ocl::columnSum(d_src, d_dst);
GPU_OFF;
// Full round trip: upload, OpenCL call and download of the result.
GPU_FULL_ON;
d_src.upload(src);
ocl::columnSum(d_src, d_dst);
d_dst.download(ocl_dst);
GPU_FULL_OFF;
// Compare the CPU reference with the downloaded OpenCL result (tolerance 0.5).
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
}
}

@ -44,45 +44,49 @@
//
//M*/
#include "precomp.hpp"
///////////// columnSum////////////////////////
PERFTEST(columnSum)
///////////// Moments ////////////////////////
PERFTEST(Moments)
{
Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
Mat src;
bool binaryImage = 0;
int all_type[] = {CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1};
std::string type_name[] = {"CV_8UC1", "CV_16SC1", "CV_32FC1", "CV_64FC1"};
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; CV_32FC1";
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j];
gen(src, size, size, all_type[j], 0, 256);
cv::Moments CvMom = moments(src, binaryImage);
gen(src, size, size, CV_32FC1, 0, 256);
CPU_ON;
moments(src, binaryImage);
CPU_OFF;
CPU_ON;
dst.create(src.size(), src.type());
for (int j = 0; j < src.cols; j++)
dst.at<float>(0, j) = src.at<float>(0, j);
cv::Moments oclMom;
WARMUP_ON;
oclMom = ocl::ocl_moments(src, binaryImage);
WARMUP_OFF;
for (int i = 1; i < src.rows; ++i)
for (int j = 0; j < src.cols; ++j)
dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
CPU_OFF;
Mat gpu_dst, cpu_dst;
HuMoments(CvMom, cpu_dst);
HuMoments(oclMom, gpu_dst);
d_src.upload(src);
GPU_ON;
ocl::ocl_moments(src, binaryImage);
GPU_OFF;
WARMUP_ON;
ocl::columnSum(d_src, d_dst);
WARMUP_OFF;
GPU_FULL_ON;
ocl::ocl_moments(src, binaryImage);
GPU_FULL_OFF;
GPU_ON;
ocl::columnSum(d_src, d_dst);
GPU_OFF;
TestSystem::instance().ExpectedMatNear(gpu_dst, cpu_dst, .5);
GPU_FULL_ON;
d_src.upload(src);
ocl::columnSum(d_src, d_dst);
d_dst.download(ocl_dst);
GPU_FULL_OFF;
}
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
}
}
}

@ -331,20 +331,6 @@ void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time,
cout << setiosflags(ios_base::left);
stringstream stream;
#if 0
if(is_accurate == 1)
stream << "Pass";
else if(is_accurate_ == 0)
stream << "Fail";
else if(is_accurate == -1)
stream << " ";
else
{
std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
exit(-1);
}
#endif
std::stringstream &cur_subtest_description = getCurSubtestDescription();
#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE

@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Wenju He, wenju@multicorewareinc.com
// Wenju He, wenju@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@ -48,13 +48,107 @@
using namespace cv;
using namespace cv::ocl;
#define CELL_WIDTH 8
#define CELL_HEIGHT 8
#define CELLS_PER_BLOCK_X 2
#define CELLS_PER_BLOCK_Y 2
#define NTHREADS 256
static oclMat gauss_w_lut;
static bool hog_device_cpu;
/* Precomputed lookup tables, valid when sigma is 4.0f: 256 gaussian weights followed by 256 interp_weight values */
static const float gaussian_interp_lut[] =
{
/* gaussian lut */
0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f,
0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f,
0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f,
0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f,
0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f,
0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f,
0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f,
0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f,
0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f,
0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f,
0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f,
0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f,
0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f,
0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f,
0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f,
0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f,
0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f,
0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f,
0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f,
0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f,
0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f,
0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f,
0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f,
0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f,
0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f,
0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,
/* interp_weight lut */
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f,
0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f,
0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f,
0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f,
0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f,
0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f,
0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f,
0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f,
0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f,
0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f,
0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f,
0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f,
0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f,
0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f,
0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f,
0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f,
0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f,
0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f,
0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f,
0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f,
0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f
};
namespace cv
{
namespace ocl
@ -78,38 +172,43 @@ namespace cv
int cnblocks_win_x;
int cnblocks_win_y;
int cblock_hist_size;
int cblock_hist_size_2up;
int cdescr_size;
int cdescr_width;
int cdescr_height;
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y);
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
int height, int width, const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists);
int height, int width, float sigma, const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle,
const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists);
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, cv::ocl::oclMat &block_hists, float threshold);
int height, int width, cv::ocl::oclMat &block_hists,
float threshold);
void classify_hists(int win_height, int win_width, int block_stride_y,
int block_stride_x, int win_stride_y, int win_stride_x, int height,
int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, const cv::ocl::oclMat &block_hists,
const cv::ocl::oclMat &coefs, float free_coef,
float threshold, cv::ocl::oclMat &labels);
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y,
int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors);
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y,
int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors);
void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
float angle_scale, cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle, bool correct_gamma);
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
void resize( const oclMat &src, oclMat &dst, const Size sz);
float angle_scale, cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle, bool correct_gamma);
}
}
}
@ -117,8 +216,14 @@ namespace cv
using namespace ::cv::ocl::device;
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
// Integer division of `total` by `grain`, rounded up for positive operands
// (e.g. the number of `grain`-sized groups needed to cover `total` items).
static inline int divUp(int total, int grain)
{
    const int roundedUpTotal = total + grain - 1;
    return roundedUpTotal / grain;
}
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
Size cell_size_, int nbins_, double win_sigma_,
double threshold_L2hys_, bool gamma_correction_, int nlevels_)
: win_size(win_size_),
block_size(block_size_),
block_stride(block_stride_),
@ -132,19 +237,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
(win_size.height - block_size.height) % block_stride.height == 0);
CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0);
CV_Assert(block_size.width % cell_size.width == 0 &&
block_size.height % cell_size.height == 0);
CV_Assert(block_stride == cell_size);
CV_Assert(cell_size == Size(8, 8));
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
Size cells_per_block(block_size.width / cell_size.width,
block_size.height / cell_size.height);
CV_Assert(cells_per_block == Size(2, 2));
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
hog::set_up_constants(nbins, block_stride.width, block_stride.height,
blocks_per_win.width, blocks_per_win.height);
effect_size = Size(0, 0);
if (queryDeviceInfo<IS_CPU_DEVICE, bool>())
hog_device_cpu = true;
else
hog_device_cpu = false;
}
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@ -154,7 +267,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
{
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
Size cells_per_block = Size(block_size.width / cell_size.width,
block_size.height / cell_size.height);
return (size_t)(nbins * cells_per_block.area());
}
@ -167,7 +281,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const
{
size_t detector_size = detector.rows * detector.cols;
size_t descriptor_size = getDescriptorSize();
return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
return detector_size == 0 || detector_size == descriptor_size ||
detector_size == descriptor_size + 1;
}
void cv::ocl::HOGDescriptor::setSVMDetector(const std::vector<float> &_detector)
@ -207,10 +322,16 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
const size_t block_hist_size = getBlockHistogramSize();
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
block_hists.create(1,
static_cast<int>(block_hist_size * blocks_per_img.area()) + 256, CV_32F);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
labels.create(1, wins_per_img.area(), CV_8U);
std::vector<float> v_lut = std::vector<float>(gaussian_interp_lut, gaussian_interp_lut +
sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0]));
Mat m_lut(v_lut);
gauss_w_lut.upload(m_lut.reshape(1,1));
}
void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle)
@ -221,10 +342,12 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
switch (img.type())
{
case CV_8UC1:
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img,
angleScale, grad, qangle, gamma_correction);
break;
case CV_8UC4:
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img,
angleScale, grad, qangle, gamma_correction);
break;
}
}
@ -232,19 +355,21 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
{
computeGradient(img, grad, qangle);
computeGradient(img, this->grad, this->qangle);
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
grad, qangle, (float)getWinSigma(), block_hists);
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists);
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
block_hists, (float)threshold_L2hys);
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
effect_size.width, block_hists, (float)threshold_L2hys);
}
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format)
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
oclMat &descriptors, int descr_format)
{
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride);
@ -254,17 +379,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
descriptors.create(wins_per_img.area(),
static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
switch (descr_format)
{
case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
hog::extract_descrs_by_rows(win_size.height, win_size.width,
block_stride.height, block_stride.width, win_stride.height, win_stride.width,
effect_size.height, effect_size.width, block_hists, descriptors);
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
hog::extract_descrs_by_cols(win_size.height, win_size.width,
block_stride.height, block_stride.width, win_stride.height, win_stride.width,
effect_size.height, effect_size.width, block_hists, descriptors);
break;
default:
CV_Error(Error::StsBadArg, "Unknown descriptor format");
@ -272,7 +400,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
}
void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits, double hit_threshold, Size win_stride, Size padding)
void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
double hit_threshold, Size win_stride, Size padding)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(padding == Size(0, 0));
@ -284,14 +413,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
if (win_stride == Size())
win_stride = block_stride;
else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride);
computeBlockHistograms(img);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
detector, (float)free_coef, (float)hit_threshold, labels);
hog::classify_hists(win_size.height, win_size.width, block_stride.height,
block_stride.width, win_stride.height, win_stride.width,
effect_size.height, effect_size.width, block_hists, detector,
(float)free_coef, (float)hit_threshold, labels);
labels.download(labels_host);
unsigned char *vec = labels_host.ptr();
@ -307,8 +438,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations, double hit_threshold,
Size win_stride, Size padding, double scale0, int group_threshold)
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
double hit_threshold, Size win_stride, Size padding,
double scale0, int group_threshold)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(scale0 > 1);
@ -334,7 +466,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rec
if (win_stride == Size())
win_stride = block_stride;
else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride);
image_scale.create(img.size(), img.type());
@ -348,16 +481,17 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rec
}
else
{
hog::resize( img, image_scale, effect_size);
resize(img, image_scale, effect_size);
detect(image_scale, locations, hit_threshold, win_stride, padding);
}
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
Size scaled_win_size(cvRound(win_size.width * scale),
cvRound(win_size.height * scale));
for (size_t j = 0; j < locations.size(); j++)
all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
}
found_locations.assign(all_candidates.begin(), all_candidates.end());
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
groupRectangles(found_locations, group_threshold, 0.2);
}
int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
@ -365,9 +499,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
return (size - part_size + stride) / stride;
}
// Two-dimensional overload: applies the scalar numPartsWithin() to the
// widths and to the heights independently and returns both counts as a Size
// (width count first, height count second).
cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
cv::Size stride)
{
return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
return Size(numPartsWithin(size.width, part_size.width, stride.width),
numPartsWithin(size.height, part_size.height, stride.height));
}
std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
@ -1548,8 +1684,9 @@ static int power_2up(unsigned int n)
return -1; // Input is too big
}
void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y)
void cv::ocl::device::hog::set_up_constants(int nbins,
int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y)
{
cnbins = nbins;
cblock_stride_x = block_stride_x;
@ -1560,29 +1697,31 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
cblock_hist_size = block_hist_size;
int block_hist_size_2up = power_2up(block_hist_size);
cblock_hist_size_2up = block_hist_size_2up;
int descr_width = nblocks_win_x * block_hist_size;
cdescr_width = descr_width;
cdescr_height = nblocks_win_y;
int descr_size = descr_width * nblocks_win_y;
cdescr_size = descr_size;
}
void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists)
void cv::ocl::device::hog::compute_hists(int nbins,
int block_stride_x, int block_stride_y,
int height, int width, float sigma,
const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle,
const cv::ocl::oclMat &gauss_w_lut,
cv::ocl::oclMat &block_hists)
{
Context *clCxt = Context::getContext();
String kernelName = "compute_hists_kernel";
std::vector< std::pair<size_t, const void *> > args;
String kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" :
"compute_hists_kernel";
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 };
size_t localThreads[3] = { 32, 2, 1 };
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
/ block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
/ block_stride_y;
int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step;
@ -1590,6 +1729,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
// Precompute gaussian spatial window parameter
float scale = 1.f / (2.f * sigma * sigma);
int blocks_in_group = 4;
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
size_t globalThreads[3] = {
divUp(img_block_width * img_block_height, blocks_in_group) * localThreads[0], 2, 1 };
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
int smem = hists_size + final_hists_size;
@ -1604,19 +1748,26 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
if (kernelName.compare("compute_hists_lut_kernel") == 0)
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data));
else
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( smem, (void *)NULL));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}
void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, cv::ocl::oclMat &block_hists, float threshold)
void cv::ocl::device::hog::normalize_hists(int nbins,
int block_stride_x, int block_stride_y,
int height, int width,
cv::ocl::oclMat &block_hists,
float threshold)
{
Context *clCxt = Context::getContext();
String kernelName = "normalize_hists_kernel";
std::vector< std::pair<size_t, const void *> > args;
String kernelName;
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
int nthreads = power_2up(block_hist_size);
@ -1626,40 +1777,90 @@ void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int bl
size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 };
size_t localThreads[3] = { nthreads, 1, 1 };
if ((nthreads < 32) || (nthreads > 512) )
cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big", "cv::ocl::device::hog::normalize_hists", __FILE__, __LINE__);
if ( nbins == 9 )
{
/* optimized for the case of 9 bins */
kernelName = "normalize_hists_36_kernel";
int blocks_in_group = NTHREADS / block_hist_size;
nthreads = blocks_in_group * block_hist_size;
int num_groups = divUp( img_block_width * img_block_height, blocks_in_group);
globalThreads[0] = nthreads * num_groups;
localThreads[0] = nthreads;
}
else
{
kernelName = "normalize_hists_kernel";
nthreads = power_2up(block_hist_size);
globalThreads[0] = img_block_width * nthreads;
globalThreads[1] = img_block_height;
localThreads[0] = nthreads;
if ((nthreads < 32) || (nthreads > 512) )
cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big",
"normalize_hists", __FILE__, __LINE__);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
}
args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( std::make_pair( nthreads * sizeof(float), (void *)NULL));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
if(hog_device_cpu)
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, "-D CPU");
else
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y,
int block_stride_x, int win_stride_y, int win_stride_x, int height,
int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
float threshold, cv::ocl::oclMat &labels)
void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width,
const cv::ocl::oclMat &block_hists,
const cv::ocl::oclMat &coefs,
float free_coef, float threshold,
cv::ocl::oclMat &labels)
{
Context *clCxt = Context::getContext();
String kernelName = "classify_hists_kernel";
std::vector< std::pair<size_t, const void *> > args;
int nthreads;
String kernelName;
switch (cdescr_width)
{
case 180:
nthreads = 180;
kernelName = "classify_hists_180_kernel";
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
break;
case 252:
nthreads = 256;
kernelName = "classify_hists_252_kernel";
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
break;
default:
nthreads = 256;
kernelName = "classify_hists_kernel";
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
}
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
size_t localThreads[3] = { NTHREADS, 1, 1 };
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
size_t localThreads[3] = { nthreads, 1, 1 };
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_win_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
@ -1670,12 +1871,20 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo
args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&labels.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
if(hog_device_cpu)
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, "-D CPU");
else
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width,
const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width,
const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors)
{
Context *clCxt = Context::getContext();
String kernelName = "extract_descrs_by_rows_kernel";
@ -1685,7 +1894,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int descriptors_quadstep = descriptors.step >> 2;
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
@ -1701,12 +1911,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width,
const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width,
const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors)
{
Context *clCxt = Context::getContext();
String kernelName = "extract_descrs_by_cols_kernel";
@ -1716,7 +1930,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int descriptors_quadstep = descriptors.step >> 2;
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
@ -1733,16 +1948,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}
// Integer division of total by grain, rounded up for non-negative total
// and positive grain: grain - 1 is added to total before the truncating
// division.
static inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
const cv::ocl::oclMat &img,
float angle_scale,
cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle,
bool correct_gamma)
{
Context *clCxt = Context::getContext();
String kernelName = "compute_gradients_8UC1_kernel";
@ -1767,11 +1982,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c
args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
const cv::ocl::oclMat &img,
float angle_scale,
cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle,
bool correct_gamma)
{
Context *clCxt = Context::getContext();
String kernelName = "compute_gradients_8UC4_kernel";
@ -1797,37 +2017,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
{
CV_Assert( (src.channels() == dst.channels()) );
Context *clCxt = Context::getContext();
String kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
float ifx = (float)src.cols / sz.width;
float ify = (float)src.rows / sz.height;
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.width));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.height));
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
}

@ -73,6 +73,7 @@ namespace cv
}
}
////////////////////////////////////////////////////////////////////////
// convert_C3C4
static void convert_C3C4(const cl_mem &src, oclMat &dst)
@ -215,6 +216,34 @@ void cv::ocl::oclMat::upload(const Mat &m)
offset = ofs.y * step + ofs.x * elemSize();
}
// Conversion to cv::_InputArray: returns a proxy tagged with the OCL_MAT
// flag whose obj field holds the address of this oclMat. No data is copied.
cv::ocl::oclMat::operator cv::_InputArray()
{
    _InputArray proxy;
    proxy.obj = reinterpret_cast<void *>(this);
    proxy.flags = cv::_InputArray::OCL_MAT;
    return proxy;
}
// Conversion to cv::_OutputArray: returns a proxy tagged with the OCL_MAT
// flag whose obj field holds the address of this oclMat. No data is copied.
cv::ocl::oclMat::operator cv::_OutputArray()
{
    _OutputArray proxy;
    proxy.obj = reinterpret_cast<void *>(this);
    proxy.flags = cv::_InputArray::OCL_MAT;
    return proxy;
}
// Recovers the oclMat referenced by an InputArray proxy.
// Asserts that the proxy carries the OCL_MAT flag, then reinterprets its
// obj pointer as an oclMat and returns a reference to it.
cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
{
    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
    oclMat* const wrapped = reinterpret_cast<oclMat*>(src.obj);
    return *wrapped;
}
// Recovers the oclMat referenced by an OutputArray proxy.
// Asserts that the proxy carries the OCL_MAT flag, then reinterprets its
// obj pointer as an oclMat and returns a reference to it.
cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
{
    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
    oclMat* const wrapped = reinterpret_cast<oclMat*>(src.obj);
    return *wrapped;
}
void cv::ocl::oclMat::download(cv::Mat &m) const
{
CV_DbgAssert(!this->empty());
@ -382,7 +411,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
if( rtype < 0 )
rtype = type();
else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels());
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
//int scn = channels();
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);

@ -80,7 +80,7 @@ namespace cv
// provide additional methods for the user to interact with the command queue after a task is fired
static void openCLExecuteKernel_2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode)
int depth, const char *build_options, FLUSH_MODE finish_mode)
{
//construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
@ -133,7 +133,7 @@ namespace cv
}
void openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode)
{
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,

@ -43,7 +43,6 @@
//
//M*/
#define CELL_WIDTH 8
#define CELL_HEIGHT 8
#define CELLS_PER_BLOCK_X 2
@ -51,6 +50,100 @@
#define NTHREADS 256
#define CV_PI_F 3.1415926535897932384626433832795f
//----------------------------------------------------------------------------
// Histogram computation
// 12 threads for a cell, 12x4 threads per block
// Use pre-computed gaussian and interp_weight lookup tables if sigma is 4.0f
//
// Work layout: every HOG block is processed by 24 (dim 0) x 2 (dim 1)
// work-items; one work-group covers blocks_in_group blocks laid out side by
// side along dimension 0.
//
// Parameters:
//   cblock_stride_x/y - stride between neighbouring blocks (presumably in
//                       pixels - confirm against the host code)
//   cnbins            - number of orientation bins per cell
//   cblock_hist_size  - number of floats written to block_hists per block
//   img_block_width   - number of blocks in one row of the block grid
//   blocks_in_group   - number of blocks handled by one work-group
//   blocks_total      - total number of blocks; work-items whose block index
//                       is >= blocks_total read from the buffer start and
//                       write nothing
//   grad_quadstep     - row stride of grad, counted in floats
//   qangle_step       - row stride of qangle, counted in uchar elements
//   grad / qangle     - two magnitudes / two bin indices per pixel position
//   gauss_w_lut       - read as two 16-column tables: gaussian weights from
//                       offset 0 and interpolation weights from offset 256
//   block_hists       - output histograms, cblock_hist_size floats per block
//   smem              - local scratch; each block in the group uses
//                       cnbins * (48 + 4) floats
//                       NOTE(review): the kernel therefore needs
//                       blocks_in_group * cnbins * 52 floats of local
//                       memory - confirm the host allocates that much
__kernel void compute_hists_lut_kernel(
const int cblock_stride_x, const int cblock_stride_y,
const int cnbins, const int cblock_hist_size, const int img_block_width,
const int blocks_in_group, const int blocks_total,
const int grad_quadstep, const int qangle_step,
__global const float* grad, __global const uchar* qangle,
__global const float* gauss_w_lut,
__global float* block_hists, __local float* smem)
{
const int lx = get_local_id(0);
const int lp = lx / 24; /* local group id */
const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */
// Position of this block inside the image's block grid.
const int gidY = gid / img_block_width;
const int gidX = gid - gidY * img_block_width;
// Work-item coordinates inside the 24 x 2 patch that serves one block.
const int lidX = lx - lp * 24;
const int lidY = get_local_id(1);
// Cell inside the block and the 0..11 column index inside that cell.
const int cell_x = lidX / 12;
const int cell_y = lidY;
const int cell_thread_x = lidX - cell_x * 12;
// Per-block slice of local memory: cnbins * 48 partial sums followed by
// cnbins * 4 reduced values (final_hist).
__local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
__local float* final_hist = hists + cnbins *
(CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);
// Top-left sample read by this work-item.
const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
const int offset_y = gidY * cblock_stride_y + (cell_y << 2);
// Out-of-range blocks are pointed at the buffer start so that their reads
// stay inside the buffer; their results are discarded at the final write.
__global const float* grad_ptr = (gid < blocks_total) ?
grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
__global const uchar* qangle_ptr = (gid < blocks_total) ?
qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
// Private accumulator slot of this work-item; consecutive bins are 48
// floats apart (4 cells x 12 work-items).
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
cell_thread_x;
for (int bin_id = 0; bin_id < cnbins; ++bin_id)
hist[bin_id * 48] = 0.f;
// dist_x / dist_y index the interpolation-weight table, dist_center_x /
// dist_center_y index the gaussian table (see the two LUT reads below).
const int dist_x = -4 + cell_thread_x - 4 * cell_x;
const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
const int dist_y_begin = -4 - 4 * lidY;
// Each work-item walks down 12 consecutive rows, voting both samples of
// every position into its private slots.
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
{
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
grad_ptr += grad_quadstep;
qangle_ptr += qangle_step;
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8);
float gaussian = gauss_w_lut[idx];
idx = (dist_y + 8) * 16 + (dist_x + 8);
float interp_weight = gauss_w_lut[256+idx];
hist[bin.x * 48] += gaussian * interp_weight * vote.x;
hist[bin.y * 48] += gaussian * interp_weight * vote.y;
}
barrier(CLK_LOCAL_MEM_FENCE);
// Reduce the 12 partial sums of each cell, one bin per iteration:
// 12 -> 6 -> 3 -> 1, with the last three values summed by work-item 0.
volatile __local float* hist_ = hist;
for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
{
if (cell_thread_x < 6)
hist_[0] += hist_[6];
barrier(CLK_LOCAL_MEM_FENCE);
if (cell_thread_x < 3)
hist_[0] += hist_[3];
// The extra barrier is only compiled in when CPU is defined.
// NOTE(review): other builds presumably rely on these work-items
// executing in lockstep - confirm for the target devices.
#ifdef CPU
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if (cell_thread_x == 0)
final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
hist_[0] + hist_[1] + hist_[2];
}
barrier(CLK_LOCAL_MEM_FENCE);
// Copy the reduced histogram of this block to global memory, one float per
// work-item, skipping out-of-range blocks and surplus work-items.
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
if ((tid < cblock_hist_size) && (gid < blocks_total))
{
__global float* block_hist = block_hists +
(gidY * img_block_width + gidX) * cblock_hist_size;
block_hist[tid] = final_hist[tid];
}
}
//----------------------------------------------------------------------------
// Histogram computation
// 12 threads for a cell, 12x4 threads per block
@ -125,16 +218,14 @@ __kernel void compute_hists_kernel(
barrier(CLK_LOCAL_MEM_FENCE);
if (cell_thread_x < 3)
hist_[0] += hist_[3];
#ifdef WAVE_SIZE_1
#ifdef CPU
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if (cell_thread_x == 0)
final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
hist_[0] + hist_[1] + hist_[2];
}
#ifdef WAVE_SIZE_1
barrier(CLK_LOCAL_MEM_FENCE);
#endif
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
if ((tid < cblock_hist_size) && (gid < blocks_total))
@ -145,6 +236,57 @@ __kernel void compute_hists_kernel(
}
}
//-------------------------------------------------------------
// Normalization of histograms via L2Hys_norm
// optimized for the case of 9 bins
//
// Each work-item owns exactly one float of block_hists (index = global id);
// every run of 36 consecutive work-items inside a work-group forms one
// block histogram and reduces its own 36 squared values in local memory.
//
// Parameters:
//   block_hists - histograms, normalized in place
//   threshold   - upper clipping value applied after the first normalization
//   squares     - local scratch, one float per work-item of the group
//
// NOTE(review): gid is used without a bounds check - confirm that the host
// never launches work-items past the end of block_hists, or that the buffer
// is padded accordingly.
__kernel void normalize_hists_36_kernel(__global float* block_hists,
const float threshold, __local float *squares)
{
const int tid = get_local_id(0);
const int gid = get_global_id(0);
const int bid = tid / 36; /* block-hist id, (0 - 6) */
const int boffset = bid * 36; /* block-hist offset in the work-group */
const int hid = tid - boffset; /* histogram bin id, (0 - 35) */
// Pass 1: sum of squares of the raw histogram.
float elem = block_hists[gid];
squares[tid] = elem * elem;
barrier(CLK_LOCAL_MEM_FENCE);
// smem is this histogram's 36-float window inside squares.
__local float* smem = squares + boffset;
float sum = smem[hid];
// Reduction 36 -> 18 -> 9; the next step folds entries 4..7 into 0..3,
// leaving entry 8 to be added separately in the final sum.
if (hid < 18)
smem[hid] = sum = sum + smem[hid + 18];
barrier(CLK_LOCAL_MEM_FENCE);
if (hid < 9)
smem[hid] = sum = sum + smem[hid + 9];
barrier(CLK_LOCAL_MEM_FENCE);
if (hid < 4)
smem[hid] = sum + smem[hid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
// First normalization, then clip to threshold.
// NOTE(review): 3.6f presumably equals 0.1f * 36 (histogram size) - confirm.
elem = elem / (sqrt(sum) + 3.6f);
elem = min(elem, threshold);
// Every work-item must have read the old sums before squares is overwritten.
barrier(CLK_LOCAL_MEM_FENCE);
// Pass 2: sum of squares of the clipped values, same reduction as above.
squares[tid] = elem * elem;
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[hid];
if (hid < 18)
smem[hid] = sum = sum + smem[hid + 18];
barrier(CLK_LOCAL_MEM_FENCE);
if (hid < 9)
smem[hid] = sum = sum + smem[hid + 9];
barrier(CLK_LOCAL_MEM_FENCE);
if (hid < 4)
smem[hid] = sum + smem[hid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
// Second normalization; 1e-3f keeps the divisor away from zero.
block_hists[gid] = elem / (sqrt(sum) + 1e-3f);
}
//-------------------------------------------------------------
// Normalization of histograms via L2Hys_norm
//
@ -153,76 +295,50 @@ float reduce_smem(volatile __local float* smem, int size)
unsigned int tid = get_local_id(0);
float sum = smem[tid];
if (size >= 512)
{
if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (size >= 256)
{
if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (size >= 128)
{
if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE); }
#ifdef CPU
if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1];
barrier(CLK_LOCAL_MEM_FENCE); }
#else
if (tid < 32)
{
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
#endif
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
#endif
if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
#endif
if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[0];
#endif
return sum;
}
__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width,
__global float* block_hists, const float threshold, __local float *squares)
__kernel void normalize_hists_kernel(
const int nthreads, const int block_hist_size, const int img_block_width,
__global float* block_hists, const float threshold, __local float *squares)
{
const int tid = get_local_id(0);
const int gidX = get_group_id(0);
const int gidY = get_group_id(1);
__global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid;
__global float* hist = block_hists + (gidY * img_block_width + gidX) *
block_hist_size + tid;
float elem = 0.f;
if (tid < block_hist_size)
@ -249,100 +365,226 @@ __kernel void normalize_hists_kernel(const int nthreads, const int block_hist_si
//---------------------------------------------------------------------
// Linear SVM based classification
//
__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width,
const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs,
float free_coef, float threshold, __global uchar* labels)
// 48x96 window, 9 bins and default parameters
// 180 threads, each thread corresponds to a bin in a row
__kernel void classify_hists_180_kernel(
const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs,
float free_coef, float threshold, __global uchar* labels)
{
const int tid = get_local_id(0);
const int gidX = get_group_id(0);
const int gidY = get_group_id(1);
__global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
float product = 0.f;
for (int i = tid; i < cdescr_size; i += NTHREADS)
for (int i = 0; i < cdescr_height; i++)
{
int offset_y = i / cdescr_width;
int offset_x = i - offset_y * cdescr_width;
product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
product += coefs[i * cdescr_width + tid] *
hist[i * img_block_width * cblock_hist_size + tid];
}
__local float products[NTHREADS];
__local float products[180];
products[tid] = product;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) products[tid] = product = product + products[tid + 128];
if (tid < 90) products[tid] = product = product + products[tid + 90];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) products[tid] = product = product + products[tid + 64];
if (tid < 45) products[tid] = product = product + products[tid + 45];
barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products;
if (tid < 32)
#ifdef CPU
if (tid < 13) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 13)
{
smem[tid] = product = product + smem[tid + 32];
#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
smem[tid] = product = product + smem[tid + 16];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem[tid] = product = product + smem[tid + 8];
#ifdef WAVE_SIZE_1
smem[tid] = product = product + smem[tid + 4];
smem[tid] = product = product + smem[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
#endif
smem[tid] = product = product + smem[tid + 4];
#ifdef WAVE_SIZE_1
if (tid == 0){
product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
//---------------------------------------------------------------------
// Linear SVM based classification
// 64x128 window, 9 bins and default parameters
// 256 threads, 252 of them are used
__kernel void classify_hists_252_kernel(
const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs,
float free_coef, float threshold, __global uchar* labels)
{
const int tid = get_local_id(0);
const int gidX = get_group_id(0);
const int gidY = get_group_id(1);
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
float product = 0.f;
if (tid < cdescr_width)
{
for (int i = 0; i < cdescr_height; i++)
product += coefs[i * cdescr_width + tid] *
hist[i * img_block_width * cblock_hist_size + tid];
}
__local float products[NTHREADS];
products[tid] = product;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
#endif
if (tid < 128) products[tid] = product = product + products[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products;
#ifdef CPU
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 32)
{
smem[tid] = product = product + smem[tid + 32];
smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4];
smem[tid] = product = product + smem[tid + 2];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
#endif
smem[tid] = product = product + smem[tid + 1];
if (tid == 0){
product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
//---------------------------------------------------------------------
// Linear SVM based classification
// 256 threads
__kernel void classify_hists_kernel(
const int cdescr_size, const int cdescr_width, const int cblock_hist_size,
const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs,
float free_coef, float threshold, __global uchar* labels)
{
const int tid = get_local_id(0);
const int gidX = get_group_id(0);
const int gidY = get_group_id(1);
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
float product = 0.f;
for (int i = tid; i < cdescr_size; i += NTHREADS)
{
int offset_y = i / cdescr_width;
int offset_x = i - offset_y * cdescr_width;
product += coefs[i] *
hist[offset_y * img_block_width * cblock_hist_size + offset_x];
}
if (tid == 0)
__local float products[NTHREADS];
products[tid] = product;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) products[tid] = product = product + products[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products;
#ifdef CPU
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 32)
{
smem[tid] = product = product + smem[tid + 32];
smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4];
smem[tid] = product = product + smem[tid + 2];
}
#endif
if (tid == 0){
smem[tid] = product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
//----------------------------------------------------------------------------
// Extract descriptors
__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width,
const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
__global const float* block_hists, __global float* descriptors)
__kernel void extract_descrs_by_rows_kernel(
const int cblock_hist_size, const int descriptors_quadstep,
const int cdescr_size, const int cdescr_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float* block_hists, __global float* descriptors)
{
int tid = get_local_id(0);
int gidX = get_group_id(0);
int gidY = get_group_id(1);
// Get left top corner of the window in src
__global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
// Get left top corner of the window in dst
__global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
__global float* descriptor = descriptors +
(gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
// Copy elements from src to dst
for (int i = tid; i < cdescr_size; i += NTHREADS)
@ -353,19 +595,23 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in
}
}
__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x,
const int win_block_stride_y, __global const float* block_hists, __global float* descriptors)
__kernel void extract_descrs_by_cols_kernel(
const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float* block_hists, __global float* descriptors)
{
int tid = get_local_id(0);
int gidX = get_group_id(0);
int gidY = get_group_id(1);
// Get left top corner of the window in src
__global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
// Get left top corner of the window in dst
__global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
__global float* descriptor = descriptors +
(gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
// Copy elements from src to dst
for (int i = tid; i < cdescr_size; i += NTHREADS)
@ -376,16 +622,19 @@ __kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const in
int y = block_idx / cnblocks_win_x;
int x = block_idx - y * cnblocks_win_x;
descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block];
descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] =
hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block];
}
}
//----------------------------------------------------------------------------
// Gradients computation
__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
const __global uchar4 * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins)
__kernel void compute_gradients_8UC4_kernel(
const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step,
const __global uchar4 * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins)
{
const int x = get_global_id(0);
const int tid = get_local_id(0);
@ -426,8 +675,10 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c
barrier(CLK_LOCAL_MEM_FENCE);
if (x < width)
{
float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]);
float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)],
sh_row[tid + 2 * (NTHREADS + 2)]);
float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)],
sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
float3 dx;
if (correct_gamma == 1)
@ -482,9 +733,11 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c
}
}
__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
__global const uchar * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins)
__kernel void compute_gradients_8UC1_kernel(
const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step,
__global const uchar * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins)
{
const int x = get_global_id(0);
const int tid = get_local_id(0);
@ -539,43 +792,4 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c
grad[ (gidY * grad_quadstep + x) << 1 ] = mag * (1.f - ang);
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
}
}
//----------------------------------------------------------------------------
// Resize
// Resizes a 4-channel 8-bit image by picking, for every destination pixel,
// the source pixel at the rounded position (dx * ifx, dy * ify).
// One work-item handles one destination pixel (global id 0 -> x, 1 -> y).
// NOTE(review): offsets and steps are presumably given in bytes, which is why
// they are shifted right by 2 to index uchar4 elements - confirm with the host code.
__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
                                 int dst_offset, int src_offset, int dst_step, int src_step,
                                 int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items that fall outside the destination image write nothing.
    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Round to the closest source coordinate and clamp to the last column/row.
    const int sx = min((int)floor(dx * ifx + 0.5f), src_cols - 1);
    const int sy = min((int)floor(dy * ify + 0.5f), src_rows - 1);

    const int dst_index = (dst_offset >> 2) + dy * (dst_step >> 2) + dx;
    const int src_index = (src_offset >> 2) + sy * (src_step >> 2) + sx;
    dst[dst_index] = src[src_index];
}
// Resizes a single-channel 8-bit image by picking, for every destination
// pixel, the source pixel at the rounded position (dx * ifx, dy * ify).
// One work-item handles one destination pixel (global id 0 -> x, 1 -> y).
// Offsets and steps are used directly as uchar element indices.
__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
                                 int dst_offset, int src_offset, int dst_step, int src_step,
                                 int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items that fall outside the destination image write nothing.
    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Round to the closest source coordinate and clamp to the last column/row.
    const int sx = min((int)floor(dx * ifx + 0.5f), src_cols - 1);
    const int sy = min((int)floor(dy * ify + 0.5f), src_rows - 1);

    const int dst_index = dst_offset + dy * dst_step + dx;
    const int src_index = src_offset + sy * src_step + sx;
    dst[dst_index] = src[src_index];
}

@ -1,180 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Sen Liu, swjutls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/objdetect.hpp"
#include "precomp.hpp"
#if 0 //def HAVE_OPENCL
using namespace cvtest;
using namespace testing;
using namespace std;
using namespace cv;
extern string workdir;
namespace
{
    // Function object that maps a CvAvgComp to the rectangle it carries,
    // for use with algorithms such as std::transform.
    struct getRect
    {
        Rect operator ()(const CvAvgComp &comp) const
        {
            return comp.rect;
        }
    };

    // Strongly-typed test parameter holding a cascade file name.
    IMPLEMENT_PARAM_CLASS(CascadeName, std::string);

    // Cascade files exercised by the Haar tests.
    CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
    CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
}
// Fixture for the Haar cascade tests. Parameters: (scale, flags, cascade file name).
PARAM_TEST_CASE(Haar, double, int, CascadeName)
{
    // OpenCL classifiers and their CPU counterparts.
    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
    cv::CascadeClassifier cpucascade, cpunestedCascade;

    double scale;             // test parameter 0
    int flags;                // test parameter 1
    std::string cascadeName;  // cascade directory + test parameter 2

    virtual void SetUp()
    {
        scale = GET_PARAM(0);
        flags = GET_PARAM(1);

        cascadeName = workdir + "../../data/haarcascades/";
        cascadeName.append(GET_PARAM(2));

        // The CPU cascade is only loaded when the OpenCL one loaded successfully.
        const bool loaded = cascade.load( cascadeName ) && cpucascade.load(cascadeName);
        if (!loaded)
        {
            cout << "ERROR: Could not load classifier cascade" << endl;
        }
    }
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
// Checks that the OpenCL Haar detector finds the same number of faces as the
// CPU cascade on the preprocessed test image.
TEST_P(Haar, FaceDetect)
{
    const string imgName = workdir + "lena.jpg";
    Mat img = imread( imgName, 1 );
    if (img.empty())
    {
        // Without the image there is nothing to compare; the test ends here.
        std::cout << "Couldn't read " << imgName << std::endl;
        return ;
    }

    // Preprocessing: grayscale, resize to (rows / scale, cols / scale), equalize.
    Mat gray;
    Mat smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
    cvtColor( img, gray, COLOR_BGR2GRAY );
    resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
    equalizeHist( smallImg, smallImg );

    // OpenCL detection on the uploaded image.
    MemStorage storage(cvCreateMemStorage(0));
    cv::ocl::oclMat image;
    image.upload(smallImg);
    CvSeq *_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
                                                    3, flags, Size(30, 30), Size(0, 0) );

    // Copy the detected components out of the sequence and keep only their rectangles.
    vector<CvAvgComp> vecAvgComp;
    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
    vector<Rect> oclfaces(vecAvgComp.size());
    for (size_t k = 0; k < vecAvgComp.size(); ++k)
        oclfaces[k] = vecAvgComp[k].rect;

    // CPU reference detection with the same arguments.
    vector<Rect> faces;
    cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
                                 flags,
                                 Size(30, 30), Size(0, 0) );

    EXPECT_EQ(faces.size(), oclfaces.size());
}
// Checks that the buffered OpenCL cascade (OclCascadeClassifierBuf) finds the
// same number of faces as the CPU cascade, both on the first run and when the
// same classifier object is used a second time.
TEST_P(Haar, FaceDetectUseBuf)
{
    string imgName = workdir + "lena.jpg";
    Mat img = imread( imgName, 1 );
    if(img.empty())
    {
        // Without the image there is nothing to compare; the test ends here.
        std::cout << "Couldn't read " << imgName << std::endl;
        return ;
    }

    vector<Rect> faces, oclfaces;

    // Preprocessing: grayscale, resize to (rows / scale, cols / scale), equalize.
    Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
    // Use the C++ conversion code, as the FaceDetect test in this file does,
    // instead of the legacy C constant CV_BGR2GRAY.
    cvtColor( img, gray, COLOR_BGR2GRAY );
    resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
    equalizeHist( smallImg, smallImg );

    cv::ocl::oclMat image;
    image.upload(smallImg);

    cv::ocl::OclCascadeClassifierBuf cascadebuf;
    if( !cascadebuf.load( cascadeName ) )
    {
        cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
        return;
    }

    // First OpenCL run and the CPU reference, with identical arguments.
    cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
                                 flags,
                                 Size(30, 30), Size(0, 0) );
    cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
                                 flags,
                                 Size(30, 30), Size(0, 0) );
    EXPECT_EQ(faces.size(), oclfaces.size());

    // intentionally run ocl facedetect again and check if it still works after the first run
    cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
                                 flags,
                                 Size(30, 30));
    cascadebuf.release();
    EXPECT_EQ(faces.size(), oclfaces.size());
}
// Runs every Haar test with scale 1.0, with flags CV_HAAR_SCALE_IMAGE and 0,
// and with both frontal-face cascade files.
INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
Combine(Values(1.0),
Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
#endif // HAVE_OPENCL

@ -1573,6 +1573,47 @@ TEST_P(Convolve, Mat)
}
}
//////////////////////////////// ColumnSum //////////////////////////////////////
// Fixture for the columnSum tests; the only parameter is the matrix size.
PARAM_TEST_CASE(ColumnSum, cv::Size)
{
    cv::Mat src;
    cv::Size size;  // matrix size under test, read from the test parameter

    virtual void SetUp() { size = GET_PARAM(0); }
};
// Compares cv::ocl::columnSum against a running per-column sum computed on
// the host: row 0 must equal the input row, and every later row must equal
// the input row plus the accumulated row above it.
TEST_P(ColumnSum, Accuracy)
{
    cv::Mat src = randomMat(size, CV_32FC1);

    cv::ocl::oclMat d_src(src);
    cv::ocl::oclMat d_dst;
    cv::ocl::columnSum(d_src, d_dst);

    // Bring the device result back into a host matrix.
    cv::Mat dst(d_dst);

    // First row: nothing above it to add.
    for (int col = 0; col < src.cols; ++col)
    {
        const float gold = src.at<float>(0, col);
        const float res = dst.at<float>(0, col);
        ASSERT_NEAR(res, gold, 1e-5);
    }

    // Remaining rows: accumulate in place, so that src(row - 1, col) already
    // holds the sum of all rows above when row is processed.
    for (int row = 1; row < src.rows; ++row)
    {
        for (int col = 0; col < src.cols; ++col)
        {
            src.at<float>(row, col) += src.at<float>(row - 1, col);
            const float gold = src.at<float>(row, col);
            const float res = dst.at<float>(row, col);
            ASSERT_NEAR(res, gold, 1e-5);
        }
    }
}
/////////////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
ONE_TYPE(CV_8UC1),
NULL_TYPE,
@ -1688,7 +1729,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
Values(0.0, 40.0)));
//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
// Values(CV_32FC1, CV_32FC1),
// Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
#endif // HAVE_OPENCL

@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Wenju He, wenju@multicorewareinc.com
// Yao Wang, bitwangyaoyao@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
@ -45,51 +45,61 @@
#include "precomp.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/objdetect/objdetect_c.h"
using namespace std;
using namespace cv;
using namespace testing;
#ifdef HAVE_OPENCL
extern string workdir;
PARAM_TEST_CASE(HOG, cv::Size, int)
///////////////////// HOG /////////////////////////////
PARAM_TEST_CASE(HOG, Size, int)
{
cv::Size winSize;
Size winSize;
int type;
Mat img_rgb;
virtual void SetUp()
{
winSize = GET_PARAM(0);
type = GET_PARAM(1);
img_rgb = readImage(workdir + "../gpu/road.png");
if(img_rgb.empty())
{
std::cout << "Couldn't read road.png" << std::endl;
}
}
};
TEST_P(HOG, GetDescriptors)
{
// Load image
cv::Mat img_rgb = readImage(workdir + "lena.jpg");
ASSERT_FALSE(img_rgb.empty());
// Convert image
cv::Mat img;
Mat img;
switch (type)
{
case CV_8UC1:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
cvtColor(img_rgb, img, COLOR_BGR2GRAY);
break;
case CV_8UC4:
default:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
cvtColor(img_rgb, img, COLOR_BGR2BGRA);
break;
}
cv::ocl::oclMat d_img(img);
ocl::oclMat d_img(img);
// HOGs
cv::ocl::HOGDescriptor ocl_hog;
ocl::HOGDescriptor ocl_hog;
ocl_hog.gamma_correction = true;
cv::HOGDescriptor hog;
HOGDescriptor hog;
hog.gammaCorrection = true;
// Compute descriptor
cv::ocl::oclMat d_descriptors;
ocl::oclMat d_descriptors;
ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
cv::Mat down_descriptors;
Mat down_descriptors;
d_descriptors.download(down_descriptors);
down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);
@ -105,45 +115,34 @@ TEST_P(HOG, GetDescriptors)
hog.compute(img_rgb, descriptors, ocl_hog.win_size);
break;
}
cv::Mat cpu_descriptors(descriptors);
Mat cpu_descriptors(descriptors);
EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2);
}
// Returns true when the two rectangles differ by strictly less than
// `threshold` in each of x, y, width and height.
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
{
    // Checked in the order x, y, width, height; the first mismatch decides.
    if (abs(r1.x - r2.x) >= threshold)
        return false;
    if (abs(r1.y - r2.y) >= threshold)
        return false;
    if (abs(r1.width - r2.width) >= threshold)
        return false;
    return abs(r1.height - r2.height) < threshold;
}
TEST_P(HOG, Detect)
{
// Load image
cv::Mat img_rgb = readImage(workdir + "lena.jpg");
ASSERT_FALSE(img_rgb.empty());
// Convert image
cv::Mat img;
Mat img;
switch (type)
{
case CV_8UC1:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
cvtColor(img_rgb, img, COLOR_BGR2GRAY);
break;
case CV_8UC4:
default:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
cvtColor(img_rgb, img, COLOR_BGR2BGRA);
break;
}
cv::ocl::oclMat d_img(img);
ocl::oclMat d_img(img);
// HOGs
if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128)))
winSize = cv::Size(64, 128);
cv::ocl::HOGDescriptor ocl_hog(winSize);
if ((winSize != Size(48, 96)) && (winSize != Size(64, 128)))
winSize = Size(64, 128);
ocl::HOGDescriptor ocl_hog(winSize);
ocl_hog.gamma_correction = true;
cv::HOGDescriptor hog;
HOGDescriptor hog;
hog.winSize = winSize;
hog.gammaCorrection = true;
@ -165,88 +164,119 @@ TEST_P(HOG, Detect)
}
// OpenCL detection
std::vector<cv::Rect> d_found;
ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
std::vector<Rect> d_found;
ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
// CPU detection
std::vector<cv::Rect> found;
std::vector<Rect> found;
switch (type)
{
case CV_8UC1:
hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
break;
case CV_8UC4:
default:
hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
break;
}
// Ground-truth rectangular people window
cv::Rect win1_64x128(231, 190, 72, 144);
cv::Rect win2_64x128(621, 156, 97, 194);
cv::Rect win1_48x96(238, 198, 63, 126);
cv::Rect win2_48x96(619, 161, 92, 185);
cv::Rect win3_48x96(488, 136, 56, 112);
// Compare whether ground-truth windows are detected and compare the number of windows detected.
std::vector<int> d_comp(4);
std::vector<int> comp(4);
for(int i = 0; i < (int)d_comp.size(); i++)
{
d_comp[i] = 0;
comp[i] = 0;
}
EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0);
}
int threshold = 10;
int val = 32;
d_comp[0] = (int)d_found.size();
comp[0] = (int)found.size();
if (winSize == cv::Size(48, 96))
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
testing::Values(Size(64, 128), Size(48, 96)),
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
#if 0
///////////////////////////// Haar //////////////////////////////
IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
struct getRect
{
Rect operator ()(const CvAvgComp &e) const
{
for(int i = 0; i < (int)d_found.size(); i++)
{
if (match_rect(d_found[i], win1_48x96, threshold))
d_comp[1] = val;
if (match_rect(d_found[i], win2_48x96, threshold))
d_comp[2] = val;
if (match_rect(d_found[i], win3_48x96, threshold))
d_comp[3] = val;
}
for(int i = 0; i < (int)found.size(); i++)
{
if (match_rect(found[i], win1_48x96, threshold))
comp[1] = val;
if (match_rect(found[i], win2_48x96, threshold))
comp[2] = val;
if (match_rect(found[i], win3_48x96, threshold))
comp[3] = val;
}
return e.rect;
}
else if (winSize == cv::Size(64, 128))
};
PARAM_TEST_CASE(Haar, int, CascadeName)
{
ocl::OclCascadeClassifier cascade, nestedCascade;
CascadeClassifier cpucascade, cpunestedCascade;
int flags;
std::string cascadeName;
vector<Rect> faces, oclfaces;
Mat img;
ocl::oclMat d_img;
virtual void SetUp()
{
for(int i = 0; i < (int)d_found.size(); i++)
flags = GET_PARAM(0);
cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1));
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
{
if (match_rect(d_found[i], win1_64x128, threshold))
d_comp[1] = val;
if (match_rect(d_found[i], win2_64x128, threshold))
d_comp[2] = val;
std::cout << "ERROR: Could not load classifier cascade" << std::endl;
return;
}
for(int i = 0; i < (int)found.size(); i++)
img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE);
if(img.empty())
{
if (match_rect(found[i], win1_64x128, threshold))
comp[1] = val;
if (match_rect(found[i], win2_64x128, threshold))
comp[2] = val;
std::cout << "Couldn't read lena.jpg" << std::endl;
return ;
}
equalizeHist(img, img);
d_img.upload(img);
}
};
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3);
// Runs the OpenCL Haar detector and the CPU cascade on the fixture image and
// requires checkRectSimilarity of the two result sets to be below 1.0.
TEST_P(Haar, FaceDetect)
{
    // OpenCL detection; results come back as a CvSeq of CvAvgComp.
    MemStorage storage(cvCreateMemStorage(0));
    CvSeq *_objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3,
                                                   flags, Size(30, 30), Size(0, 0));

    // Copy the components out and keep only their rectangles.
    vector<CvAvgComp> vecAvgComp;
    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
    oclfaces.resize(vecAvgComp.size());
    for (size_t k = 0; k < vecAvgComp.size(); ++k)
        oclfaces[k] = vecAvgComp[k].rect;

    // CPU reference detection with the same arguments.
    cpucascade.detectMultiScale(img, faces, 1.1, 3,
                                flags,
                                Size(30, 30), Size(0, 0));

    EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
}
TEST_P(Haar, FaceDetectUseBuf)
{
ocl::OclCascadeClassifierBuf cascadebuf;
if(!cascadebuf.load(cascadeName))
{
std::cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << std::endl;
return;
}
cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0));
cpucascade.detectMultiScale(img, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0));
// intentionally run ocl facedetect again and check if it still works after the first run
cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3,
flags,
Size(30, 30));
cascadebuf.release();
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
}
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar,
Combine(Values(CV_HAAR_SCALE_IMAGE, 0),
Values(cascade_frontalface_alt/*, cascade_frontalface_alt2*/)));
#endif
#endif //HAVE_OPENCL

@ -15,7 +15,6 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang yao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
@ -56,11 +55,12 @@ using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, int)
PARAM_TEST_CASE(PyrBase, MatType, int)
{
int type;
int channels;
Mat dst_cpu;
oclMat gdst;
virtual void SetUp()
{
type = GET_PARAM(0);
@ -69,19 +69,19 @@ PARAM_TEST_CASE(PyrDown, MatType, int)
};
/////////////////////// PyrDown //////////////////////////
struct PyrDown : PyrBase {};
TEST_P(PyrDown, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::RNG &rng = TS::ptr()->get_rng();
cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
cv::ocl::oclMat gsrc(src), gdst;
cv::Mat dst_cpu;
cv::pyrDown(src, dst_cpu);
cv::ocl::pyrDown(gsrc, gdst);
Size size(MWIDTH, MHEIGHT);
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
oclMat gsrc(src);
pyrDown(src, dst_cpu);
pyrDown(gsrc, gdst);
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f);
}
@ -90,5 +90,27 @@ TEST_P(PyrDown, Mat)
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4)));
/////////////////////// PyrUp //////////////////////////
struct PyrUp : PyrBase {};
TEST_P(PyrUp, Accuracy)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
Size size(MWIDTH, MHEIGHT);
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
oclMat gsrc(src);
pyrUp(src, dst_cpu);
pyrUp(gsrc, gdst);
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 1e-4f : 1.0));
}
}
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4)));
#endif // HAVE_OPENCL

@ -1,90 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Yao Wang yao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cvtest;
using namespace testing;
using namespace std;
// Fixture for the PyrUp tests. It is parameterized by two values that the
// test body combines into a matrix type via CV_MAKETYPE(type, channels).
PARAM_TEST_CASE(PyrUp, MatType, int)
{
    int type;      // test parameter 0
    int channels;  // test parameter 1

    // Caches both test parameters before each test body runs.
    virtual void SetUp()
    {
        channels = GET_PARAM(1);
        type = GET_PARAM(0);
    }
};
// Compares ocl::pyrUp against the host pyrUp on random input, LOOP_TIMES times.
// The tolerance is 1e-4 for CV_32F input and 1.0 for every other depth.
TEST_P(PyrUp, Accuracy)
{
    for(int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        Size inputSize(MWIDTH, MHEIGHT);
        Mat hostSrc = randomMat(inputSize, CV_MAKETYPE(type, channels));

        // Reference result computed on the host.
        Mat expected;
        pyrUp(hostSrc, expected);

        // Same operation through the OpenCL implementation.
        ocl::oclMat deviceDst;
        ocl::oclMat deviceSrc(hostSrc);
        ocl::pyrUp(deviceSrc, deviceDst);

        EXPECT_MAT_NEAR(expected, Mat(deviceDst), (type == CV_32F ? 1e-4f : 1.0));
    }
}
// Runs the PyrUp tests for every combination of {CV_8U, CV_32F} and {1, 3, 4}
// (the fixture's `type` and `channels` parameters respectively).
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4)));
#endif // HAVE_OPENCL

@ -100,12 +100,6 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
}
/*
void showDiff(InputArray gold_, InputArray actual_, double eps)
{
@ -137,58 +131,7 @@ void showDiff(InputArray gold_, InputArray actual_, double eps)
}
*/
/*
bool supportFeature(const DeviceInfo& info, FeatureSet feature)
{
return TargetArchs::builtWith(feature) && info.supports(feature);
}
const vector<DeviceInfo>& devices()
{
static vector<DeviceInfo> devs;
static bool first = true;
if (first)
{
int deviceCount = getCudaEnabledDeviceCount();
devs.reserve(deviceCount);
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
devs.push_back(info);
}
first = false;
}
return devs;
}
vector<DeviceInfo> devices(FeatureSet feature)
{
const vector<DeviceInfo>& d = devices();
vector<DeviceInfo> devs_filtered;
if (TargetArchs::builtWith(feature))
{
devs_filtered.reserve(d.size());
for (size_t i = 0, size = d.size(); i < size; ++i)
{
const DeviceInfo& info = d[i];
if (info.supports(feature))
devs_filtered.push_back(info);
}
}
return devs_filtered;
}
*/
vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
{
@ -264,3 +207,48 @@ void PrintTo(const Inverse &inverse, std::ostream *os)
(*os) << "direct";
}
// Measures how much two sets of rectangles disagree (smaller is better).
//
// sz  - size of the image the rectangles were detected in; every rectangle
//       must lie inside it, because each one is used as a Mat ROI.
// ob1 - reference rectangles.
// ob2 - rectangles under test.
//
// Returns:
//  * |ob1.size() - ob2.size()| when the two sets differ in size;
//  * 0 when both sets are empty, or when the reference rectangles cover no pixels;
//  * otherwise 1 - (pixels covered by both sets) / (pixels covered by ob1),
//    i.e. 0 for identical coverage and 1 when the sets do not overlap at all.
double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
{
    const size_t sz1 = ob1.size();
    const size_t sz2 = ob2.size();
    if(sz1 != sz2)
        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
    if(sz1 == 0)
        return 0.0;

    // Mask of the pixels covered by the reference rectangles.
    cv::Mat cpu_result(sz, CV_8UC1);
    cpu_result.setTo(0);
    for(std::vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); ++r)
    {
        cv::Mat cpu_result_roi(cpu_result, *r);
        cpu_result_roi.setTo(1);
    }
    const int cpu_area = cv::countNonZero(cpu_result > 0);

    // Mask of the pixels covered by the rectangles under test.
    cv::Mat gpu_result(sz, CV_8UC1);
    gpu_result.setTo(0);
    for(std::vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); ++r2)
    {
        cv::Mat gpu_result_roi(gpu_result, *r2);
        gpu_result_roi.setTo(1);
    }

    // Both masks hold only 0/1, so their product is the intersection mask.
    cv::Mat result_;
    multiply(cpu_result, gpu_result, result_);
    const int result = cv::countNonZero(result_ > 0);

    // Only an empty reference area needs a guard against division by zero.
    // A zero intersection with a non-empty reference area must yield 1.0
    // (complete mismatch), not 0.0.
    if(cpu_area == 0)
        return 0.0;
    return 1.0 - (double)result/(double)cpu_area;
}

@ -57,13 +57,12 @@ cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal =
void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
//! return true if device supports specified feature and gpu module was built with support the feature.
//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
// This function tests whether gpu_rst matches cpu_rst.
// If the two vectors differ in size, it returns the difference in vector size.
// Otherwise it returns (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels).
// The smaller the value, the better the match.
double checkRectSimilarity(cv::Size sz, std::vector<cv::Rect>& ob1, std::vector<cv::Rect>& ob2);
//! return all devices compatible with current gpu module build.
//const std::vector<cv::ocl::DeviceInfo>& devices();
//! return all devices compatible with current gpu module build which support specified feature.
//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
//! read image from testdata folder.
cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);

@ -100,34 +100,39 @@ class TestInfo(object):
def dump(self, units="ms"):
print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units)
def shortName(self):
def getName(self):
pos = self.name.find("/")
if pos > 0:
name = self.name[:pos]
else:
name = self.name
if self.fixture.endswith(name):
fixture = self.fixture[:-len(name)]
return self.name[:pos]
return self.name
def getFixture(self):
if self.fixture.endswith(self.getName()):
fixture = self.fixture[:-len(self.getName())]
else:
fixture = self.fixture
if fixture.endswith("_"):
fixture = fixture[:-1]
return fixture
def param(self):
return '::'.join(filter(None, [self.type_param, self.value_param]))
def shortName(self):
name = self.getName()
fixture = self.getFixture()
return '::'.join(filter(None, [name, fixture]))
def __str__(self):
pos = self.name.find("/")
if pos > 0:
name = self.name[:pos]
else:
name = self.name
if self.fixture.endswith(name):
fixture = self.fixture[:-len(name)]
else:
fixture = self.fixture
if fixture.endswith("_"):
fixture = fixture[:-1]
name = self.getName()
fixture = self.getFixture()
return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param]))
def __cmp__(self, other):
r = cmp(self.fixture, other.fixture);
if r != 0:

@ -0,0 +1,167 @@
#!/usr/bin/env python
from __future__ import division
import ast
import logging
import numbers
import os, os.path
import re
from argparse import ArgumentParser
from collections import OrderedDict
from glob import glob
from itertools import ifilter
import xlwt
from testlog_parser import parseLogFile
# To build an XLS report you need to arrange your XML files (OpenCV test output)
# in the following way:
#
# "root" --- folder, representing the whole XLS document. It contains several
# subfolders --- sheet-paths of the XLS document. Each sheet-path contains its
# own subfolders --- config-paths. Config-paths are columns of the sheet, and
# they contain XML files --- the output of OpenCV module tests.
# A config-path corresponds to an OpenCV build configuration, including different
# options such as NEON, TBB, GPU enabling/disabling.
#
# root
# root\sheet_path
# root\sheet_path\configuration1 (column 1)
# root\sheet_path\configuration2 (column 2)
# Patterns used to pick well-known values out of a test's parameter list.
# re.VERBOSE makes the spaces inside the patterns insignificant, so these match
# strings such as "640x480" and "8UC1" respectively.
re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE)
re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE)
# Cell styles: measured times, cells with no measurement, speedup cells
# (plain / above 1.1 / below 0.9 / not computable / arithmetic error), and the header row.
time_style = xlwt.easyxf(num_format_str='#0.00')
no_time_style = xlwt.easyxf('pattern: pattern solid, fore_color gray25')
speedup_style = time_style
good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00')
bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00')
no_speedup_style = no_time_style
error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange')
header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True')
def collect_xml(collection, configuration, xml_fullname):
    """Merge the results of one test log into `collection`.

    collection    -- dict mapping module name to an OrderedDict keyed by
                     (test short name, test parameters); each value maps a
                     configuration name to that test's result.
    configuration -- name of the configuration the log belongs to.
    xml_fullname  -- path to the XML log; the module name is the part of the
                     file name before the first underscore.

    The stored result is the "gmean" metric for tests whose status is 'run',
    and the status string itself otherwise.
    """
    file_name = os.path.basename(xml_fullname)
    module_name = file_name[:file_name.index('_')]

    per_module = collection.setdefault(module_name, OrderedDict())

    for test in sorted(parseLogFile(xml_fullname)):
        key = (test.shortName(), test.param())
        per_test = per_module.setdefault(key, {})
        if test.status == 'run':
            per_test[configuration] = test.get("gmean")
        else:
            per_test[configuration] = test.status
def main():
    """Build an XLS performance report from directories of perf test logs.

    Each positional argument is a sheet directory. Its sub-directories (or the
    names listed under 'configurations' in the sheet configuration) are the
    configurations, and every '*.xml' file inside a configuration directory is
    a test log. An optional 'sheet.conf' in the sheet directory and an optional
    global configuration file (-c) are read with ast.literal_eval; sheet
    settings override global ones. The workbook is saved to the path given by
    -o (default 'report.xls').
    """
    parser = ArgumentParser(description='Build an XLS performance report.')
    parser.add_argument('sheet_dirs', nargs='+', metavar='DIR', help='directory containing perf test logs')
    parser.add_argument('-o', '--output', metavar='XLS', default='report.xls', help='name of output file')
    parser.add_argument('-c', '--config', metavar='CONF', help='global configuration file')
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

    global_conf = {}
    if args.config is not None:
        with open(args.config) as global_conf_file:
            global_conf = ast.literal_eval(global_conf_file.read())

    workbook = xlwt.Workbook()

    for sheet_path in args.sheet_dirs:
        # Any failure to read or parse sheet.conf is treated as "no sheet.conf".
        try:
            with open(os.path.join(sheet_path, 'sheet.conf')) as sheet_conf_file:
                sheet_conf = ast.literal_eval(sheet_conf_file.read())
        except Exception:
            sheet_conf = {}
            logging.debug('no sheet.conf for %s', sheet_path)

        # Later items win, so sheet-level settings override global ones.
        sheet_conf = dict(global_conf.items() + sheet_conf.items())

        if 'configurations' in sheet_conf:
            config_names = sheet_conf['configurations']
        else:
            try:
                config_names = [entry for entry in os.listdir(sheet_path)
                                if os.path.isdir(os.path.join(sheet_path, entry))]
            except Exception as e:
                logging.warning('error while determining configuration names for %s: %s', sheet_path, e)
                continue

        collection = {}

        for configuration in config_names:
            configuration_path = os.path.join(sheet_path, configuration)
            logging.info('processing %s', configuration_path)
            for xml_fullname in glob(os.path.join(configuration_path, '*.xml')):
                collect_xml(collection, configuration, xml_fullname)

        sheet_name = sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path)))
        sheet = workbook.add_sheet(sheet_name)

        sheet.row(0).height = 800

        # Keep the header row visible while scrolling.
        sheet.panes_frozen = True
        sheet.remove_splits = True
        sheet.horz_split_pos = 1
        sheet.horz_split_first_visible = 1

        sheet_comparisons = sheet_conf.get('comparisons', [])

        # Five fixed columns, then one column per configuration, one spacer
        # column and one column per comparison.
        fixed_widths = [2000, 15000, 2500, 2000, 15000]
        extra_widths = (len(config_names) + 1 + len(sheet_comparisons)) * [3000]
        for column, width in enumerate(fixed_widths + extra_widths):
            sheet.col(column).width = width

        captions = (['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters']
                    + config_names + [None]
                    + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons])
        header_row = sheet.row(0)
        for column, caption in enumerate(captions):
            header_row.write(column, caption, header_style)

        row = 1

        module_styles = {}
        for mod_name, color in sheet_conf.get('module_colors', {}).iteritems():
            module_styles[mod_name] = xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color))

        first_comparison_col = 5 + len(config_names) + 1

        for module, tests in sorted(collection.iteritems()):
            for ((test, param), configs) in tests.iteritems():
                sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style))
                sheet.write(row, 1, test)

                # Drop the first and last character of the parameter string,
                # then split the remainder into individual values.
                param_list = param[1:-1].split(", ")
                sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None))
                sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None))

                sheet.row(row).write(4, param)

                for offset, config_name in enumerate(config_names):
                    if config_name in configs:
                        sheet.write(row, 5 + offset, configs[config_name], time_style)
                    else:
                        sheet.write(row, 5 + offset, None, no_time_style)

                for offset, comp in enumerate(sheet_comparisons):
                    cmp_from = configs.get(comp["from"])
                    cmp_to = configs.get(comp["to"])
                    col = first_comparison_col + offset

                    if not (isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number)):
                        # At least one side has no numeric measurement.
                        sheet.write(row, col, None, no_speedup_style)
                        continue

                    try:
                        speedup = cmp_from / cmp_to
                        if speedup > 1.1:
                            cell_style = good_speedup_style
                        elif speedup < 0.9:
                            cell_style = bad_speedup_style
                        else:
                            cell_style = speedup_style
                        sheet.write(row, col, speedup, cell_style)
                    except ArithmeticError:
                        sheet.write(row, col, None, error_speedup_style)

                row += 1
                if row % 1000 == 0:
                    sheet.flush_row_data()

    workbook.save(args.output)
# Run the report builder only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

@ -2,6 +2,10 @@
#include <float.h>
#include <limits.h>
#ifdef HAVE_TEGRA_OPTIMIZATION
#include "tegra.hpp"
#endif
using namespace cv;
namespace cvtest
@ -2939,28 +2943,76 @@ MatComparator::operator()(const char* expr1, const char* expr2,
void printVersionInfo(bool useStdOut)
{
::testing::Test::RecordProperty("CV_VERSION", CV_VERSION);
::testing::Test::RecordProperty("cv_version", CV_VERSION);
if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
std::string buildInfo( cv::getBuildInformation() );
size_t pos1 = buildInfo.find("Version control");
size_t pos2 = buildInfo.find("\n", pos1);\
size_t pos2 = buildInfo.find('\n', pos1);
if(pos1 != std::string::npos && pos2 != std::string::npos)
{
std::string ver( buildInfo.substr(pos1, pos2-pos1) );
::testing::Test::RecordProperty("Version_control", ver);
if(useStdOut) std::cout << ver << std::endl;
size_t value_start = buildInfo.rfind(' ', pos2) + 1;
std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
::testing::Test::RecordProperty("cv_vcs_version", ver);
if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl;
}
pos1 = buildInfo.find("inner version");
pos2 = buildInfo.find("\n", pos1);\
pos2 = buildInfo.find('\n', pos1);
if(pos1 != std::string::npos && pos2 != std::string::npos)
{
std::string ver( buildInfo.substr(pos1, pos2-pos1) );
::testing::Test::RecordProperty("inner_version", ver);
if(useStdOut) std::cout << ver << std::endl;
}
size_t value_start = buildInfo.rfind(' ', pos2) + 1;
std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
::testing::Test::RecordProperty("cv_inner_vcs_version", ver);
if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl;
}
#ifdef CV_PARALLEL_FRAMEWORK
::testing::Test::RecordProperty("cv_parallel_framework", CV_PARALLEL_FRAMEWORK);
if (useStdOut)
{
std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl;
}
#endif
std::string cpu_features;
#if CV_SSE
if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
#endif
#if CV_SSE2
if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2";
#endif
#if CV_SSE3
if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3";
#endif
#if CV_SSSE3
if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3";
#endif
#if CV_SSE4_1
if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1";
#endif
#if CV_SSE4_2
if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2";
#endif
#if CV_AVX
if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx";
#endif
#if CV_NEON
cpu_features += " neon"; // NEON is currently not checked at runtime
#endif
cpu_features.erase(0, 1); // erase initial space
::testing::Test::RecordProperty("cv_cpu_features", cpu_features);
if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl;
#ifdef HAVE_TEGRA_OPTIMIZATION
const char * tegra_optimization = tegra::isDeviceSupported() ? "enabled" : "disabled";
::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
#endif
}
}

@ -17,16 +17,19 @@ using namespace cv;
#define LOOP_NUM 10
const static Scalar colors[] = { CV_RGB(0,0,255),
CV_RGB(0,128,255),
CV_RGB(0,255,255),
CV_RGB(0,255,0),
CV_RGB(255,128,0),
CV_RGB(255,255,0),
CV_RGB(255,0,0),
CV_RGB(255,0,255)} ;
CV_RGB(0,128,255),
CV_RGB(0,255,255),
CV_RGB(0,255,0),
CV_RGB(255,128,0),
CV_RGB(255,255,0),
CV_RGB(255,0,0),
CV_RGB(255,0,255)
} ;
int64 work_begin = 0;
int64 work_end = 0;
string outputName;
static void workBegin()
{
@ -37,34 +40,40 @@ static void workEnd()
work_end += (getTickCount() - work_begin);
}
static double getTime(){
static double getTime()
{
return work_end /((double)cvGetTickFrequency() * 1000.);
}
void detect( Mat& img, vector<Rect>& faces,
cv::ocl::OclCascadeClassifierBuf& cascade,
double scale, bool calTime);
ocl::OclCascadeClassifierBuf& cascade,
double scale, bool calTime);
void detectCPU( Mat& img, vector<Rect>& faces,
CascadeClassifier& cascade,
double scale, bool calTime);
CascadeClassifier& cascade,
double scale, bool calTime);
void Draw(Mat& img, vector<Rect>& faces, double scale);
// This function tests whether gpu_rst matches cpu_rst.
// If the two vectors differ in size, it returns the difference in vector size.
// Otherwise it returns (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst);
double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
int main( int argc, const char** argv )
{
const char* keys =
"{ h | help | false | print help message }"
"{ i | input | | specify input image }"
"{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }"
"{ t | template | haarcascade_frontalface_alt.xml |"
" specify template file path }"
"{ c | scale | 1.0 | scale image }"
"{ s | use_cpu | false | use cpu or gpu to process the image }";
"{ s | use_cpu | false | use cpu or gpu to process the image }"
"{ o | output | facedetect_output.jpg |"
" specify output image save path(only works when input is images) }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
@ -78,9 +87,10 @@ int main( int argc, const char** argv )
bool useCPU = cmd.get<bool>("s");
string inputName = cmd.get<string>("i");
outputName = cmd.get<string>("o");
string cascadeName = cmd.get<string>("t");
double scale = cmd.get<double>("c");
cv::ocl::OclCascadeClassifierBuf cascade;
ocl::OclCascadeClassifierBuf cascade;
CascadeClassifier cpu_cascade;
if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
@ -114,9 +124,10 @@ int main( int argc, const char** argv )
return -1;
}
cvNamedWindow( "result", 1 );
std::vector<cv::ocl::Info> oclinfo;
int devnums = cv::ocl::getDevice(oclinfo);
vector<ocl::Info> oclinfo;
int devnums = ocl::getDevice(oclinfo);
if( devnums < 1 )
{
std::cout << "no device found\n";
@ -139,10 +150,12 @@ int main( int argc, const char** argv )
frame.copyTo( frameCopy );
else
flip( frame, frameCopy, 0 );
if(useCPU){
if(useCPU)
{
detectCPU(frameCopy, faces, cpu_cascade, scale, false);
}
else{
else
{
detect(frameCopy, faces, cascade, scale, false);
}
Draw(frameCopy, faces, scale);
@ -150,8 +163,10 @@ int main( int argc, const char** argv )
goto _cleanup_;
}
waitKey(0);
_cleanup_:
cvReleaseCapture( &capture );
}
@ -161,15 +176,18 @@ _cleanup_:
vector<Rect> faces;
vector<Rect> ref_rst;
double accuracy = 0.;
for(int i = 0; i <= LOOP_NUM;i ++)
for(int i = 0; i <= LOOP_NUM; i ++)
{
cout << "loop" << i << endl;
if(useCPU){
if(useCPU)
{
detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
}
else{
else
{
detect(image, faces, cascade, scale, i==0?false:true);
if(i == 0){
if(i == 0)
{
detectCPU(image, ref_rst, cpu_cascade, scale, false);
accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
}
@ -189,31 +207,30 @@ _cleanup_:
}
cvDestroyWindow("result");
return 0;
}
void detect( Mat& img, vector<Rect>& faces,
cv::ocl::OclCascadeClassifierBuf& cascade,
double scale, bool calTime)
ocl::OclCascadeClassifierBuf& cascade,
double scale, bool calTime)
{
cv::ocl::oclMat image(img);
cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
ocl::oclMat image(img);
ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
if(calTime) workBegin();
cv::ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
cv::ocl::equalizeHist( smallImg, smallImg );
ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
ocl::equalizeHist( smallImg, smallImg );
cascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
|CV_HAAR_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
3, 0
|CV_HAAR_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
if(calTime) workEnd();
}
void detectCPU( Mat& img, vector<Rect>& faces,
CascadeClassifier& cascade,
double scale, bool calTime)
CascadeClassifier& cascade,
double scale, bool calTime)
{
if(calTime) workBegin();
Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
@ -221,11 +238,12 @@ void detectCPU( Mat& img, vector<Rect>& faces,
resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR);
equalizeHist(cpu_smallImg, cpu_smallImg);
cascade.detectMultiScale(cpu_smallImg, faces, 1.1,
3, 0 | CV_HAAR_SCALE_IMAGE,
Size(30, 30), Size(0, 0));
3, 0 | CV_HAAR_SCALE_IMAGE,
Size(30, 30), Size(0, 0));
if(calTime) workEnd();
}
void Draw(Mat& img, vector<Rect>& faces, double scale)
{
int i = 0;
@ -239,31 +257,38 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
radius = cvRound((r->width + r->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 );
}
cv::imshow( "result", img );
imshow( "result", img );
imwrite( outputName, img );
}
double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
double checkRectSimilarity(Size sz, vector<Rect>& ob1, vector<Rect>& ob2)
{
double final_test_result = 0.0;
size_t sz1 = ob1.size();
size_t sz2 = ob2.size();
if(sz1 != sz2)
{
return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
}
else
{
cv::Mat cpu_result(sz, CV_8UC1);
if(sz1==0 && sz2==0)
return 0;
Mat cpu_result(sz, CV_8UC1);
cpu_result.setTo(0);
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
{
cv::Mat cpu_result_roi(cpu_result, *r);
Mat cpu_result_roi(cpu_result, *r);
cpu_result_roi.setTo(1);
cpu_result.copyTo(cpu_result);
}
int cpu_area = cv::countNonZero(cpu_result > 0);
int cpu_area = countNonZero(cpu_result > 0);
cv::Mat gpu_result(sz, CV_8UC1);
Mat gpu_result(sz, CV_8UC1);
gpu_result.setTo(0);
for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
{
@ -272,11 +297,13 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
gpu_result.copyTo(gpu_result);
}
cv::Mat result_;
Mat result_;
multiply(cpu_result, gpu_result, result_);
int result = cv::countNonZero(result_ > 0);
final_test_result = 1.0 - (double)result/(double)cpu_area;
int result = countNonZero(result_ > 0);
if(cpu_area!=0 && result!=0)
final_test_result = 1.0 - (double)result/(double)cpu_area;
else if(cpu_area==0 && result!=0)
final_test_result = -1;
}
return final_test_result;
}

@ -11,75 +11,39 @@
using namespace std;
using namespace cv;
bool help_showed = false;
class Args
{
public:
Args();
static Args read(int argc, char** argv);
string src;
bool src_is_video;
bool src_is_camera;
int camera_id;
bool write_video;
string dst_video;
double dst_video_fps;
bool make_gray;
bool resize_src;
int width, height;
double scale;
int nlevels;
int gr_threshold;
double hit_threshold;
bool hit_threshold_auto;
int win_width;
int win_stride_width, win_stride_height;
bool gamma_corr;
};
class App
{
public:
App(const Args& s);
App(CommandLineParser& cmd);
void run();
void handleKey(char key);
void hogWorkBegin();
void hogWorkEnd();
string hogWorkFps() const;
void workBegin();
void workEnd();
string workFps() const;
string message() const;
// This function test if gpu_rst matches cpu_rst.
// If the two vectors are not equal, it will return the difference in vector size
// Else if will return
// Else if will return
// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
double checkRectSimilarity(Size sz,
std::vector<Rect>& cpu_rst,
double checkRectSimilarity(Size sz,
std::vector<Rect>& cpu_rst,
std::vector<Rect>& gpu_rst);
private:
App operator=(App&);
Args args;
//Args args;
bool running;
bool use_gpu;
bool make_gray;
double scale;
double resize_scale;
int win_width;
int win_stride_width, win_stride_height;
int gr_threshold;
int nlevels;
double hit_threshold;
@ -87,119 +51,49 @@ private:
int64 hog_work_begin;
double hog_work_fps;
int64 work_begin;
double work_fps;
};
static void printHelp()
{
cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
<< "\nUsage: hog_gpu\n"
<< " (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
<< " [--make_gray <true/false>] # convert image to gray one or not\n"
<< " [--resize_src <true/false>] # do resize of the source image or not\n"
<< " [--width <int>] # resized image width\n"
<< " [--height <int>] # resized image height\n"
<< " [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
<< " [--scale <double>] # HOG window scale factor\n"
<< " [--nlevels <int>] # max number of HOG window scales\n"
<< " [--win_width <int>] # width of the window (48 or 64)\n"
<< " [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
<< " [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
<< " [--gr_threshold <int>] # merging similar rects constant\n"
<< " [--gamma_correct <int>] # do gamma correction or not\n"
<< " [--write_video <bool>] # write video or not\n"
<< " [--dst_video <path>] # output video path\n"
<< " [--dst_video_fps <double>] # output video fps\n";
help_showed = true;
}
string img_source;
string vdo_source;
string output;
int camera_id;
};
int main(int argc, char** argv)
{
const char* keys =
"{ h | help | false | print help message }"
"{ i | input | | specify input image}"
"{ c | camera | -1 | enable camera capturing }"
"{ v | video | | use video as input }"
"{ g | gray | false | convert image to gray one or not}"
"{ s | scale | 1.0 | resize the image before detect}"
"{ l |larger_win| false | use 64x128 window}"
"{ o | output | | specify output path when input is images}";
CommandLineParser cmd(argc, argv, keys);
App app(cmd);
try
{
if (argc < 2)
printHelp();
Args args = Args::read(argc, argv);
if (help_showed)
return -1;
App app(args);
app.run();
}
catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch(...) { return cout << "unknown exception" << endl, 1; }
return 0;
}
Args::Args()
{
src_is_video = false;
src_is_camera = false;
camera_id = 0;
write_video = false;
dst_video_fps = 24.;
make_gray = false;
resize_src = false;
width = 640;
height = 480;
scale = 1.05;
nlevels = 13;
gr_threshold = 8;
hit_threshold = 1.4;
hit_threshold_auto = true;
win_width = 48;
win_stride_width = 8;
win_stride_height = 8;
gamma_corr = true;
}
Args Args::read(int argc, char** argv)
{
Args args;
for (int i = 1; i < argc; i++)
catch (const Exception& e)
{
if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]);
else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
else if (string(argv[i]) == "--hit_threshold")
{
args.hit_threshold = atof(argv[++i]);
args.hit_threshold_auto = false;
}
else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]);
else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]);
else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
else if (string(argv[i]) == "--help") printHelp();
else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
else if (args.src.empty()) args.src = argv[i];
else throw runtime_error((string("unknown key: ") + argv[i]));
return cout << "error: " << e.what() << endl, 1;
}
return args;
catch (const exception& e)
{
return cout << "error: " << e.what() << endl, 1;
}
catch(...)
{
return cout << "unknown exception" << endl, 1;
}
return 0;
}
App::App(const Args& s)
App::App(CommandLineParser& cmd)
{
args = s;
cout << "\nControls:\n"
<< "\tESC - exit\n"
<< "\tm - change mode GPU <-> CPU\n"
@ -210,56 +104,56 @@ App::App(const Args& s)
<< "\t4/r - increase/decrease hit threshold\n"
<< endl;
use_gpu = true;
make_gray = args.make_gray;
scale = args.scale;
gr_threshold = args.gr_threshold;
nlevels = args.nlevels;
if (args.hit_threshold_auto)
args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
hit_threshold = args.hit_threshold;
gamma_corr = args.gamma_corr;
use_gpu = true;
make_gray = cmd.get<bool>("g");
resize_scale = cmd.get<double>("s");
win_width = cmd.get<bool>("l") == true ? 64 : 48;
vdo_source = cmd.get<string>("v");
img_source = cmd.get<string>("i");
output = cmd.get<string>("o");
camera_id = cmd.get<int>("c");
if (args.win_width != 64 && args.win_width != 48)
args.win_width = 64;
win_stride_width = 8;
win_stride_height = 8;
gr_threshold = 8;
nlevels = 13;
hit_threshold = win_width == 48 ? 1.4 : 0.;
scale = 1.05;
gamma_corr = true;
cout << "Scale: " << scale << endl;
if (args.resize_src)
cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
cout << "Group threshold: " << gr_threshold << endl;
cout << "Levels number: " << nlevels << endl;
cout << "Win width: " << args.win_width << endl;
cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
cout << "Win width: " << win_width << endl;
cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n";
cout << "Hit threshold: " << hit_threshold << endl;
cout << "Gamma correction: " << gamma_corr << endl;
cout << endl;
}
void App::run()
{
std::vector<ocl::Info> oclinfo;
vector<ocl::Info> oclinfo;
ocl::getDevice(oclinfo);
running = true;
cv::VideoWriter video_writer;
VideoWriter video_writer;
Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
Size win_stride(args.win_stride_width, args.win_stride_height);
Size win_size(win_width, win_width * 2);
Size win_stride(win_stride_width, win_stride_height);
// Create HOG descriptors and detectors here
vector<float> detector;
if (win_size == Size(64, 128))
detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
detector = ocl::HOGDescriptor::getPeopleDetector64x128();
else
detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
detector = ocl::HOGDescriptor::getPeopleDetector48x96();
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
ocl::HOGDescriptor::DEFAULT_NLEVELS);
HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
cpu_hog.setSVMDetector(detector);
@ -268,29 +162,29 @@ void App::run()
VideoCapture vc;
Mat frame;
if (args.src_is_video)
if (vdo_source!="")
{
vc.open(args.src.c_str());
vc.open(vdo_source.c_str());
if (!vc.isOpened())
throw runtime_error(string("can't open video file: " + args.src));
throw runtime_error(string("can't open video file: " + vdo_source));
vc >> frame;
}
else if (args.src_is_camera)
else if (camera_id != -1)
{
vc.open(args.camera_id);
vc.open(camera_id);
if (!vc.isOpened())
{
stringstream msg;
msg << "can't open camera: " << args.camera_id;
msg << "can't open camera: " << camera_id;
throw runtime_error(msg.str());
}
vc >> frame;
}
else
{
frame = imread(args.src);
frame = imread(img_source);
if (frame.empty())
throw runtime_error(string("can't open image file: " + args.src));
throw runtime_error(string("can't open image file: " + img_source));
}
Mat img_aux, img, img_to_show;
@ -308,13 +202,15 @@ void App::run()
else frame.copyTo(img_aux);
// Resize image
if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
if (abs(scale-1.0)>0.001)
{
Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale));
resize(img_aux, img, sz);
}
else img = img_aux;
img_to_show = img;
gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels;
vector<Rect> found;
// Perform HOG classification
@ -331,15 +227,16 @@ void App::run()
vector<Rect> ref_rst;
cvtColor(img, img, COLOR_BGRA2BGR);
cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold-2);
Size(0, 0), scale, gr_threshold-2);
double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
cout << "\naccuracy value: " << accuracy << endl;
}
}
cout << "\naccuracy value: " << accuracy << endl;
}
}
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
Size(0, 0), scale, gr_threshold);
hogWorkEnd();
// Draw positive classified windows
for (size_t i = 0; i < found.size(); i++)
{
@ -354,25 +251,31 @@ void App::run()
putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencv_gpu_hog", img_to_show);
if (args.src_is_video || args.src_is_camera) vc >> frame;
if (vdo_source!="" || camera_id!=-1) vc >> frame;
workEnd();
if (args.write_video)
if (output!="")
{
if (!video_writer.isOpened())
if (img_source!="") // wirte image
{
video_writer.open(args.dst_video, VideoWriter::fourcc('x','v','i','d'), args.dst_video_fps,
img_to_show.size(), true);
if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer");
imwrite(output, img_to_show);
}
else //write video
{
if (!video_writer.isOpened())
{
video_writer.open(output, VideoWriter::fourcc('x','v','i','d'), 24,
img_to_show.size(), true);
if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer");
}
if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
video_writer << img;
video_writer << img;
}
}
handleKey((char)waitKey(3));
@ -380,7 +283,6 @@ void App::run()
}
}
void App::handleKey(char key)
{
switch (key)
@ -443,7 +345,10 @@ void App::handleKey(char key)
}
inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); }
inline void App::hogWorkBegin()
{
hog_work_begin = getTickCount();
}
inline void App::hogWorkEnd()
{
@ -459,8 +364,10 @@ inline string App::hogWorkFps() const
return ss.str();
}
inline void App::workBegin() { work_begin = getTickCount(); }
inline void App::workBegin()
{
work_begin = getTickCount();
}
inline void App::workEnd()
{
@ -476,8 +383,9 @@ inline string App::workFps() const
return ss.str();
}
double App::checkRectSimilarity(Size sz,
std::vector<Rect>& ob1,
double App::checkRectSimilarity(Size sz,
std::vector<Rect>& ob1,
std::vector<Rect>& ob2)
{
double final_test_result = 0.0;
@ -485,20 +393,26 @@ double App::checkRectSimilarity(Size sz,
size_t sz2 = ob2.size();
if(sz1 != sz2)
{
return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
}
else
{
if(sz1==0 && sz2==0)
return 0;
cv::Mat cpu_result(sz, CV_8UC1);
cpu_result.setTo(0);
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
{
{
cv::Mat cpu_result_roi(cpu_result, *r);
cpu_result_roi.setTo(1);
cpu_result.copyTo(cpu_result);
}
int cpu_area = cv::countNonZero(cpu_result > 0);
cv::Mat gpu_result(sz, CV_8UC1);
gpu_result.setTo(0);
for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
@ -511,10 +425,10 @@ double App::checkRectSimilarity(Size sz,
cv::Mat result_;
multiply(cpu_result, gpu_result, result_);
int result = cv::countNonZero(result_ > 0);
final_test_result = 1.0 - (double)result/(double)cpu_area;
if(cpu_area!=0 && result!=0)
final_test_result = 1.0 - (double)result/(double)cpu_area;
else if(cpu_area==0 && result!=0)
final_test_result = -1;
}
return final_test_result;
}

@ -12,19 +12,20 @@ using namespace cv;
using namespace cv::ocl;
typedef unsigned char uchar;
#define LOOP_NUM 10
#define LOOP_NUM 10
int64 work_begin = 0;
int64 work_end = 0;
static void workBegin()
{
static void workBegin()
{
work_begin = getTickCount();
}
static void workEnd()
{
work_end += (getTickCount() - work_begin);
}
static double getTime(){
static double getTime()
{
return work_end * 1000. / getTickFrequency();
}
@ -94,14 +95,15 @@ int main(int argc, const char* argv[])
//set this to save kernel compile time from second time you run
ocl::setBinpath("./");
const char* keys =
"{ help h | false | print help message }"
"{ left l | | specify left image }"
"{ right r | | specify right image }"
"{ camera c | 0 | enable camera capturing }"
"{ use_cpu s | false | use cpu or gpu to process the image }"
"{ video v | | use video as input }"
"{ points | 1000 | specify points count [GoodFeatureToTrack] }"
"{ min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }";
"{ help h | false | print help message }"
"{ left l | | specify left image }"
"{ right r | | specify right image }"
"{ camera c | 0 | enable camera capturing }"
"{ use_cpu s | false | use cpu or gpu to process the image }"
"{ video v | | use video as input }"
"{ output o | pyrlk_output.jpg| specify output save path when input is images }"
"{ points | 1000 | specify points count [GoodFeatureToTrack] }"
"{ min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }";
CommandLineParser cmd(argc, argv, keys);
@ -115,10 +117,10 @@ int main(int argc, const char* argv[])
string fname0 = cmd.get<string>("left");
string fname1 = cmd.get<string>("right");
string vdofile = cmd.get<string>("video");
string outfile = cmd.get<string>("output");
int points = cmd.get<int>("points");
double minDist = cmd.get<double>("min_dist");
bool useCPU = cmd.has("s");
bool useCamera = cmd.has("c");
int inputName = cmd.get<int>("c");
oclMat d_nextPts, d_status;
@ -131,21 +133,9 @@ int main(int argc, const char* argv[])
vector<unsigned char> status(points);
vector<float> err;
if (frame0.empty() || frame1.empty())
{
useCamera = true;
defaultPicturesFail = true;
VideoCapture capture(inputName);
if (!capture.isOpened())
{
cout << "Can't load input images" << endl;
return -1;
}
}
cout << "Points count : " << points << endl << endl;
if (useCamera)
if (frame0.empty() || frame1.empty())
{
VideoCapture capture;
Mat frame, frameCopy;
@ -238,10 +228,10 @@ _cleanup_:
else
{
nocamera:
for(int i = 0; i <= LOOP_NUM;i ++)
for(int i = 0; i <= LOOP_NUM; i ++)
{
cout << "loop" << i << endl;
if (i > 0) workBegin();
if (i > 0) workBegin();
if (useCPU)
{
@ -271,8 +261,8 @@ nocamera:
cout << getTime() / LOOP_NUM << " ms" << endl;
drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
imshow("PyrLK [Sparse]", frame0);
imwrite(outfile, frame0);
}
}
}

@ -2,11 +2,11 @@
// It loads several images sequentially and tries to find squares in
// each image
#include "opencv2/core/core.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
#include <iostream>
#include <math.h>
#include <string.h>
@ -14,23 +14,50 @@
using namespace cv;
using namespace std;
static void help()
#define ACCURACY_CHECK 1
#if ACCURACY_CHECK
// check if two vectors of vector of points are near or not
// prior assumption is that they are in correct order
static bool checkPoints(
vector< vector<Point> > set1,
vector< vector<Point> > set2,
int maxDiff = 5)
{
cout <<
"\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n"
"memory storage (it's got it all folks) to find\n"
"squares in a list of images pic1-6.png\n"
"Returns sequence of squares detected on the image.\n"
"the sequence is stored in the specified memory storage\n"
"Call:\n"
"./squares\n"
"Using OpenCV version %s\n" << CV_VERSION << "\n" << endl;
}
if(set1.size() != set2.size())
{
return false;
}
for(vector< vector<Point> >::iterator it1 = set1.begin(), it2 = set2.begin();
it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++)
{
vector<Point> pts1 = *it1;
vector<Point> pts2 = *it2;
if(pts1.size() != pts2.size())
{
return false;
}
for(size_t i = 0; i < pts1.size(); i ++)
{
Point pt1 = pts1[i], pt2 = pts2[i];
if(std::abs(pt1.x - pt2.x) > maxDiff ||
std::abs(pt1.y - pt2.y) > maxDiff)
{
return false;
}
}
}
return true;
}
#endif
int thresh = 50, N = 11;
const char* wndname = "OpenCL Square Detection Demo";
// helper function:
// finds a cosine of angle between vectors
// from pt0->pt1 and from pt0->pt2
@ -43,9 +70,92 @@ static double angle( Point pt1, Point pt2, Point pt0 )
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
// CPU implementation of the square detector.
// Clears `squares`, then appends the four vertices of every contour in
// `image` that approximates to a convex quadrilateral with an area above
// 1000 and whose corner angles are all close to 90 degrees.
static void findSquares( const Mat& image, vector<vector<Point> >& squares )
{
    squares.clear();

    Mat pyr, timg, gray0(image.size(), CV_8U), gray;

    // suppress noise by down-scaling the image and scaling it back up
    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
    pyrUp(pyr, timg, image.size());
    vector<vector<Point> > contours;

    // look for squares in each of the three colour planes
    for( int plane = 0; plane < 3; plane++ )
    {
        int ch[] = {plane, 0};
        mixChannels(&timg, 1, &gray0, 1, ch, 1);

        // try several threshold levels
        for( int level = 0; level < N; level++ )
        {
            if( level != 0 )
            {
                // plain binarisation for every level but the first:
                //     gray(x,y) = gray0(x,y) > (level+1)*255/N ? 255 : 0
                cv::threshold(gray0, gray, (level+1)*255/N, 255, THRESH_BINARY);
            }
            else
            {
                // hack: level 0 uses Canny instead of a zero threshold,
                // which helps to catch squares with gradient shading.
                // The upper threshold is the global `thresh`; the lower one
                // is 0 (which forces edges merging).
                Canny(gray0, gray, 0, thresh, 5);
                // dilate the Canny output to close potential holes
                // between edge segments
                dilate(gray, gray, Mat(), Point(-1,-1));
            }

            // collect all contours as a flat list
            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);

            vector<Point> approx;

            // test each contour
            for( size_t idx = 0; idx < contours.size(); idx++ )
            {
                // approximate the contour with an accuracy proportional
                // to its perimeter
                approxPolyDP(Mat(contours[idx]), approx, arcLength(Mat(contours[idx]), true)*0.02, true);

                // a square must have exactly 4 vertices after approximation ...
                if( approx.size() != 4 )
                    continue;
                // ... a relatively large area (filters out noisy contours; the
                // absolute value is taken because the sign of the area depends
                // on the contour orientation) ...
                if( !(fabs(contourArea(Mat(approx))) > 1000) )
                    continue;
                // ... and be convex.
                if( !isContourConvex(Mat(approx)) )
                    continue;

                // largest |cosine| of the angle between joint edges
                double maxCosine = 0;
                for( int j = 2; j < 5; j++ )
                {
                    double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                    maxCosine = MAX(maxCosine, cosine);
                }

                // all cosines small => all angles are ~90 degrees:
                // store the quadrangle vertices in the result
                if( maxCosine < 0.3 )
                    squares.push_back(approx);
            }
        }
    }
}
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
{
squares.clear();
@ -91,7 +201,6 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
vector<Point> approx;
// test each contour
for( size_t i = 0; i < contours.size(); i++ )
{
@ -106,11 +215,10 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
// area may be positive or negative - in accordance with the
// contour orientation
if( approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)) )
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)) )
{
double maxCosine = 0;
for( int j = 2; j < 5; j++ )
{
// find the maximum cosine of the angle between joint edges
@ -139,40 +247,93 @@ static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
int n = (int)squares[i].size();
polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, LINE_AA);
}
}
imshow(wndname, image);
// Builds a side-by-side comparison image: the left half is `image` with the
// pure-C++ squares drawn on it, the right half is `image` with the OCL squares.
// Each half is labelled ("C++" / "OCL") in white text over a thicker black outline.
static Mat drawSquaresBoth( const Mat& image,
                            const vector<vector<Point> >& sqsCPP,
                            const vector<vector<Point> >& sqsOCL
                          )
{
    // canvas twice as wide as the input; the two panes are views into it
    Mat canvas(Size(image.cols * 2, image.rows), image.type());
    Mat panes[2];
    panes[0] = canvas(Rect(Point(0, 0), image.size()));
    panes[1] = canvas(Rect(Point(image.cols, 0), image.size()));

    image.copyTo(panes[0]);
    image.copyTo(panes[1]);

    drawSquares(panes[0], sqsCPP);
    drawSquares(panes[1], sqsOCL);

    float fontScale = 0.8f;
    const char* const labels[2] = { "C++", "OCL" };
    // first pass: black, thickness 2 (outline); second pass: white, thickness 1
    const Scalar passColor[2] = { Scalar::all(0), Scalar::all(255) };
    const int passThickness[2] = { 2, 1 };

    for( int pass = 0; pass < 2; pass++ )
    {
        for( int side = 0; side < 2; side++ )
        {
            putText(panes[side], labels[side], Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL,
                    fontScale, passColor[pass], passThickness[pass]);
        }
    }

    return canvas;
}
int main(int /*argc*/, char** /*argv*/)
int main(int argc, char** argv)
{
const char* keys =
"{ i | input | | specify input image }"
"{ o | output | squares_output.jpg | specify output save path}";
CommandLineParser cmd(argc, argv, keys);
string inputName = cmd.get<string>("i");
string outfile = cmd.get<string>("o");
if(inputName.empty())
{
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
//ocl::setBinpath("F:/kernel_bin");
vector<ocl::Info> info;
CV_Assert(ocl::getDevice(info));
static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
"pic4.png", "pic5.png", "pic6.png", 0 };
help();
int iterations = 10;
namedWindow( wndname, 1 );
vector<vector<Point> > squares;
vector<vector<Point> > squares_cpu, squares_ocl;
for( int i = 0; names[i] != 0; i++ )
Mat image = imread(inputName, 1);
if( image.empty() )
{
Mat image = imread(names[i], 1);
if( image.empty() )
{
cout << "Couldn't load " << names[i] << endl;
continue;
}
cout << "Couldn't load " << inputName << endl;
return -1;
}
int j = iterations;
int64 t_ocl = 0, t_cpp = 0;
//warm-ups
cout << "warming up ..." << endl;
findSquares(image, squares_cpu);
findSquares_ocl(image, squares_ocl);
#if ACCURACY_CHECK
cout << "Checking ocl accuracy ... " << endl;
cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
#endif
do
{
int64 t_start = cv::getTickCount();
findSquares(image, squares_cpu);
t_cpp += cv::getTickCount() - t_start;
findSquares(image, squares);
drawSquares(image, squares);
int c = waitKey();
if( (char)c == 27 )
break;
t_start = cv::getTickCount();
findSquares_ocl(image, squares_ocl);
t_ocl += cv::getTickCount() - t_start;
cout << "run loop: " << j << endl;
}
while(--j);
cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;
Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
imshow(wndname, result);
imwrite(outfile, result);
waitKey(0);
return 0;
}

@ -12,56 +12,45 @@ using namespace cv;
using namespace std;
using namespace ocl;
bool help_showed = false;
// Command-line settings of the stereo matching sample.
struct Params
{
    // Sets the default method, ndisp and device type.
    Params();
    // Builds a Params object from the program arguments.
    static Params read(int argc, char** argv);

    string left;   // value of the --left option
    string right;  // value of the --right option

    // Display name of the selected method; empty string if `method`
    // holds none of the known enumerators.
    string method_str() const
    {
        if (method == BM)
            return "BM";
        if (method == BP)
            return "BP";
        if (method == CSBP)
            return "CSBP";
        return "";
    }

    enum {BM, BP, CSBP} method;
    int ndisp; // Max disparity + 1
    enum {GPU, CPU} type;
};
struct App
{
App(const Params& p);
App(CommandLineParser& cmd);
void run();
void handleKey(char key);
void printParams() const;
void workBegin() { work_begin = getTickCount(); }
void workBegin()
{
work_begin = getTickCount();
}
void workEnd()
{
int64 d = getTickCount() - work_begin;
double f = getTickFrequency();
work_fps = f / d;
}
string method_str() const
{
switch (method)
{
case BM:
return "BM";
case BP:
return "BP";
case CSBP:
return "CSBP";
}
return "";
}
string text() const
{
stringstream ss;
ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left)
<< setprecision(4) << work_fps;
ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left)
<< setprecision(4) << work_fps;
return ss.str();
}
private:
Params p;
bool running;
Mat left_src, right_src;
@ -74,42 +63,45 @@ private:
int64 work_begin;
double work_fps;
};
// Writes the usage text to stdout and records in `help_showed`
// that the help has been displayed.
static void printHelp()
{
    static const char usage[] =
        "Usage: stereo_match_gpu\n"
        "\t--left <left_view> --right <right_view> # must be rectified\n"
        "\t--method <stereo_match_method> # BM | BP | CSBP\n"
        "\t--ndisp <number> # number of disparity levels\n"
        "\t--type <device_type> # cpu | CPU | gpu | GPU\n";

    cout << usage;
    help_showed = true;
}
string l_img, r_img;
string out_img;
enum {BM, BP, CSBP} method;
int ndisp; // Max disparity + 1
enum {GPU, CPU} type;
};
int main(int argc, char** argv)
{
const char* keys =
"{ h | help | false | print help message }"
"{ l | left | | specify left image }"
"{ r | right | | specify right image }"
"{ m | method | BM | specify match method(BM/BP/CSBP) }"
"{ n | ndisp | 64 | specify number of disparity levels }"
"{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }"
"{ o | output | stereo_match_output.jpg | specify output path when input is images}";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
try
{
if (argc < 2)
{
printHelp();
return 1;
}
Params args = Params::read(argc, argv);
if (help_showed)
return -1;
App app(cmd);
int flag = CVCL_DEVICE_TYPE_GPU;
if(cmd.get<bool>("s") == true)
flag = CVCL_DEVICE_TYPE_CPU;
int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
vector<Info> info;
if(getDevice(info, flags[args.type]) == 0)
if(getDevice(info, flag) == 0)
{
throw runtime_error("Error: Did not find a valid OpenCL device!");
}
cout << "Device name:" << info[0].DeviceName[0] << endl;
App app(args);
app.run();
}
catch (const exception& e)
@ -119,77 +111,39 @@ int main(int argc, char** argv)
return 0;
}
// Defaults: BM matching, 64 disparity levels, GPU device.
// `left` and `right` are default-constructed (empty).
Params::Params()
    : method(BM),
      ndisp(64),
      type(GPU)
{
}
// Parses the program arguments into a Params object.
//
// Recognised keys:
//   --left <path>, --right <path>   input images
//   --method BM|BP|CSBP             stereo matching method
//   --ndisp <number>                converted with atoi
//   --type cpu|CPU|gpu|GPU          device type
//   --help                          prints the usage text; parsing continues
//
// Throws runtime_error for an unknown key, an unknown method or device type,
// and for a key that is not followed by its value. The last check stops the
// parser from reading argv[argc], a null pointer, when an option is the
// final argument.
Params Params::read(int argc, char** argv)
{
    Params p;

    for (int i = 1; i < argc; i++)
    {
        const string key(argv[i]);

        // --help is the only key that takes no value
        if (key == "--help")
        {
            printHelp();
            continue;
        }

        const bool known = key == "--left" || key == "--right" || key == "--method" ||
                           key == "--ndisp" || key == "--type";
        if (!known)
            throw runtime_error("unknown key: " + key);

        // every remaining key consumes exactly one following argument
        if (i + 1 >= argc)
            throw runtime_error("missing value for key: " + key);
        const string value(argv[++i]);

        if (key == "--left")
        {
            p.left = value;
        }
        else if (key == "--right")
        {
            p.right = value;
        }
        else if (key == "--method")
        {
            if (value == "BM") p.method = BM;
            else if (value == "BP") p.method = BP;
            else if (value == "CSBP") p.method = CSBP;
            else throw runtime_error("unknown stereo match method: " + value);
        }
        else if (key == "--ndisp")
        {
            p.ndisp = atoi(value.c_str());
        }
        else // --type
        {
            if (value == "cpu" || value == "CPU")
            {
                p.type = CPU;
            }
            else if (value == "gpu" || value == "GPU")
            {
                p.type = GPU;
            }
            else throw runtime_error("unknown device type: " + value);
        }
    }

    return p;
}
App::App(const Params& params)
: p(params), running(false)
App::App(CommandLineParser& cmd)
: running(false),method(BM)
{
cout << "stereo_match_ocl sample\n";
cout << "\nControls:\n"
<< "\tesc - exit\n"
<< "\tp - print current parameters\n"
<< "\tg - convert source images into gray\n"
<< "\tm - change stereo match method\n"
<< "\ts - change Sobel prefiltering flag (for BM only)\n"
<< "\t1/q - increase/decrease maximum disparity\n"
<< "\t2/w - increase/decrease window size (for BM only)\n"
<< "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
<< "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
<< "\tesc - exit\n"
<< "\tp - print current parameters\n"
<< "\tg - convert source images into gray\n"
<< "\tm - change stereo match method\n"
<< "\ts - change Sobel prefiltering flag (for BM only)\n"
<< "\t1/q - increase/decrease maximum disparity\n"
<< "\t2/w - increase/decrease window size (for BM only)\n"
<< "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
<< "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
l_img = cmd.get<string>("l");
r_img = cmd.get<string>("r");
string mstr = cmd.get<string>("m");
if(mstr == "BM") method = BM;
else if(mstr == "BP") method = BP;
else if(mstr == "CSBP") method = CSBP;
else cout << "unknown method!\n";
ndisp = cmd.get<int>("n");
out_img = cmd.get<string>("o");
}
void App::run()
{
// Load images
left_src = imread(p.left);
right_src = imread(p.right);
if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\"");
if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\"");
left_src = imread(l_img);
right_src = imread(r_img);
if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\"");
if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\"");
cvtColor(left_src, left, COLOR_BGR2GRAY);
cvtColor(right_src, right, COLOR_BGR2GRAY);
@ -201,14 +155,15 @@ void App::run()
imshow("right", right);
// Set common parameters
bm.ndisp = p.ndisp;
bp.ndisp = p.ndisp;
csbp.ndisp = p.ndisp;
bm.ndisp = ndisp;
bp.ndisp = ndisp;
csbp.ndisp = ndisp;
cout << endl;
printParams();
running = true;
bool written = false;
while (running)
{
@ -216,9 +171,9 @@ void App::run()
Mat disp;
oclMat d_disp;
workBegin();
switch (p.method)
switch (method)
{
case Params::BM:
case BM:
if (d_left.channels() > 1 || d_right.channels() > 1)
{
cout << "BM doesn't support color images\n";
@ -232,25 +187,27 @@ void App::run()
}
bm(d_left, d_right, d_disp);
break;
case Params::BP:
case BP:
bp(d_left, d_right, d_disp);
break;
case Params::CSBP:
case CSBP:
csbp(d_left, d_right, d_disp);
break;
}
ocl::finish();
workEnd();
// Show results
d_disp.download(disp);
if (p.method != Params::BM)
workEnd();
if (method != BM)
{
disp.convertTo(disp, 0);
}
putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
imshow("disparity", disp);
if(!written)
{
imwrite(out_img, disp);
written = true;
}
handleKey((char)waitKey(3));
}
}
@ -261,19 +218,19 @@ void App::printParams() const
cout << "--- Parameters ---\n";
cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
cout << "image_channels: " << left.channels() << endl;
cout << "method: " << p.method_str() << endl
<< "ndisp: " << p.ndisp << endl;
switch (p.method)
cout << "method: " << method_str() << endl
<< "ndisp: " << ndisp << endl;
switch (method)
{
case Params::BM:
case BM:
cout << "win_size: " << bm.winSize << endl;
cout << "prefilter_sobel: " << bm.preset << endl;
break;
case Params::BP:
case BP:
cout << "iter_count: " << bp.iters << endl;
cout << "level_count: " << bp.levels << endl;
break;
case Params::CSBP:
case CSBP:
cout << "iter_count: " << csbp.iters << endl;
cout << "level_count: " << csbp.levels << endl;
break;
@ -289,11 +246,13 @@ void App::handleKey(char key)
case 27:
running = false;
break;
case 'p': case 'P':
case 'p':
case 'P':
printParams();
break;
case 'g': case 'G':
if (left.channels() == 1 && p.method != Params::BM)
case 'g':
case 'G':
if (left.channels() == 1 && method != BM)
{
left = left_src;
right = right_src;
@ -309,23 +268,25 @@ void App::handleKey(char key)
imshow("left", left);
imshow("right", right);
break;
case 'm': case 'M':
switch (p.method)
case 'm':
case 'M':
switch (method)
{
case Params::BM:
p.method = Params::BP;
case BM:
method = BP;
break;
case Params::BP:
p.method = Params::CSBP;
case BP:
method = CSBP;
break;
case Params::CSBP:
p.method = Params::BM;
case CSBP:
method = BM;
break;
}
cout << "method: " << p.method_str() << endl;
cout << "method: " << method_str() << endl;
break;
case 's': case 'S':
if (p.method == Params::BM)
case 's':
case 'S':
if (method == BM)
{
switch (bm.preset)
{
@ -340,76 +301,80 @@ void App::handleKey(char key)
}
break;
case '1':
p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
cout << "ndisp: " << p.ndisp << endl;
bm.ndisp = p.ndisp;
bp.ndisp = p.ndisp;
csbp.ndisp = p.ndisp;
ndisp == 1 ? ndisp = 8 : ndisp += 8;
cout << "ndisp: " << ndisp << endl;
bm.ndisp = ndisp;
bp.ndisp = ndisp;
csbp.ndisp = ndisp;
break;
case 'q': case 'Q':
p.ndisp = max(p.ndisp - 8, 1);
cout << "ndisp: " << p.ndisp << endl;
bm.ndisp = p.ndisp;
bp.ndisp = p.ndisp;
csbp.ndisp = p.ndisp;
case 'q':
case 'Q':
ndisp = max(ndisp - 8, 1);
cout << "ndisp: " << ndisp << endl;
bm.ndisp = ndisp;
bp.ndisp = ndisp;
csbp.ndisp = ndisp;
break;
case '2':
if (p.method == Params::BM)
if (method == BM)
{
bm.winSize = min(bm.winSize + 1, 51);
cout << "win_size: " << bm.winSize << endl;
}
break;
case 'w': case 'W':
if (p.method == Params::BM)
case 'w':
case 'W':
if (method == BM)
{
bm.winSize = max(bm.winSize - 1, 2);
cout << "win_size: " << bm.winSize << endl;
}
break;
case '3':
if (p.method == Params::BP)
if (method == BP)
{
bp.iters += 1;
cout << "iter_count: " << bp.iters << endl;
}
else if (p.method == Params::CSBP)
else if (method == CSBP)
{
csbp.iters += 1;
cout << "iter_count: " << csbp.iters << endl;
}
break;
case 'e': case 'E':
if (p.method == Params::BP)
case 'e':
case 'E':
if (method == BP)
{
bp.iters = max(bp.iters - 1, 1);
cout << "iter_count: " << bp.iters << endl;
}
else if (p.method == Params::CSBP)
else if (method == CSBP)
{
csbp.iters = max(csbp.iters - 1, 1);
cout << "iter_count: " << csbp.iters << endl;
}
break;
case '4':
if (p.method == Params::BP)
if (method == BP)
{
bp.levels += 1;
cout << "level_count: " << bp.levels << endl;
}
else if (p.method == Params::CSBP)
else if (method == CSBP)
{
csbp.levels += 1;
cout << "level_count: " << csbp.levels << endl;
}
break;
case 'r': case 'R':
if (p.method == Params::BP)
case 'r':
case 'R':
if (method == BP)
{
bp.levels = max(bp.levels - 1, 1);
cout << "level_count: " << bp.levels << endl;
}
else if (p.method == Params::CSBP)
else if (method == CSBP)
{
csbp.levels = max(csbp.levels - 1, 1);
cout << "level_count: " << csbp.levels << endl;
@ -417,5 +382,3 @@ void App::handleKey(char key)
break;
}
}

@ -1,48 +1,3 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iostream>
#include <stdio.h>
#include "opencv2/core/core.hpp"
@ -62,14 +17,6 @@ const float GOOD_PORTION = 0.15f;
namespace
{
void help();
void help()
{
std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
}
int64 work_begin = 0;
int64 work_end = 0;
@ -82,7 +29,8 @@ void workEnd()
{
work_end = getTickCount() - work_begin;
}
double getTime(){
double getTime()
{
return work_end /((double)getTickFrequency() * 1000.);
}
@ -125,7 +73,7 @@ Mat drawGoodMatches(
std::sort(matches.begin(), matches.end());
std::vector< DMatch > good_matches;
double minDist = matches.front().distance,
maxDist = matches.back().distance;
maxDist = matches.back().distance;
const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
for( int i = 0; i < ptsPairs; i++ )
@ -140,8 +88,8 @@ Mat drawGoodMatches(
// drawing the results
Mat img_matches;
drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
//-- Localize the object
std::vector<Point2f> obj;
@ -155,8 +103,10 @@ Mat drawGoodMatches(
}
//-- Get the corners from the image_1 ( the object to be "detected" )
std::vector<Point2f> obj_corners(4);
obj_corners[0] = Point(0,0); obj_corners[1] = Point( cpu_img1.cols, 0 );
obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = Point( 0, cpu_img1.rows );
obj_corners[0] = Point(0,0);
obj_corners[1] = Point( cpu_img1.cols, 0 );
obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows );
obj_corners[3] = Point( 0, cpu_img1.rows );
std::vector<Point2f> scene_corners(4);
Mat H = findHomography( obj, scene, RANSAC );
@ -166,17 +116,17 @@ Mat drawGoodMatches(
//-- Draw lines between the corners (the mapped object in the scene - image_2 )
line( img_matches,
scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
line( img_matches,
scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
line( img_matches,
scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
line( img_matches,
scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
Scalar( 0, 255, 0), 2, LINE_AA );
return img_matches;
}
@ -186,6 +136,21 @@ Mat drawGoodMatches(
// use cpu findHomography interface to calculate the transformation matrix
int main(int argc, char* argv[])
{
const char* keys =
"{ help h | false | print help message }"
"{ left l | | specify left image }"
"{ right r | | specify right image }"
"{ output o | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
"{ use_cpu c | false | use CPU algorithms }"
"{ use_all a | false | use both CPU and GPU algorithms}";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
std::cout << "Avaible options:" << std::endl;
cmd.printMessage();
return 0;
}
std::vector<cv::ocl::Info> info;
if(cv::ocl::getDevice(info) == 0)
{
@ -196,54 +161,38 @@ int main(int argc, char* argv[])
Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
oclMat img1, img2;
bool useCPU = false;
bool useCPU = cmd.get<bool>("c");
bool useGPU = false;
bool useALL = false;
bool useALL = cmd.get<bool>("a");
std::string outpath = cmd.get<std::string>("o");
cpu_img1 = imread(cmd.get<std::string>("l"));
CV_Assert(!cpu_img1.empty());
cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
img1 = cpu_img1_grey;
for (int i = 1; i < argc; ++i)
cpu_img2 = imread(cmd.get<std::string>("r"));
CV_Assert(!cpu_img2.empty());
cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
img2 = cpu_img2_grey;
if(useALL)
{
if (String(argv[i]) == "--left")
{
cpu_img1 = imread(argv[++i]);
CV_Assert(!cpu_img1.empty());
cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
img1 = cpu_img1_grey;
}
else if (String(argv[i]) == "--right")
{
cpu_img2 = imread(argv[++i]);
CV_Assert(!cpu_img2.empty());
cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
img2 = cpu_img2_grey;
}
else if (String(argv[i]) == "-c")
{
useCPU = true;
useGPU = false;
useALL = false;
}else if(String(argv[i]) == "-g")
{
useGPU = true;
useCPU = false;
useALL = false;
}else if(String(argv[i]) == "-a")
{
useALL = true;
useCPU = false;
useGPU = false;
}
else if (String(argv[i]) == "--help")
{
help();
return -1;
}
useCPU = false;
useGPU = false;
}
else if(useCPU==false && useALL==false)
{
useGPU = true;
}
if(!useCPU)
{
std::cout
<< "Device name:"
<< info[0].DeviceName[0]
<< std::endl;
<< "Device name:"
<< info[0].DeviceName[0]
<< std::endl;
}
double surf_time = 0.;
@ -299,7 +248,8 @@ int main(int argc, char* argv[])
surf_time = getTime();
std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
}else
}
else
{
//cpu runs
for (int i = 0; i <= LOOP_NUM; i++)
@ -354,7 +304,7 @@ int main(int argc, char* argv[])
for(size_t i = 0; i < cpu_corner.size(); i++)
{
if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
{
std::cout<<"Failed\n";
result = false;
@ -372,12 +322,15 @@ int main(int argc, char* argv[])
{
namedWindow("cpu surf matches", 0);
imshow("cpu surf matches", img_matches);
imwrite(outpath, img_matches);
}
else if(useGPU)
{
namedWindow("ocl surf matches", 0);
imshow("ocl surf matches", img_matches);
}else
imwrite(outpath, img_matches);
}
else
{
namedWindow("cpu surf matches", 0);
imshow("cpu surf matches", img_matches);

@ -0,0 +1,264 @@
#include <iostream>
#include <vector>
#include <iomanip>
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
#include "opencv2/video/video.hpp"
using namespace std;
using namespace cv;
using namespace cv::ocl;
// Shorthand alias for unsigned char.
typedef unsigned char uchar;
// Number of timed iterations in the still-image path of main(); the loop there
// runs LOOP_NUM + 1 times and leaves the first pass (i == 0) untimed.
#define LOOP_NUM 10
// Tick count captured by workBegin() at the start of the current timed section.
int64 work_begin = 0;
// Running total of ticks accumulated by workEnd() over all timed sections.
int64 work_end = 0;
static void workBegin()
{
work_begin = getTickCount();
}
static void workEnd()
{
work_end += (getTickCount() - work_begin);
}
// Converts the accumulated tick total (work_end) to milliseconds.
static double getTime()
{
    const double ticksPerSecond = getTickFrequency();
    return work_end * 1000. / ticksPerSecond;
}
// Limits x to the interval [a, b]: returns a when x is not greater than a,
// b when x is not less than b, and x itself otherwise.
template <typename T> inline T clamp (T x, T a, T b)
{
    return x > a ? (x < b ? x : b) : a;
}

// Linearly remaps x from the source interval [a, b] to the target interval [c, d].
// x is clamped to [a, b] first, so the result never leaves [c, d].
// A degenerate source interval (a == b) has no slope to interpolate along;
// in that case c is returned instead of dividing by zero.
template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
{
    if (b == a)
        return c;

    // Qualified on purpose: this file has "using namespace std;", and on
    // toolchains that provide std::clamp (C++17) an unqualified call is ambiguous.
    x = ::clamp(x, a, b);
    return c + (d - c) * (x - a) / (b - a);
}
static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
{
float maxDisplacement = 1.0f;
for (int i = 0; i < u.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
for (int j = 0; j < u.cols; ++j)
{
float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
if (d > maxDisplacement)
maxDisplacement = d;
}
}
flowField.create(u.size(), CV_8UC4);
for (int i = 0; i < flowField.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
Vec4b* row = flowField.ptr<Vec4b>(i);
for (int j = 0; j < flowField.cols; ++j)
{
row[j][0] = 0;
row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][3] = 255;
}
}
}
int main(int argc, const char* argv[])
{
static std::vector<Info> ocl_info;
ocl::getDevice(ocl_info);
//if you want to use undefault device, set it here
setDevice(ocl_info[0]);
//set this to save kernel compile time from second time you run
ocl::setBinpath("./");
const char* keys =
"{ h | help | false | print help message }"
"{ l | left | | specify left image }"
"{ r | right | | specify right image }"
"{ o | output | tvl1_output.jpg | specify output save path }"
"{ c | camera | 0 | enable camera capturing }"
"{ s | use_cpu | false | use cpu or gpu to process the image }"
"{ v | video | | use video as input }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
cout << "Usage: pyrlk_optical_flow [options]" << endl;
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
bool defaultPicturesFail = false;
string fname0 = cmd.get<string>("l");
string fname1 = cmd.get<string>("r");
string vdofile = cmd.get<string>("v");
string outpath = cmd.get<string>("o");
bool useCPU = cmd.get<bool>("s");
bool useCamera = cmd.get<bool>("c");
int inputName = cmd.get<int>("c");
Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
Mat flow, show_flow;
Mat flow_vec[2];
if (frame0.empty() || frame1.empty())
{
useCamera = true;
defaultPicturesFail = true;
VideoCapture capture( inputName );
if (!capture.isOpened())
{
cout << "Can't load input images" << endl;
return -1;
}
}
if (useCamera)
{
VideoCapture capture;
Mat frame, frameCopy;
Mat frame0Gray, frame1Gray;
Mat ptr0, ptr1;
if(vdofile == "")
capture.open( inputName );
else
capture.open(vdofile.c_str());
int c = inputName ;
if(!capture.isOpened())
{
if(vdofile == "")
cout << "Capture from CAM " << c << " didn't work" << endl;
else
cout << "Capture from file " << vdofile << " failed" <<endl;
if (defaultPicturesFail)
{
return -1;
}
goto nocamera;
}
cout << "In capture ..." << endl;
for(int i = 0;; i++)
{
if( !capture.read(frame) )
break;
if (i == 0)
{
frame.copyTo( frame0 );
cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
}
else
{
if (i%2 == 1)
{
frame.copyTo(frame1);
cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
ptr0 = frame0Gray;
ptr1 = frame1Gray;
}
else
{
frame.copyTo(frame0);
cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
ptr0 = frame1Gray;
ptr1 = frame0Gray;
}
if (useCPU)
{
alg->calc(ptr0, ptr1, flow);
split(flow, flow_vec);
}
else
{
oclMat d_flowx, d_flowy;
d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy);
d_flowx.download(flow_vec[0]);
d_flowy.download(flow_vec[1]);
}
if (i%2 == 1)
frame1.copyTo(frameCopy);
else
frame0.copyTo(frameCopy);
getFlowField(flow_vec[0], flow_vec[1], show_flow);
imshow("PyrLK [Sparse]", show_flow);
}
if( waitKey( 10 ) >= 0 )
goto _cleanup_;
}
waitKey(0);
_cleanup_:
capture.release();
}
else
{
nocamera:
oclMat d_flowx, d_flowy;
for(int i = 0; i <= LOOP_NUM; i ++)
{
cout << "loop" << i << endl;
if (i > 0) workBegin();
if (useCPU)
{
alg->calc(frame0, frame1, flow);
split(flow, flow_vec);
}
else
{
d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
d_flowx.download(flow_vec[0]);
d_flowy.download(flow_vec[1]);
}
if (i > 0 && i <= LOOP_NUM)
workEnd();
if (i == LOOP_NUM)
{
if (useCPU)
cout << "average CPU time (noCamera) : ";
else
cout << "average GPU time (noCamera) : ";
cout << getTime() / LOOP_NUM << " ms" << endl;
getFlowField(flow_vec[0], flow_vec[1], show_flow);
imshow("PyrLK [Sparse]", show_flow);
imwrite(outpath, show_flow);
}
}
}
waitKey();
return 0;
}
Loading…
Cancel
Save