Merge remote-tracking branch 'origin/2.4' into merge-2.4

Conflicts:
	modules/calib3d/include/opencv2/calib3d/calib3d.hpp
	modules/core/include/opencv2/core/core.hpp
	modules/core/include/opencv2/core/cuda/limits.hpp
	modules/core/include/opencv2/core/internal.hpp
	modules/core/src/matrix.cpp
	modules/nonfree/test/test_features2d.cpp
	modules/ocl/include/opencv2/ocl/ocl.hpp
	modules/ocl/src/hog.cpp
	modules/ocl/test/test_haar.cpp
	modules/ocl/test/test_objdetect.cpp
	modules/ocl/test/test_pyrup.cpp
	modules/ts/src/precomp.hpp
	samples/ocl/facedetect.cpp
	samples/ocl/hog.cpp
	samples/ocl/pyrlk_optical_flow.cpp
	samples/ocl/surf_matcher.cpp
pull/1046/head
Roman Donchenko 12 years ago
commit f36f8067bc
  1. 2
      cmake/OpenCVDetectOpenCL.cmake
  2. 2
      modules/calib3d/include/opencv2/calib3d.hpp
  3. 215
      modules/core/include/opencv2/core/cuda/limits.hpp
  4. 3
      modules/core/include/opencv2/core/mat.hpp
  5. 24
      modules/core/include/opencv2/core/private.hpp
  6. 30
      modules/core/src/matrix.cpp
  7. 27
      modules/core/src/parallel.cpp
  8. 2
      modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
  9. 4
      modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
  10. 12
      modules/highgui/include/opencv2/highgui/cap_ios.h
  11. 86
      modules/highgui/src/cap_ios_abstract_camera.mm
  12. 48
      modules/highgui/src/cap_ios_video_camera.mm
  13. 81
      modules/highgui/src/window_QT.cpp
  14. 1
      modules/highgui/src/window_QT.h
  15. 14
      modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java
  16. 73
      modules/nonfree/test/test_features2d.cpp
  17. 8
      modules/ocl/include/opencv2/ocl.hpp
  18. 2
      modules/ocl/include/opencv2/ocl/private/util.hpp
  19. 2
      modules/ocl/perf/main.cpp
  20. 91
      modules/ocl/perf/perf_calib3d.cpp
  21. 16
      modules/ocl/perf/perf_filters.cpp
  22. 76
      modules/ocl/perf/perf_hog.cpp
  23. 46
      modules/ocl/perf/perf_imgproc.cpp
  24. 48
      modules/ocl/perf/perf_moments.cpp
  25. 14
      modules/ocl/perf/precomp.cpp
  26. 457
      modules/ocl/src/hog.cpp
  27. 31
      modules/ocl/src/matrix_operations.cpp
  28. 4
      modules/ocl/src/mcwutil.cpp
  29. 494
      modules/ocl/src/opencl/objdetect_hog.cl
  30. 180
      modules/ocl/test/test_haar.cpp
  31. 46
      modules/ocl/test/test_imgproc.cpp
  32. 216
      modules/ocl/test/test_objdetect.cpp
  33. 42
      modules/ocl/test/test_pyramids.cpp
  34. 90
      modules/ocl/test/test_pyrup.cpp
  35. 102
      modules/ocl/test/utility.cpp
  36. 11
      modules/ocl/test/utility.hpp
  37. 39
      modules/ts/misc/testlog_parser.py
  38. 167
      modules/ts/misc/xls-report.py
  39. 72
      modules/ts/src/ts_func.cpp
  40. 91
      samples/ocl/facedetect.cpp
  41. 290
      samples/ocl/hog.cpp
  42. 24
      samples/ocl/pyrlk_optical_flow.cpp
  43. 227
      samples/ocl/squares.cpp
  44. 291
      samples/ocl/stereo_match.cpp
  45. 125
      samples/ocl/surf_matcher.cpp
  46. 264
      samples/ocl/tvl1_optical_flow.cpp

@ -44,7 +44,7 @@ if(OPENCL_FOUND)
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
if(WIN64) if(WIN32 AND X86_64)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
elseif(WIN32) elseif(WIN32)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)

@ -278,7 +278,7 @@ CV_EXPORTS int recoverPose( InputArray E, InputArray points1, InputArray points2
//! finds coordinates of epipolar lines corresponding the specified points //! finds coordinates of epipolar lines corresponding the specified points
CV_EXPORTS void computeCorrespondEpilines( InputArray points, int whichImage, CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage,
InputArray F, OutputArray lines ); InputArray F, OutputArray lines );
CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2, CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2,

@ -43,193 +43,80 @@
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__ #ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__ #define __OPENCV_GPU_LIMITS_GPU_HPP__
#include <limits> #include <limits.h>
#include <float.h>
#include "common.hpp" #include "common.hpp"
namespace cv { namespace gpu { namespace cudev namespace cv { namespace gpu { namespace cudev
{ {
template<class T> struct numeric_limits
{
typedef T type;
__device__ __forceinline__ static type min() { return type(); };
__device__ __forceinline__ static type max() { return type(); };
__device__ __forceinline__ static type epsilon() { return type(); }
__device__ __forceinline__ static type round_error() { return type(); }
__device__ __forceinline__ static type denorm_min() { return type(); }
__device__ __forceinline__ static type infinity() { return type(); }
__device__ __forceinline__ static type quiet_NaN() { return type(); }
__device__ __forceinline__ static type signaling_NaN() { return T(); }
static const bool is_signed;
};
template<> struct numeric_limits<bool> template <class T> struct numeric_limits;
{
typedef bool type;
__device__ __forceinline__ static type min() { return false; };
__device__ __forceinline__ static type max() { return true; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<char>
{
typedef char type;
__device__ __forceinline__ static type min() { return CHAR_MIN; };
__device__ __forceinline__ static type max() { return CHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (char)-1 == -1;
};
template<> struct numeric_limits<signed char> template <> struct numeric_limits<bool>
{ {
typedef char type; __device__ __forceinline__ static bool min() { return false; }
__device__ __forceinline__ static type min() { return SCHAR_MIN; }; __device__ __forceinline__ static bool max() { return true; }
__device__ __forceinline__ static type max() { return SCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (signed char)-1 == -1;
};
template<> struct numeric_limits<unsigned char>
{
typedef unsigned char type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits<short> template <> struct numeric_limits<signed char>
{ {
typedef short type; __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
__device__ __forceinline__ static type min() { return SHRT_MIN; }; __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
__device__ __forceinline__ static type max() { return SHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template<> struct numeric_limits<unsigned short> template <> struct numeric_limits<unsigned char>
{ {
typedef unsigned short type; __device__ __forceinline__ static unsigned char min() { return 0; }
__device__ __forceinline__ static type min() { return 0; }; __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
__device__ __forceinline__ static type max() { return USHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits<int> template <> struct numeric_limits<short>
{ {
typedef int type; __device__ __forceinline__ static short min() { return SHRT_MIN; }
__device__ __forceinline__ static type min() { return INT_MIN; }; __device__ __forceinline__ static short max() { return SHRT_MAX; }
__device__ __forceinline__ static type max() { return INT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template <> struct numeric_limits<unsigned short>
template<> struct numeric_limits<unsigned int> {
{ __device__ __forceinline__ static unsigned short min() { return 0; }
typedef unsigned int type; __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UINT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits<long> template <> struct numeric_limits<int>
{ {
typedef long type; __device__ __forceinline__ static int min() { return INT_MIN; }
__device__ __forceinline__ static type min() { return LONG_MIN; }; __device__ __forceinline__ static int max() { return INT_MAX; }
__device__ __forceinline__ static type max() { return LONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template<> struct numeric_limits<unsigned long> template <> struct numeric_limits<unsigned int>
{ {
typedef unsigned long type; __device__ __forceinline__ static unsigned int min() { return 0; }
__device__ __forceinline__ static type min() { return 0; }; __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
__device__ __forceinline__ static type max() { return ULONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false; static const bool is_signed = false;
}; };
template<> struct numeric_limits<float> template <> struct numeric_limits<float>
{ {
typedef float type; __device__ __forceinline__ static float min() { return FLT_MIN; }
__device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; }; __device__ __forceinline__ static float max() { return FLT_MAX; }
__device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; }; __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
__device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
template<> struct numeric_limits<double> template <> struct numeric_limits<double>
{ {
typedef double type; __device__ __forceinline__ static double min() { return DBL_MIN; }
__device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; }; __device__ __forceinline__ static double max() { return DBL_MAX; }
__device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; }; __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true; static const bool is_signed = true;
}; };
}}} // namespace cv { namespace gpu { namespace cudev { }}} // namespace cv { namespace gpu { namespace cudev {
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__ #endif // __OPENCV_GPU_LIMITS_GPU_HPP__

@ -78,7 +78,8 @@ public:
EXPR = 6 << KIND_SHIFT, EXPR = 6 << KIND_SHIFT,
OPENGL_BUFFER = 7 << KIND_SHIFT, OPENGL_BUFFER = 7 << KIND_SHIFT,
CUDA_MEM = 8 << KIND_SHIFT, CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT GPU_MAT = 9 << KIND_SHIFT,
OCL_MAT =10 << KIND_SHIFT
}; };
_InputArray(); _InputArray();

@ -71,6 +71,30 @@
# endif # endif
#endif #endif
// Parallel backend detection.
// HAVE_OPENMP, HAVE_GCD and HAVE_CONCURRENCY are derived here from
// compiler/platform predefined macros. HAVE_TBB and HAVE_CSTRIPES are only
// tested below, never defined here.
#ifdef _OPENMP
# define HAVE_OPENMP
#endif
#ifdef __APPLE__
# define HAVE_GCD
#endif
// Only MSVC builds reporting _MSC_VER >= 1600 enable the Concurrency backend.
#if defined _MSC_VER && _MSC_VER >= 1600
# define HAVE_CONCURRENCY
#endif
// CV_PARALLEL_FRAMEWORK names the backend that wins the precedence chain:
// tbb, cstripes, openmp, gcd, ms-concurrency (first match is used).
// It stays undefined when none of the HAVE_* macros above is set.
#if defined HAVE_TBB
# define CV_PARALLEL_FRAMEWORK "tbb"
#elif defined HAVE_CSTRIPES
# define CV_PARALLEL_FRAMEWORK "cstripes"
#elif defined HAVE_OPENMP
# define CV_PARALLEL_FRAMEWORK "openmp"
#elif defined HAVE_GCD
# define CV_PARALLEL_FRAMEWORK "gcd"
#elif defined HAVE_CONCURRENCY
# define CV_PARALLEL_FRAMEWORK "ms-concurrency"
#endif
namespace cv namespace cv
{ {
#ifdef HAVE_TBB #ifdef HAVE_TBB

@ -995,6 +995,11 @@ Mat _InputArray::getMat(int i) const
return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat(); return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
} }
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
if( k == STD_VECTOR_MAT ) if( k == STD_VECTOR_MAT )
{ {
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj; const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
@ -1100,6 +1105,11 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
return; return;
} }
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
CV_Assert( k == STD_VECTOR_MAT ); CV_Assert( k == STD_VECTOR_MAT );
//if( k == STD_VECTOR_MAT ) //if( k == STD_VECTOR_MAT )
{ {
@ -1224,6 +1234,11 @@ Size _InputArray::size(int i) const
return d_mat->size(); return d_mat->size();
} }
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
CV_Assert( k == CUDA_MEM ); CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM ) //if( k == CUDA_MEM )
{ {
@ -1338,6 +1353,11 @@ bool _InputArray::empty() const
if( k == OPENGL_BUFFER ) if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->empty(); return ((const ogl::Buffer*)obj)->empty();
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
if( k == GPU_MAT ) if( k == GPU_MAT )
return ((const gpu::GpuMat*)obj)->empty(); return ((const gpu::GpuMat*)obj)->empty();
@ -1573,6 +1593,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all
return; return;
} }
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
if( k == NONE ) if( k == NONE )
{ {
CV_Error(CV_StsNullPtr, "create() called for the missing output array" ); CV_Error(CV_StsNullPtr, "create() called for the missing output array" );
@ -1684,6 +1709,11 @@ void _OutputArray::release() const
return; return;
} }
if( k == OCL_MAT )
{
CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
}
CV_Assert( k == STD_VECTOR_MAT ); CV_Assert( k == STD_VECTOR_MAT );
//if( k == STD_VECTOR_MAT ) //if( k == STD_VECTOR_MAT )
{ {

@ -61,17 +61,6 @@
#endif #endif
#endif #endif
#ifdef _OPENMP
#define HAVE_OPENMP
#endif
#ifdef __APPLE__
#define HAVE_GCD
#endif
#if defined _MSC_VER && _MSC_VER >= 1600
#define HAVE_CONCURRENCY
#endif
/* IMPORTANT: always use the same order of defines /* IMPORTANT: always use the same order of defines
1. HAVE_TBB - 3rdparty library, should be explicitly enabled 1. HAVE_TBB - 3rdparty library, should be explicitly enabled
@ -110,10 +99,6 @@
#endif #endif
#endif #endif
#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY
#define HAVE_PARALLEL_FRAMEWORK
#endif
namespace cv namespace cv
{ {
ParallelLoopBody::~ParallelLoopBody() {} ParallelLoopBody::~ParallelLoopBody() {}
@ -121,7 +106,7 @@ namespace cv
namespace namespace
{ {
#ifdef HAVE_PARALLEL_FRAMEWORK #ifdef CV_PARALLEL_FRAMEWORK
class ParallelLoopBodyWrapper class ParallelLoopBodyWrapper
{ {
public: public:
@ -218,7 +203,7 @@ public:
static SchedPtr pplScheduler; static SchedPtr pplScheduler;
#endif #endif
#endif // HAVE_PARALLEL_FRAMEWORK #endif // CV_PARALLEL_FRAMEWORK
} //namespace } //namespace
@ -226,7 +211,7 @@ static SchedPtr pplScheduler;
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{ {
#ifdef HAVE_PARALLEL_FRAMEWORK #ifdef CV_PARALLEL_FRAMEWORK
if(numThreads != 0) if(numThreads != 0)
{ {
@ -281,7 +266,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
} }
else else
#endif // HAVE_PARALLEL_FRAMEWORK #endif // CV_PARALLEL_FRAMEWORK
{ {
(void)nstripes; (void)nstripes;
body(range); body(range);
@ -290,7 +275,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
int cv::getNumThreads(void) int cv::getNumThreads(void)
{ {
#ifdef HAVE_PARALLEL_FRAMEWORK #ifdef CV_PARALLEL_FRAMEWORK
if(numThreads == 0) if(numThreads == 0)
return 1; return 1;
@ -333,7 +318,7 @@ int cv::getNumThreads(void)
void cv::setNumThreads( int threads ) void cv::setNumThreads( int threads )
{ {
(void)threads; (void)threads;
#ifdef HAVE_PARALLEL_FRAMEWORK #ifdef CV_PARALLEL_FRAMEWORK
numThreads = threads; numThreads = threads;
#endif #endif

@ -126,7 +126,7 @@ typedef int Ncv32s;
typedef unsigned int Ncv32u; typedef unsigned int Ncv32u;
typedef short Ncv16s; typedef short Ncv16s;
typedef unsigned short Ncv16u; typedef unsigned short Ncv16u;
typedef char Ncv8s; typedef signed char Ncv8s;
typedef unsigned char Ncv8u; typedef unsigned char Ncv8u;
typedef float Ncv32f; typedef float Ncv32f;
typedef double Ncv64f; typedef double Ncv64f;

@ -51,7 +51,7 @@ template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
template<> static inline __host__ __device__ Ncv8u _pixMaxVal<Ncv8u>() {return UCHAR_MAX;} template<> static inline __host__ __device__ Ncv8u _pixMaxVal<Ncv8u>() {return UCHAR_MAX;}
template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;} template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;} template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
template<> static inline __host__ __device__ Ncv8s _pixMaxVal<Ncv8s>() {return CHAR_MAX;} template<> static inline __host__ __device__ Ncv8s _pixMaxVal<Ncv8s>() {return SCHAR_MAX;}
template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;} template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;} template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;} template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
@ -61,7 +61,7 @@ template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
template<> static inline __host__ __device__ Ncv8u _pixMinVal<Ncv8u>() {return 0;} template<> static inline __host__ __device__ Ncv8u _pixMinVal<Ncv8u>() {return 0;}
template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;} template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;} template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
template<> static inline __host__ __device__ Ncv8s _pixMinVal<Ncv8s>() {return CHAR_MIN;} template<> static inline __host__ __device__ Ncv8s _pixMinVal<Ncv8s>() {return SCHAR_MIN;}
template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;} template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;} template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;} template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}

@ -1,6 +1,4 @@
/* /* For iOS video I/O
* cap_ios.h
* For iOS video I/O
* by Eduard Feicho on 29/07/12 * by Eduard Feicho on 29/07/12
* Copyright 2012. All rights reserved. * Copyright 2012. All rights reserved.
* *
@ -90,6 +88,12 @@
- (void)createVideoPreviewLayer; - (void)createVideoPreviewLayer;
- (void)updateOrientation; - (void)updateOrientation;
- (void)lockFocus;
- (void)unlockFocus;
- (void)lockExposure;
- (void)unlockExposure;
- (void)lockBalance;
- (void)unlockBalance;
@end @end
@ -116,6 +120,7 @@
BOOL grayscaleMode; BOOL grayscaleMode;
BOOL recordVideo; BOOL recordVideo;
BOOL rotateVideo;
AVAssetWriterInput* recordAssetWriterInput; AVAssetWriterInput* recordAssetWriterInput;
AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
AVAssetWriter* recordAssetWriter; AVAssetWriter* recordAssetWriter;
@ -128,6 +133,7 @@
@property (nonatomic, assign) BOOL grayscaleMode; @property (nonatomic, assign) BOOL grayscaleMode;
@property (nonatomic, assign) BOOL recordVideo; @property (nonatomic, assign) BOOL recordVideo;
@property (nonatomic, assign) BOOL rotateVideo;
@property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput; @property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput;
@property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; @property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
@property (nonatomic, retain) AVAssetWriter* recordAssetWriter; @property (nonatomic, retain) AVAssetWriter* recordAssetWriter;

@ -2,6 +2,7 @@
* cap_ios_abstract_camera.mm * cap_ios_abstract_camera.mm
* For iOS video I/O * For iOS video I/O
* by Eduard Feicho on 29/07/12 * by Eduard Feicho on 29/07/12
* by Alexander Shishkov on 17/07/13
* Copyright 2012. All rights reserved. * Copyright 2012. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -405,4 +406,89 @@
} }
} }
// Puts the default video capture device into locked-focus mode.
// Does nothing when the device does not report support for that mode; a
// failure to obtain the configuration lock is logged and otherwise ignored.
- (void)lockFocus
{
    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![camera isFocusModeSupported:AVCaptureFocusModeLocked]) {
        return;
    }

    NSError *lockError = nil;
    if (![camera lockForConfiguration:&lockError]) {
        NSLog(@"unable to lock device for locked focus configuration %@", [lockError localizedDescription]);
        return;
    }

    // The mode is changed only while the configuration lock is held.
    [camera setFocusMode:AVCaptureFocusModeLocked];
    [camera unlockForConfiguration];
}
// Returns the default video capture device to continuous auto-focus.
// Does nothing when the device does not report support for that mode; a
// failure to obtain the configuration lock is logged and otherwise ignored.
- (void)unlockFocus
{
    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![camera isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
        return;
    }

    NSError *lockError = nil;
    if (![camera lockForConfiguration:&lockError]) {
        NSLog(@"unable to lock device for autofocus configuration %@", [lockError localizedDescription]);
        return;
    }

    // The mode is changed only while the configuration lock is held.
    [camera setFocusMode:AVCaptureFocusModeContinuousAutoFocus];
    [camera unlockForConfiguration];
}
// Puts the default video capture device into locked-exposure mode.
// Does nothing when the device does not report support for that mode; a
// failure to obtain the configuration lock is logged and otherwise ignored.
- (void)lockExposure
{
    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![camera isExposureModeSupported:AVCaptureExposureModeLocked]) {
        return;
    }

    NSError *lockError = nil;
    if (![camera lockForConfiguration:&lockError]) {
        NSLog(@"unable to lock device for locked exposure configuration %@", [lockError localizedDescription]);
        return;
    }

    // The mode is changed only while the configuration lock is held.
    [camera setExposureMode:AVCaptureExposureModeLocked];
    [camera unlockForConfiguration];
}
// Returns the default video capture device to continuous auto-exposure.
// Does nothing when the device does not report support for that mode; a
// failure to obtain the configuration lock is logged and otherwise ignored.
- (void)unlockExposure
{
    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![camera isExposureModeSupported:AVCaptureExposureModeContinuousAutoExposure]) {
        return;
    }

    NSError *lockError = nil;
    if (![camera lockForConfiguration:&lockError]) {
        NSLog(@"unable to lock device for autoexposure configuration %@", [lockError localizedDescription]);
        return;
    }

    // The mode is changed only while the configuration lock is held.
    [camera setExposureMode:AVCaptureExposureModeContinuousAutoExposure];
    [camera unlockForConfiguration];
}
// Puts the default video capture device into locked white-balance mode.
// Does nothing when the device does not report support for that mode; a
// failure to obtain the configuration lock is logged and otherwise ignored.
- (void)lockBalance
{
    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![camera isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeLocked]) {
        return;
    }

    NSError *lockError = nil;
    if (![camera lockForConfiguration:&lockError]) {
        NSLog(@"unable to lock device for locked white balance configuration %@", [lockError localizedDescription]);
        return;
    }

    // The mode is changed only while the configuration lock is held.
    [camera setWhiteBalanceMode:AVCaptureWhiteBalanceModeLocked];
    [camera unlockForConfiguration];
}
// Returns the default video capture device to continuous auto white balance.
// Does nothing when the device does not report support for that mode; a
// failure to obtain the configuration lock is logged and otherwise ignored.
- (void)unlockBalance
{
    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    if (![camera isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance]) {
        return;
    }

    NSError *lockError = nil;
    if (![camera lockForConfiguration:&lockError]) {
        NSLog(@"unable to lock device for auto white balance configuration %@", [lockError localizedDescription]);
        return;
    }

    // The mode is changed only while the configuration lock is held.
    [camera setWhiteBalanceMode:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance];
    [camera unlockForConfiguration];
}
@end @end

@ -2,6 +2,7 @@
* cap_ios_video_camera.mm * cap_ios_video_camera.mm
* For iOS video I/O * For iOS video I/O
* by Eduard Feicho on 29/07/12 * by Eduard Feicho on 29/07/12
* by Alexander Shishkov on 17/07/13
* Copyright 2012. All rights reserved. * Copyright 2012. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -30,7 +31,6 @@
#import "opencv2/highgui/cap_ios.h" #import "opencv2/highgui/cap_ios.h"
#include "precomp.hpp" #include "precomp.hpp"
#import <AssetsLibrary/AssetsLibrary.h> #import <AssetsLibrary/AssetsLibrary.h>
@ -70,6 +70,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
@synthesize videoDataOutput; @synthesize videoDataOutput;
@synthesize recordVideo; @synthesize recordVideo;
@synthesize rotateVideo;
//@synthesize videoFileOutput; //@synthesize videoFileOutput;
@synthesize recordAssetWriterInput; @synthesize recordAssetWriterInput;
@synthesize recordPixelBufferAdaptor; @synthesize recordPixelBufferAdaptor;
@ -85,6 +86,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
if (self) { if (self) {
self.useAVCaptureVideoPreviewLayer = NO; self.useAVCaptureVideoPreviewLayer = NO;
self.recordVideo = NO; self.recordVideo = NO;
self.rotateVideo = NO;
} }
return self; return self;
} }
@ -269,13 +271,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
} }
#pragma mark - Private Interface #pragma mark - Private Interface
- (void)createVideoDataOutput; - (void)createVideoDataOutput;
{ {
// Make a video data output // Make a video data output
@ -389,6 +386,38 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
[self.parentView.layer addSublayer:self.customPreviewLayer]; [self.parentView.layer addSublayer:self.customPreviewLayer];
} }
// Draws a CGImage into a newly created 32-bit ARGB CVPixelBuffer whose
// dimensions match the image.
//
// image: source image; its pixel width and height define the buffer size.
// Returns the buffer produced by CVPixelBufferCreate. It is not released
// here, so the caller owns it and must balance it with CVPixelBufferRelease().
// Creation failure is guarded by NSParameterAssert.
- (CVPixelBufferRef) pixelBufferFromCGImage: (CGImageRef) image
{
    CGSize frameSize = CGSizeMake(CGImageGetWidth(image), CGImageGetHeight(image));
    NSDictionary *options = [NSDictionary dictionaryWithObjectsAndKeys:
                             [NSNumber numberWithBool:NO], kCVPixelBufferCGImageCompatibilityKey,
                             [NSNumber numberWithBool:NO], kCVPixelBufferCGBitmapContextCompatibilityKey,
                             nil];
    CVPixelBufferRef pxbuffer = NULL;
    // Plain bridge cast without ownership transfer: CFBridgingRetain() would
    // add a retain that nothing here releases, leaking the dictionary per call.
    CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, frameSize.width,
                                          frameSize.height, kCVPixelFormatType_32ARGB,
                                          (__bridge CFDictionaryRef) options,
                                          &pxbuffer);
    NSParameterAssert(status == kCVReturnSuccess && pxbuffer != NULL);

    // The base address is only valid while the buffer is locked.
    CVPixelBufferLockBaseAddress(pxbuffer, 0);
    void *pxdata = CVPixelBufferGetBaseAddress(pxbuffer);

    CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB();
    // Ask the buffer for its row stride instead of assuming 4*width: rows may
    // be padded, and a wrong stride would skew the rendered image.
    CGContextRef context = CGBitmapContextCreate(pxdata, frameSize.width,
                                                 frameSize.height, 8,
                                                 CVPixelBufferGetBytesPerRow(pxbuffer), rgbColorSpace,
                                                 kCGImageAlphaPremultipliedFirst);
    CGContextDrawImage(context, CGRectMake(0, 0, CGImageGetWidth(image),
                                           CGImageGetHeight(image)), image);
    CGColorSpaceRelease(rgbColorSpace);
    CGContextRelease(context);

    CVPixelBufferUnlockBaseAddress(pxbuffer, 0);
    return pxbuffer;
}
#pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate #pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate
@ -522,7 +551,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
} }
if (self.recordAssetWriterInput.readyForMoreMediaData) { if (self.recordAssetWriterInput.readyForMoreMediaData) {
if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer CVImageBufferRef pixelBuffer = [self pixelBufferFromCGImage:dstImage];
if (! [self.recordPixelBufferAdaptor appendPixelBuffer:pixelBuffer
withPresentationTime:lastSampleTime] ) { withPresentationTime:lastSampleTime] ) {
NSLog(@"Video Writing Error"); NSLog(@"Video Writing Error");
} }
@ -543,9 +573,12 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
- (void)updateOrientation; - (void)updateOrientation;
{ {
if (self.rotateVideo == YES)
{
NSLog(@"rotate.."); NSLog(@"rotate..");
self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
[self layoutPreviewLayer]; [self layoutPreviewLayer];
}
} }
@ -583,3 +616,4 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;};
} }
@end @end

@ -2474,34 +2474,32 @@ void DefaultViewPort::saveView()
{ {
QString extension = fileName.right(3); QString extension = fileName.right(3);
// (no need anymore) create the image resized to receive the 'screenshot' // Create a new pixmap to render the viewport into
// image2Draw_qt_resized = QImage(viewport()->width(), viewport()->height(),QImage::Format_RGB888); QPixmap viewportPixmap(viewport()->size());
viewport()->render(&viewportPixmap);
QPainter saveimage(&image2Draw_qt_resized);
this->render(&saveimage);
// Save it.. // Save it..
if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0) if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0)
{ {
image2Draw_qt_resized.save(fileName, "PNG"); viewportPixmap.save(fileName, "PNG");
return; return;
} }
if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0) if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0)
{ {
image2Draw_qt_resized.save(fileName, "JPG"); viewportPixmap.save(fileName, "JPG");
return; return;
} }
if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0) if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0)
{ {
image2Draw_qt_resized.save(fileName, "BMP"); viewportPixmap.save(fileName, "BMP");
return; return;
} }
if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0) if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0)
{ {
image2Draw_qt_resized.save(fileName, "JPEG"); viewportPixmap.save(fileName, "JPEG");
return; return;
} }
@ -2651,17 +2649,16 @@ void DefaultViewPort::paintEvent(QPaintEvent* evnt)
//Now disable matrixWorld for overlay display //Now disable matrixWorld for overlay display
myPainter.setWorldMatrixEnabled(false); myPainter.setWorldMatrixEnabled(false);
//in mode zoom/panning //overlay pixel values if zoomed in far enough
if (param_matrixWorld.m11() > 1) if (param_matrixWorld.m11()*ratioX >= threshold_zoom_img_region &&
param_matrixWorld.m11()*ratioY >= threshold_zoom_img_region)
{ {
if (param_matrixWorld.m11() >= threshold_zoom_img_region)
{
if (centralWidget->param_flags == CV_WINDOW_NORMAL)
startDisplayInfo("WARNING: The values displayed are the resized image's values. If you want the original image's values, use CV_WINDOW_AUTOSIZE", 1000);
drawImgRegion(&myPainter); drawImgRegion(&myPainter);
} }
//in mode zoom/panning
if (param_matrixWorld.m11() > 1)
{
drawViewOverview(&myPainter); drawViewOverview(&myPainter);
} }
@ -2887,22 +2884,24 @@ void DefaultViewPort::drawStatusBar()
//accept only CV_8UC1 and CV_8UC3 image for now //accept only CV_8UC1 and CV_8UC3 image for now
void DefaultViewPort::drawImgRegion(QPainter *painter) void DefaultViewPort::drawImgRegion(QPainter *painter)
{ {
if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3) if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3)
return; return;
qreal offsetX = param_matrixWorld.dx()/param_matrixWorld.m11(); double pixel_width = param_matrixWorld.m11()*ratioX;
double pixel_height = param_matrixWorld.m11()*ratioY;
qreal offsetX = param_matrixWorld.dx()/pixel_width;
offsetX = offsetX - floor(offsetX); offsetX = offsetX - floor(offsetX);
qreal offsetY = param_matrixWorld.dy()/param_matrixWorld.m11(); qreal offsetY = param_matrixWorld.dy()/pixel_height;
offsetY = offsetY - floor(offsetY); offsetY = offsetY - floor(offsetY);
QSize view = size(); QSize view = size();
QVarLengthArray<QLineF, 30> linesX; QVarLengthArray<QLineF, 30> linesX;
for (qreal _x = offsetX*param_matrixWorld.m11(); _x < view.width(); _x += param_matrixWorld.m11() ) for (qreal _x = offsetX*pixel_width; _x < view.width(); _x += pixel_width )
linesX.append(QLineF(_x, 0, _x, view.height())); linesX.append(QLineF(_x, 0, _x, view.height()));
QVarLengthArray<QLineF, 30> linesY; QVarLengthArray<QLineF, 30> linesY;
for (qreal _y = offsetY*param_matrixWorld.m11(); _y < view.height(); _y += param_matrixWorld.m11() ) for (qreal _y = offsetY*pixel_height; _y < view.height(); _y += pixel_height )
linesY.append(QLineF(0, _y, view.width(), _y)); linesY.append(QLineF(0, _y, view.width(), _y));
@ -2910,27 +2909,25 @@ void DefaultViewPort::drawImgRegion(QPainter *painter)
int original_font_size = f.pointSize(); int original_font_size = f.pointSize();
//change font size //change font size
//f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); //f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5);
f.setPixelSize(10+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); f.setPixelSize(10+(pixel_height-threshold_zoom_img_region)/5);
painter->setFont(f); painter->setFont(f);
QString val;
QRgb rgbValue;
QPointF point1;//sorry, I do not know how to name it
QPointF point2;//idem
for (int j=-1;j<height()/param_matrixWorld.m11();j++)//-1 because display the pixels top rows left colums for (int j=-1;j<height()/pixel_height;j++)//-1 because display the pixels top rows left columns
for (int i=-1;i<width()/param_matrixWorld.m11();i++)//-1 for (int i=-1;i<width()/pixel_width;i++)//-1
{ {
point1.setX((i+offsetX)*param_matrixWorld.m11()); // Calculate top left of the pixel's position in the viewport (screen space)
point1.setY((j+offsetY)*param_matrixWorld.m11()); QPointF pos_in_view((i+offsetX)*pixel_width, (j+offsetY)*pixel_height);
matrixWorld_inv.map(point1.x(),point1.y(),&point2.rx(),&point2.ry());
point2.rx()= (long) (point2.x() + 0.5); // Calculate top left of the pixel's position in the image (image space)
point2.ry()= (long) (point2.y() + 0.5); QPointF pos_in_image = matrixWorld_inv.map(pos_in_view);// Top left of pixel in view
pos_in_image.rx() = pos_in_image.x()/ratioX;
pos_in_image.ry() = pos_in_image.y()/ratioY;
QPoint point_in_image(pos_in_image.x() + 0.5f,pos_in_image.y() + 0.5f);// Add 0.5 for rounding
if (point2.x() >= 0 && point2.y() >= 0) QRgb rgbValue;
rgbValue = image2Draw_qt_resized.pixel(QPoint(point2.x(),point2.y())); if (image2Draw_qt.valid(point_in_image))
rgbValue = image2Draw_qt.pixel(point_in_image);
else else
rgbValue = qRgb(0,0,0); rgbValue = qRgb(0,0,0);
@ -2943,29 +2940,29 @@ void DefaultViewPort::drawImgRegion(QPainter *painter)
painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2), painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2),
Qt::AlignCenter, val); Qt::AlignCenter, val);
*/ */
QString val;
val = tr("%1").arg(qRed(rgbValue)); val = tr("%1").arg(qRed(rgbValue));
painter->setPen(QPen(Qt::red, 1)); painter->setPen(QPen(Qt::red, 1));
painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/3), painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height/3),
Qt::AlignCenter, val); Qt::AlignCenter, val);
val = tr("%1").arg(qGreen(rgbValue)); val = tr("%1").arg(qGreen(rgbValue));
painter->setPen(QPen(Qt::green, 1)); painter->setPen(QPen(Qt::green, 1));
painter->drawText(QRect(point1.x(),point1.y()+param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+pixel_height/3,pixel_width,pixel_height/3),
Qt::AlignCenter, val); Qt::AlignCenter, val);
val = tr("%1").arg(qBlue(rgbValue)); val = tr("%1").arg(qBlue(rgbValue));
painter->setPen(QPen(Qt::blue, 1)); painter->setPen(QPen(Qt::blue, 1));
painter->drawText(QRect(point1.x(),point1.y()+2*param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+2*pixel_height/3,pixel_width,pixel_height/3),
Qt::AlignCenter, val); Qt::AlignCenter, val);
} }
if (nbChannelOriginImage==CV_8UC1) if (nbChannelOriginImage==CV_8UC1)
{ {
QString val = tr("%1").arg(qRed(rgbValue));
val = tr("%1").arg(qRed(rgbValue)); painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height),
painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()),
Qt::AlignCenter, val); Qt::AlignCenter, val);
} }
} }

@ -522,7 +522,6 @@ private:
CvMat* image2Draw_mat; CvMat* image2Draw_mat;
QImage image2Draw_qt; QImage image2Draw_qt;
QImage image2Draw_qt_resized;
int nbChannelOriginImage; int nbChannelOriginImage;
//for mouse callback //for mouse callback

@ -585,4 +585,18 @@ public class Calib3dTest extends OpenCVTestCase {
public void testValidateDisparityMatMatIntIntInt() { public void testValidateDisparityMatMatIntIntInt() {
fail("Not yet implemented"); fail("Not yet implemented");
} }
/**
 * Checks Calib3d.computeCorrespondEpilines for a single point against a
 * hand-computed epipolar line.
 */
public void testComputeCorrespondEpilines()
{
    // 3x3 double-precision fundamental matrix; the nine values are written
    // starting at element (0, 0).
    Mat fundamentalMatrix = new Mat(3, 3, CvType.CV_64F);
    fundamentalMatrix.put(0, 0, 0, -0.577, 0.288, 0.577, 0, 0.288, -0.288, -0.288, 0);

    // One query point with coordinates (2, 3).
    MatOfPoint2f queryPoints = new MatOfPoint2f();
    queryPoints.alloc(1);
    queryPoints.put(0, 0, 2, 3); //add(new Point(x, y));

    // Expected line coefficients (a, b, c): approximately F * (2, 3, 1)^T,
    // scaled so that a^2 + b^2 == 1.
    Mat expectedLines = new Mat(1, 1, CvType.CV_32FC3);
    expectedLines.put(0, 0, -0.70735186, 0.70686162, -0.70588124);

    // The second argument (1) is passed as the image index of the points.
    Mat epilines = new Mat();
    Calib3d.computeCorrespondEpilines(queryPoints, 1, fundamentalMatrix, epilines);

    assertMatEqual(expectedLines, epilines, EPS);
}
} }

@ -1149,3 +1149,76 @@ protected:
TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); } TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); }
TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); } TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); }
// Regression test verifying that a FeatureDetector only reports keypoints that
// lie inside the region enabled by the mask passed to detect().
class FeatureDetectorUsingMaskTest : public cvtest::BaseTest
{
public:
    // The detector under test must be non-empty; this is asserted on construction.
    FeatureDetectorUsingMaskTest(const Ptr<FeatureDetector>& featureDetector) :
        featureDetector_(featureDetector)
    {
        CV_Assert(!featureDetector_.empty());
    }

protected:
    // Splits the test image into a 2x2 grid of cells, enables one cell at a
    // time in the mask and checks that every detected point lies in that cell.
    void run(int)
    {
        const int nStepX = 2;
        const int nStepY = 2;

        const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png";

        Mat image = imread(imageFilename);
        if (image.empty())
        {
            ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str());
            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
            return;
        }

        Mat mask(image.size(), CV_8U);

        // Size of one grid cell in pixels (integer division).
        const int cellWidth = image.cols / nStepX;
        const int cellHeight = image.rows / nStepY;

        // Reused across iterations.
        vector<KeyPoint> keyPoints;
        vector<Point2f> points;

        // Cells are visited column-major: the x index is the slow one, the
        // y index the fast one.
        for (int cell = 0; cell < nStepX * nStepY; ++cell)
        {
            const int cellX = cell / nStepY;
            const int cellY = cell % nStepY;

            // Black mask everywhere except the current cell.
            mask.setTo(0);
            Rect whiteArea(cellX * cellWidth, cellY * cellHeight, cellWidth, cellHeight);
            mask(whiteArea).setTo(255);

            featureDetector_->detect(image, keyPoints, mask);
            KeyPoint::convert(keyPoints, points);

            for (size_t idx = 0; idx < points.size(); ++idx)
            {
                if (whiteArea.contains(points[idx]))
                    continue;

                // A keypoint outside the enabled cell fails the test at once.
                ts->printf(cvtest::TS::LOG, "The feature point is outside of the mask.");
                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
                return;
            }
        }

        ts->set_failed_test_info( cvtest::TS::OK );
    }

    Ptr<FeatureDetector> featureDetector_;
};
// Runs the mask-handling regression check with the "Feature2D.SIFT" detector.
TEST(Features2d_SIFT_using_mask, regression)
{
    Ptr<FeatureDetector> siftDetector = Algorithm::create<FeatureDetector>("Feature2D.SIFT");
    FeatureDetectorUsingMaskTest maskTest(siftDetector);
    maskTest.safe_run();
}
// Same mask-handling check with the "Feature2D.SURF" detector. The DISABLED_
// prefix keeps the test compiled but excluded from normal Google Test runs.
// NOTE(review): the reason it is disabled is not visible here — confirm before enabling.
TEST(DISABLED_Features2d_SURF_using_mask, regression)
{
    Ptr<FeatureDetector> surfDetector = Algorithm::create<FeatureDetector>("Feature2D.SURF");
    FeatureDetectorUsingMaskTest maskTest(surfDetector);
    maskTest.safe_run();
}

@ -245,6 +245,11 @@ namespace cv
operator Mat() const; operator Mat() const;
void download(cv::Mat &m) const; void download(cv::Mat &m) const;
//! convert to _InputArray
operator _InputArray();
//! convert to _OutputArray
operator _OutputArray();
//! returns a new oclMatrix header for the specified row //! returns a new oclMatrix header for the specified row
oclMat row(int y) const; oclMat row(int y) const;
@ -386,6 +391,9 @@ namespace cv
int wholecols; int wholecols;
}; };
// convert InputArray/OutputArray to oclMat references
CV_EXPORTS oclMat& getOclMatRef(InputArray src);
CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
///////////////////// mat split and merge ///////////////////////////////// ///////////////////// mat split and merge /////////////////////////////////
//! Compose a multi-channel array from several single-channel arrays //! Compose a multi-channel array from several single-channel arrays

@ -113,7 +113,7 @@ namespace cv
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3], void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures // bind oclMat to OpenCL image textures
// note: // note:
// 1. there is no memory management. User need to explicitly release the resource // 1. there is no memory management. User need to explicitly release the resource

@ -52,6 +52,8 @@ int main(int argc, const char *argv[])
cerr << "no device found\n"; cerr << "no device found\n";
return -1; return -1;
} }
// set this to overwrite binary cache every time the test starts
ocl::setBinaryDiskCache(ocl::CACHE_UPDATE);
int devidx = 0; int devidx = 0;

@ -15,8 +15,8 @@
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors // @Authors
// Chunpeng Zhang chunpeng@multicorewareinc.com // Fangfang Bai, fangfang@multicorewareinc.com
// // Jin Ma, jin@multicorewareinc.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
@ -31,7 +31,7 @@
// * The name of the copyright holders may not be used to endorse or promote products // * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission. // derived from this software without specific prior written permission.
// //
// This software is provided by the copyright holders and contributors "as is" and // This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied // any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed. // warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct, // In no event shall the Intel Corporation or contributors be liable for any direct,
@ -45,50 +45,57 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include <iomanip> ///////////// StereoMatchBM ////////////////////////
PERFTEST(StereoMatchBM)
{
Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
Mat disp,dst;
ocl::oclMat d_left, d_right,d_disp;
int n_disp= 128;
int winSize =19;
#ifdef HAVE_OPENCL SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
PARAM_TEST_CASE(ColumnSum, cv::Size) Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
{ bm->compute(left_image, right_image, dst);
cv::Size size;
cv::Mat src;
virtual void SetUp() CPU_ON;
{ bm->compute(left_image, right_image, dst);
size = GET_PARAM(0); CPU_OFF;
}
};
TEST_P(ColumnSum, Accuracy) d_left.upload(left_image);
{ d_right.upload(right_image);
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst; ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
cv::ocl::oclMat d_src(src);
WARMUP_ON;
cv::ocl::columnSum(d_src, d_dst); d_bm(d_left, d_right, d_disp);
WARMUP_OFF;
cv::Mat dst(d_dst);
cv::Mat ocl_mat;
for (int j = 0; j < src.cols; ++j) d_disp.download(ocl_mat);
{ ocl_mat.convertTo(ocl_mat, dst.type());
float gold = src.at<float>(0, j);
float res = dst.at<float>(0, j); GPU_ON;
ASSERT_NEAR(res, gold, 1e-5); d_bm(d_left, d_right, d_disp);
} GPU_OFF;
for (int i = 1; i < src.rows; ++i) GPU_FULL_ON;
{ d_left.upload(left_image);
for (int j = 0; j < src.cols; ++j) d_right.upload(right_image);
{ d_bm(d_left, d_right, d_disp);
float gold = src.at<float>(i, j) += src.at<float>(i - 1, j); d_disp.download(disp);
float res = dst.at<float>(i, j); GPU_FULL_OFF;
ASSERT_NEAR(res, gold, 1e-5);
} TestSystem::instance().setAccurate(-1, 0.);
}
} }
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
#endif

@ -284,6 +284,7 @@ PERFTEST(GaussianBlur)
Mat src, dst, ocl_dst; Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
const int ksize = 7;
for (int size = Min_Size; size <= Max_Size; size *= Multiple) for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{ {
@ -291,29 +292,28 @@ PERFTEST(GaussianBlur)
{ {
SUBTEST << size << 'x' << size << "; " << type_name[j] ; SUBTEST << size << 'x' << size << "; " << type_name[j] ;
gen(src, size, size, all_type[j], 5, 16); gen(src, size, size, all_type[j], 0, 256);
GaussianBlur(src, dst, Size(9, 9), 0); GaussianBlur(src, dst, Size(ksize, ksize), 0);
CPU_ON; CPU_ON;
GaussianBlur(src, dst, Size(9, 9), 0); GaussianBlur(src, dst, Size(ksize, ksize), 0);
CPU_OFF; CPU_OFF;
ocl::oclMat d_src(src); ocl::oclMat d_src(src);
ocl::oclMat d_dst(src.size(), src.type()); ocl::oclMat d_dst;
ocl::oclMat d_buf;
WARMUP_ON; WARMUP_ON;
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0);
WARMUP_OFF; WARMUP_OFF;
GPU_ON; GPU_ON;
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0);
GPU_OFF; GPU_OFF;
GPU_FULL_ON; GPU_FULL_ON;
d_src.upload(src); d_src.upload(src);
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0);
d_dst.download(ocl_dst); d_dst.download(ocl_dst);
GPU_FULL_OFF; GPU_FULL_OFF;

@ -46,11 +46,6 @@
#include "precomp.hpp" #include "precomp.hpp"
///////////// HOG//////////////////////// ///////////// HOG////////////////////////
// Returns true when the two rectangles differ by strictly less than
// `threshold` in each of x, y, width and height.
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
{
    if (abs(r1.x - r2.x) >= threshold)
        return false;
    if (abs(r1.y - r2.y) >= threshold)
        return false;
    if (abs(r1.width - r2.width) >= threshold)
        return false;
    return abs(r1.height - r2.height) < threshold;
}
PERFTEST(HOG) PERFTEST(HOG)
{ {
@ -61,13 +56,12 @@ PERFTEST(HOG)
throw runtime_error("can't open road.png"); throw runtime_error("can't open road.png");
} }
cv::HOGDescriptor hog; cv::HOGDescriptor hog;
hog.setSVMDetector(hog.getDefaultPeopleDetector()); hog.setSVMDetector(hog.getDefaultPeopleDetector());
std::vector<cv::Rect> found_locations; std::vector<cv::Rect> found_locations;
std::vector<cv::Rect> d_found_locations; std::vector<cv::Rect> d_found_locations;
SUBTEST << 768 << 'x' << 576 << "; road.png"; SUBTEST << src.cols << 'x' << src.rows << "; road.png";
hog.detectMultiScale(src, found_locations); hog.detectMultiScale(src, found_locations);
@ -84,70 +78,10 @@ PERFTEST(HOG)
ocl_hog.detectMultiScale(d_src, d_found_locations); ocl_hog.detectMultiScale(d_src, d_found_locations);
WARMUP_OFF; WARMUP_OFF;
// Ground-truth rectangular people window if(d_found_locations.size() == found_locations.size())
cv::Rect win1_64x128(231, 190, 72, 144); TestSystem::instance().setAccurate(1, 0);
cv::Rect win2_64x128(621, 156, 97, 194); else
cv::Rect win1_48x96(238, 198, 63, 126); TestSystem::instance().setAccurate(0, abs((int)found_locations.size() - (int)d_found_locations.size()));
cv::Rect win2_48x96(619, 161, 92, 185);
cv::Rect win3_48x96(488, 136, 56, 112);
// Compare whether ground-truth windows are detected and compare the number of windows detected.
std::vector<int> d_comp(4);
std::vector<int> comp(4);
for(int i = 0; i < (int)d_comp.size(); i++)
{
d_comp[i] = 0;
comp[i] = 0;
}
int threshold = 10;
int val = 32;
d_comp[0] = (int)d_found_locations.size();
comp[0] = (int)found_locations.size();
cv::Size winSize = hog.winSize;
if (winSize == cv::Size(48, 96))
{
for(int i = 0; i < (int)d_found_locations.size(); i++)
{
if (match_rect(d_found_locations[i], win1_48x96, threshold))
d_comp[1] = val;
if (match_rect(d_found_locations[i], win2_48x96, threshold))
d_comp[2] = val;
if (match_rect(d_found_locations[i], win3_48x96, threshold))
d_comp[3] = val;
}
for(int i = 0; i < (int)found_locations.size(); i++)
{
if (match_rect(found_locations[i], win1_48x96, threshold))
comp[1] = val;
if (match_rect(found_locations[i], win2_48x96, threshold))
comp[2] = val;
if (match_rect(found_locations[i], win3_48x96, threshold))
comp[3] = val;
}
}
else if (winSize == cv::Size(64, 128))
{
for(int i = 0; i < (int)d_found_locations.size(); i++)
{
if (match_rect(d_found_locations[i], win1_64x128, threshold))
d_comp[1] = val;
if (match_rect(d_found_locations[i], win2_64x128, threshold))
d_comp[2] = val;
}
for(int i = 0; i < (int)found_locations.size(); i++)
{
if (match_rect(found_locations[i], win1_64x128, threshold))
comp[1] = val;
if (match_rect(found_locations[i], win2_64x128, threshold))
comp[2] = val;
}
}
cv::Mat gpu_rst(d_comp), cpu_rst(comp);
TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
GPU_ON; GPU_ON;
ocl_hog.detectMultiScale(d_src, found_locations); ocl_hog.detectMultiScale(d_src, found_locations);

@ -743,12 +743,12 @@ PERFTEST(meanShiftFiltering)
WARMUP_OFF; WARMUP_OFF;
GPU_ON; GPU_ON;
ocl::meanShiftFiltering(d_src, d_dst, sp, sr); ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
GPU_OFF; GPU_OFF;
GPU_FULL_ON; GPU_FULL_ON;
d_src.upload(src); d_src.upload(src);
ocl::meanShiftFiltering(d_src, d_dst, sp, sr); ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
d_dst.download(ocl_dst); d_dst.download(ocl_dst);
GPU_FULL_OFF; GPU_FULL_OFF;
@ -969,3 +969,45 @@ PERFTEST(CLAHE)
} }
} }
} }
///////////// columnSum////////////////////////
// Performance test for ocl::columnSum on square CV_32FC1 matrices, with a
// plain CPU loop as the reference implementation.
PERFTEST(columnSum)
{
    // Host-side input, CPU reference result and downloaded device result.
    Mat src;
    Mat dst;
    Mat ocl_dst;
    // Device-side input and output.
    ocl::oclMat d_src;
    ocl::oclMat d_dst;

    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
        SUBTEST << size << 'x' << size << "; CV_32FC1";

        // presumably fills src with generated values between 0 and 256 — confirm against gen()
        gen(src, size, size, CV_32FC1, 0, 256);

        // CPU reference: running sum down each column. Row 0 is copied, and
        // every later row adds the source row to the accumulated row above.
        CPU_ON;
        dst.create(src.size(), src.type());
        for (int col = 0; col < src.cols; col++)
            dst.at<float>(0, col) = src.at<float>(0, col);

        for (int row = 1; row < src.rows; ++row)
            for (int col = 0; col < src.cols; ++col)
                dst.at<float>(row, col) = dst.at<float>(row - 1 , col) + src.at<float>(row , col);
        CPU_OFF;

        d_src.upload(src);

        // First device call, made between the warm-up macros.
        WARMUP_ON;
        ocl::columnSum(d_src, d_dst);
        WARMUP_OFF;

        // Device call only.
        GPU_ON;
        ocl::columnSum(d_src, d_dst);
        GPU_OFF;

        // Device call together with upload and download.
        GPU_FULL_ON;
        d_src.upload(src);
        ocl::columnSum(d_src, d_dst);
        d_dst.download(ocl_dst);
        GPU_FULL_OFF;

        // Compare the CPU and device results; 5e-1 is passed as the tolerance argument.
        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
    }
}

@ -44,45 +44,49 @@
// //
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
///////////// Moments ////////////////////////
///////////// columnSum//////////////////////// PERFTEST(Moments)
PERFTEST(columnSum)
{ {
Mat src, dst, ocl_dst; Mat src;
ocl::oclMat d_src, d_dst; bool binaryImage = 0;
int all_type[] = {CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1};
std::string type_name[] = {"CV_8UC1", "CV_16SC1", "CV_32FC1", "CV_64FC1"};
for (int size = Min_Size; size <= Max_Size; size *= Multiple) for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{ {
SUBTEST << size << 'x' << size << "; CV_32FC1"; for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j];
gen(src, size, size, CV_32FC1, 0, 256); gen(src, size, size, all_type[j], 0, 256);
CPU_ON; cv::Moments CvMom = moments(src, binaryImage);
dst.create(src.size(), src.type());
for (int j = 0; j < src.cols; j++)
dst.at<float>(0, j) = src.at<float>(0, j);
for (int i = 1; i < src.rows; ++i) CPU_ON;
for (int j = 0; j < src.cols; ++j) moments(src, binaryImage);
dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
CPU_OFF; CPU_OFF;
d_src.upload(src); cv::Moments oclMom;
WARMUP_ON; WARMUP_ON;
ocl::columnSum(d_src, d_dst); oclMom = ocl::ocl_moments(src, binaryImage);
WARMUP_OFF; WARMUP_OFF;
Mat gpu_dst, cpu_dst;
HuMoments(CvMom, cpu_dst);
HuMoments(oclMom, gpu_dst);
GPU_ON; GPU_ON;
ocl::columnSum(d_src, d_dst); ocl::ocl_moments(src, binaryImage);
GPU_OFF; GPU_OFF;
GPU_FULL_ON; GPU_FULL_ON;
d_src.upload(src); ocl::ocl_moments(src, binaryImage);
ocl::columnSum(d_src, d_dst);
d_dst.download(ocl_dst);
GPU_FULL_OFF; GPU_FULL_OFF;
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); TestSystem::instance().ExpectedMatNear(gpu_dst, cpu_dst, .5);
}
} }
} }

@ -331,20 +331,6 @@ void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time,
cout << setiosflags(ios_base::left); cout << setiosflags(ios_base::left);
stringstream stream; stringstream stream;
#if 0
if(is_accurate == 1)
stream << "Pass";
else if(is_accurate_ == 0)
stream << "Fail";
else if(is_accurate == -1)
stream << " ";
else
{
std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
exit(-1);
}
#endif
std::stringstream &cur_subtest_description = getCurSubtestDescription(); std::stringstream &cur_subtest_description = getCurSubtestDescription();
#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE #if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE

@ -48,13 +48,107 @@
using namespace cv; using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
#define CELL_WIDTH 8 #define CELL_WIDTH 8
#define CELL_HEIGHT 8 #define CELL_HEIGHT 8
#define CELLS_PER_BLOCK_X 2 #define CELLS_PER_BLOCK_X 2
#define CELLS_PER_BLOCK_Y 2 #define CELLS_PER_BLOCK_Y 2
#define NTHREADS 256 #define NTHREADS 256
static oclMat gauss_w_lut;
static bool hog_device_cpu;
/* Precomputed lookup tables for the case sigma == 4.0f: 256 gaussian weights (16x16) followed by 256 interp_weight values (16x16) */
static const float gaussian_interp_lut[] =
{
/* gaussian lut */
0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f,
0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f,
0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f,
0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f,
0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f,
0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f,
0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f,
0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f,
0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f,
0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f,
0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f,
0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f,
0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f,
0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f,
0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f,
0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f,
0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f,
0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f,
0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f,
0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f,
0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f,
0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f,
0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f,
0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f,
0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f,
0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,
/* interp_weight lut */
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f,
0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f,
0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f,
0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f,
0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f,
0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f,
0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f,
0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f,
0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f,
0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f,
0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f,
0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f,
0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f,
0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f,
0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f,
0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f,
0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f,
0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f,
0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f,
0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f,
0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f
};
namespace cv namespace cv
{ {
namespace ocl namespace ocl
@ -78,38 +172,43 @@ namespace cv
int cnblocks_win_x; int cnblocks_win_x;
int cnblocks_win_y; int cnblocks_win_y;
int cblock_hist_size; int cblock_hist_size;
int cblock_hist_size_2up;
int cdescr_size; int cdescr_size;
int cdescr_width; int cdescr_width;
int cdescr_height;
void set_up_constants(int nbins, int block_stride_x, int block_stride_y, void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y); int nblocks_win_x, int nblocks_win_y);
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
int height, int width, const cv::ocl::oclMat &grad, int height, int width, float sigma, const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists); const cv::ocl::oclMat &qangle,
const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists);
void normalize_hists(int nbins, int block_stride_x, int block_stride_y, void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, cv::ocl::oclMat &block_hists, float threshold); int height, int width, cv::ocl::oclMat &block_hists,
float threshold);
void classify_hists(int win_height, int win_width, int block_stride_y, void classify_hists(int win_height, int win_width, int block_stride_y,
int block_stride_x, int win_stride_y, int win_stride_x, int height, int block_stride_x, int win_stride_y, int win_stride_x,
int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, int height, int width, const cv::ocl::oclMat &block_hists,
const cv::ocl::oclMat &coefs, float free_coef,
float threshold, cv::ocl::oclMat &labels); float threshold, cv::ocl::oclMat &labels);
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y,
int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors); cv::ocl::oclMat &descriptors);
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y,
int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors); cv::ocl::oclMat &descriptors);
void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); float angle_scale, cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle, bool correct_gamma);
void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); float angle_scale, cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle, bool correct_gamma);
void resize( const oclMat &src, oclMat &dst, const Size sz);
} }
} }
} }
@ -117,8 +216,14 @@ namespace cv
using namespace ::cv::ocl::device; using namespace ::cv::ocl::device;
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, static inline int divUp(int total, int grain)
int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) {
return (total + grain - 1) / grain;
}
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
Size cell_size_, int nbins_, double win_sigma_,
double threshold_L2hys_, bool gamma_correction_, int nlevels_)
: win_size(win_size_), : win_size(win_size_),
block_size(block_size_), block_size(block_size_),
block_stride(block_stride_), block_stride(block_stride_),
@ -132,19 +237,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
(win_size.height - block_size.height) % block_stride.height == 0); (win_size.height - block_size.height) % block_stride.height == 0);
CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); CV_Assert(block_size.width % cell_size.width == 0 &&
block_size.height % cell_size.height == 0);
CV_Assert(block_stride == cell_size); CV_Assert(block_stride == cell_size);
CV_Assert(cell_size == Size(8, 8)); CV_Assert(cell_size == Size(8, 8));
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); Size cells_per_block(block_size.width / cell_size.width,
block_size.height / cell_size.height);
CV_Assert(cells_per_block == Size(2, 2)); CV_Assert(cells_per_block == Size(2, 2));
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); hog::set_up_constants(nbins, block_stride.width, block_stride.height,
blocks_per_win.width, blocks_per_win.height);
effect_size = Size(0, 0); effect_size = Size(0, 0);
if (queryDeviceInfo<IS_CPU_DEVICE, bool>())
hog_device_cpu = true;
else
hog_device_cpu = false;
} }
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@ -154,7 +267,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
{ {
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); Size cells_per_block = Size(block_size.width / cell_size.width,
block_size.height / cell_size.height);
return (size_t)(nbins * cells_per_block.area()); return (size_t)(nbins * cells_per_block.area());
} }
@ -167,7 +281,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const
{ {
size_t detector_size = detector.rows * detector.cols; size_t detector_size = detector.rows * detector.cols;
size_t descriptor_size = getDescriptorSize(); size_t descriptor_size = getDescriptorSize();
return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; return detector_size == 0 || detector_size == descriptor_size ||
detector_size == descriptor_size + 1;
} }
void cv::ocl::HOGDescriptor::setSVMDetector(const std::vector<float> &_detector) void cv::ocl::HOGDescriptor::setSVMDetector(const std::vector<float> &_detector)
@ -207,10 +322,16 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
const size_t block_hist_size = getBlockHistogramSize(); const size_t block_hist_size = getBlockHistogramSize();
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F); block_hists.create(1,
static_cast<int>(block_hist_size * blocks_per_img.area()) + 256, CV_32F);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
labels.create(1, wins_per_img.area(), CV_8U); labels.create(1, wins_per_img.area(), CV_8U);
std::vector<float> v_lut = std::vector<float>(gaussian_interp_lut, gaussian_interp_lut +
sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0]));
Mat m_lut(v_lut);
gauss_w_lut.upload(m_lut.reshape(1,1));
} }
void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle)
@ -221,10 +342,12 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
switch (img.type()) switch (img.type())
{ {
case CV_8UC1: case CV_8UC1:
hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img,
angleScale, grad, qangle, gamma_correction);
break; break;
case CV_8UC4: case CV_8UC4:
hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img,
angleScale, grad, qangle, gamma_correction);
break; break;
} }
} }
@ -232,19 +355,21 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
{ {
computeGradient(img, grad, qangle); computeGradient(img, this->grad, this->qangle);
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
grad, qangle, (float)getWinSigma(), block_hists); effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists);
hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
block_hists, (float)threshold_L2hys); effect_size.width, block_hists, (float)threshold_L2hys);
} }
void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format) void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
oclMat &descriptors, int descr_format)
{ {
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride); init_buffer(img, win_stride);
@ -254,17 +379,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride); Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F); descriptors.create(wins_per_img.area(),
static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
switch (descr_format) switch (descr_format)
{ {
case DESCR_FORMAT_ROW_BY_ROW: case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::extract_descrs_by_rows(win_size.height, win_size.width,
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); block_stride.height, block_stride.width, win_stride.height, win_stride.width,
effect_size.height, effect_size.width, block_hists, descriptors);
break; break;
case DESCR_FORMAT_COL_BY_COL: case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::extract_descrs_by_cols(win_size.height, win_size.width,
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); block_stride.height, block_stride.width, win_stride.height, win_stride.width,
effect_size.height, effect_size.width, block_hists, descriptors);
break; break;
default: default:
CV_Error(Error::StsBadArg, "Unknown descriptor format"); CV_Error(Error::StsBadArg, "Unknown descriptor format");
@ -272,7 +400,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
} }
void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits, double hit_threshold, Size win_stride, Size padding) void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
double hit_threshold, Size win_stride, Size padding)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(padding == Size(0, 0)); CV_Assert(padding == Size(0, 0));
@ -284,14 +413,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
if (win_stride == Size()) if (win_stride == Size())
win_stride = block_stride; win_stride = block_stride;
else else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride); init_buffer(img, win_stride);
computeBlockHistograms(img); computeBlockHistograms(img);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, hog::classify_hists(win_size.height, win_size.width, block_stride.height,
win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, block_stride.width, win_stride.height, win_stride.width,
detector, (float)free_coef, (float)hit_threshold, labels); effect_size.height, effect_size.width, block_hists, detector,
(float)free_coef, (float)hit_threshold, labels);
labels.download(labels_host); labels.download(labels_host);
unsigned char *vec = labels_host.ptr(); unsigned char *vec = labels_host.ptr();
@ -307,8 +438,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations, double hit_threshold, void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
Size win_stride, Size padding, double scale0, int group_threshold) double hit_threshold, Size win_stride, Size padding,
double scale0, int group_threshold)
{ {
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(scale0 > 1); CV_Assert(scale0 > 1);
@ -334,7 +466,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rec
if (win_stride == Size()) if (win_stride == Size())
win_stride = block_stride; win_stride = block_stride;
else else
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
init_buffer(img, win_stride); init_buffer(img, win_stride);
image_scale.create(img.size(), img.type()); image_scale.create(img.size(), img.type());
@ -348,16 +481,17 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rec
} }
else else
{ {
hog::resize( img, image_scale, effect_size); resize(img, image_scale, effect_size);
detect(image_scale, locations, hit_threshold, win_stride, padding); detect(image_scale, locations, hit_threshold, win_stride, padding);
} }
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); Size scaled_win_size(cvRound(win_size.width * scale),
cvRound(win_size.height * scale));
for (size_t j = 0; j < locations.size(); j++) for (size_t j = 0; j < locations.size(); j++)
all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size)); all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
} }
found_locations.assign(all_candidates.begin(), all_candidates.end()); found_locations.assign(all_candidates.begin(), all_candidates.end());
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); groupRectangles(found_locations, group_threshold, 0.2);
} }
int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
@ -365,9 +499,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
return (size - part_size + stride) / stride; return (size - part_size + stride) / stride;
} }
cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
cv::Size stride)
{ {
return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); return Size(numPartsWithin(size.width, part_size.width, stride.width),
numPartsWithin(size.height, part_size.height, stride.height));
} }
std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector() std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
@ -1548,7 +1684,8 @@ static int power_2up(unsigned int n)
return -1; // Input is too big return -1; // Input is too big
} }
void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, void cv::ocl::device::hog::set_up_constants(int nbins,
int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y) int nblocks_win_x, int nblocks_win_y)
{ {
cnbins = nbins; cnbins = nbins;
@ -1560,29 +1697,31 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
cblock_hist_size = block_hist_size; cblock_hist_size = block_hist_size;
int block_hist_size_2up = power_2up(block_hist_size);
cblock_hist_size_2up = block_hist_size_2up;
int descr_width = nblocks_win_x * block_hist_size; int descr_width = nblocks_win_x * block_hist_size;
cdescr_width = descr_width; cdescr_width = descr_width;
cdescr_height = nblocks_win_y;
int descr_size = descr_width * nblocks_win_y; int descr_size = descr_width * nblocks_win_y;
cdescr_size = descr_size; cdescr_size = descr_size;
} }
void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, void cv::ocl::device::hog::compute_hists(int nbins,
int height, int width, const cv::ocl::oclMat &grad, int block_stride_x, int block_stride_y,
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) int height, int width, float sigma,
const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle,
const cv::ocl::oclMat &gauss_w_lut,
cv::ocl::oclMat &block_hists)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "compute_hists_kernel";
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
String kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" :
"compute_hists_kernel";
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 }; / block_stride_y;
size_t localThreads[3] = { 32, 2, 1 };
int grad_quadstep = grad.step >> 2; int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step; int qangle_step = qangle.step;
@ -1590,6 +1729,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
// Precompute gaussian spatial window parameter // Precompute gaussian spatial window parameter
float scale = 1.f / (2.f * sigma * sigma); float scale = 1.f / (2.f * sigma * sigma);
int blocks_in_group = 4;
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
size_t globalThreads[3] = {
divUp(img_block_width * img_block_height, blocks_in_group) * localThreads[0], 2, 1 };
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
int smem = hists_size + final_hists_size; int smem = hists_size + final_hists_size;
@ -1604,19 +1748,26 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
if (kernelName.compare("compute_hists_lut_kernel") == 0)
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data));
else
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( smem, (void *)NULL)); args.push_back( std::make_pair( smem, (void *)NULL));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, void cv::ocl::device::hog::normalize_hists(int nbins,
int height, int width, cv::ocl::oclMat &block_hists, float threshold) int block_stride_x, int block_stride_y,
int height, int width,
cv::ocl::oclMat &block_hists,
float threshold)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "normalize_hists_kernel";
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
String kernelName;
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
int nthreads = power_2up(block_hist_size); int nthreads = power_2up(block_hist_size);
@ -1626,40 +1777,90 @@ void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int bl
size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 };
size_t localThreads[3] = { nthreads, 1, 1 }; size_t localThreads[3] = { nthreads, 1, 1 };
if ( nbins == 9 )
{
/* optimized for the case of 9 bins */
kernelName = "normalize_hists_36_kernel";
int blocks_in_group = NTHREADS / block_hist_size;
nthreads = blocks_in_group * block_hist_size;
int num_groups = divUp( img_block_width * img_block_height, blocks_in_group);
globalThreads[0] = nthreads * num_groups;
localThreads[0] = nthreads;
}
else
{
kernelName = "normalize_hists_kernel";
nthreads = power_2up(block_hist_size);
globalThreads[0] = img_block_width * nthreads;
globalThreads[1] = img_block_height;
localThreads[0] = nthreads;
if ((nthreads < 32) || (nthreads > 512) ) if ((nthreads < 32) || (nthreads > 512) )
cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big", "cv::ocl::device::hog::normalize_hists", __FILE__, __LINE__); cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big",
"normalize_hists", __FILE__, __LINE__);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
}
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( std::make_pair( nthreads * sizeof(float), (void *)NULL)); args.push_back( std::make_pair( nthreads * sizeof(float), (void *)NULL));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); if(hog_device_cpu)
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, "-D CPU");
else
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
int block_stride_x, int win_stride_y, int win_stride_x, int height, int block_stride_y, int block_stride_x,
int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, int win_stride_y, int win_stride_x,
float threshold, cv::ocl::oclMat &labels) int height, int width,
const cv::ocl::oclMat &block_hists,
const cv::ocl::oclMat &coefs,
float free_coef, float threshold,
cv::ocl::oclMat &labels)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "classify_hists_kernel";
std::vector< std::pair<size_t, const void *> > args; std::vector< std::pair<size_t, const void *> > args;
int nthreads;
String kernelName;
switch (cdescr_width)
{
case 180:
nthreads = 180;
kernelName = "classify_hists_180_kernel";
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
break;
case 252:
nthreads = 256;
kernelName = "classify_hists_252_kernel";
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
break;
default:
nthreads = 256;
kernelName = "classify_hists_kernel";
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
}
int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y; int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y; int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
size_t localThreads[3] = { NTHREADS, 1, 1 };
size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
size_t localThreads[3] = { nthreads, 1, 1 };
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_win_width)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_win_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
@ -1670,12 +1871,20 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo
args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&labels.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&labels.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); if(hog_device_cpu)
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, "-D CPU");
else
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
int win_stride_y, int win_stride_x, int height, int width, int block_stride_y, int block_stride_x,
const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) int win_stride_y, int win_stride_x,
int height, int width,
const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "extract_descrs_by_rows_kernel"; String kernelName = "extract_descrs_by_rows_kernel";
@ -1685,7 +1894,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
int win_block_stride_y = win_stride_y / block_stride_y; int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y; int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int descriptors_quadstep = descriptors.step >> 2; int descriptors_quadstep = descriptors.step >> 2;
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
@ -1701,12 +1911,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
int win_stride_y, int win_stride_x, int height, int width, int block_stride_y, int block_stride_x,
const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) int win_stride_y, int win_stride_x,
int height, int width,
const cv::ocl::oclMat &block_hists,
cv::ocl::oclMat &descriptors)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "extract_descrs_by_cols_kernel"; String kernelName = "extract_descrs_by_cols_kernel";
@ -1716,7 +1930,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
int win_block_stride_y = win_stride_y / block_stride_y; int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y; int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int descriptors_quadstep = descriptors.step >> 2; int descriptors_quadstep = descriptors.step >> 2;
size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
@ -1733,16 +1948,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
} }
static inline int divUp(int total, int grain) void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
{ const cv::ocl::oclMat &img,
return (total + grain - 1) / grain; float angle_scale,
} cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle,
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, bool correct_gamma)
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "compute_gradients_8UC1_kernel"; String kernelName = "compute_gradients_8UC1_kernel";
@ -1767,11 +1982,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c
args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) const cv::ocl::oclMat &img,
float angle_scale,
cv::ocl::oclMat &grad,
cv::ocl::oclMat &qangle,
bool correct_gamma)
{ {
Context *clCxt = Context::getContext(); Context *clCxt = Context::getContext();
String kernelName = "compute_gradients_8UC4_kernel"; String kernelName = "compute_gradients_8UC4_kernel";
@ -1797,37 +2017,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
} localThreads, args, -1, -1);
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
{
CV_Assert( (src.channels() == dst.channels()) );
Context *clCxt = Context::getContext();
String kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
float ifx = (float)src.cols / sz.width;
float ify = (float)src.rows / sz.height;
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.width));
args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.height));
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
} }

@ -73,6 +73,7 @@ namespace cv
} }
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// convert_C3C4 // convert_C3C4
static void convert_C3C4(const cl_mem &src, oclMat &dst) static void convert_C3C4(const cl_mem &src, oclMat &dst)
@ -215,6 +216,34 @@ void cv::ocl::oclMat::upload(const Mat &m)
offset = ofs.y * step + ofs.x * elemSize(); offset = ofs.y * step + ofs.x * elemSize();
} }
// Conversion operator: builds a cv::_InputArray proxy that refers to this
// oclMat. The proxy stores a type-erased pointer to this object and carries
// the OCL_MAT flag value; no matrix data is copied here.
cv::ocl::oclMat::operator cv::_InputArray()
{
    _InputArray wrapper;
    wrapper.obj = static_cast<void *>(this);
    wrapper.flags = cv::_InputArray::OCL_MAT;
    return wrapper;
}
// Conversion operator: builds a cv::_OutputArray proxy that refers to this
// oclMat. The proxy stores a type-erased pointer to this object and is tagged
// with the same OCL_MAT flag value that the _InputArray conversion uses.
cv::ocl::oclMat::operator cv::_OutputArray()
{
    _OutputArray wrapper;
    wrapper.obj = static_cast<void *>(this);
    wrapper.flags = cv::_InputArray::OCL_MAT;
    return wrapper;
}
// Recovers the oclMat that an InputArray proxy points at.
// Asserts that the proxy's flags have bits in common with OCL_MAT, then
// reinterprets the stored object pointer as an oclMat and returns a reference.
// NOTE(review): the check is a bitwise AND, not an equality test on the kind;
// confirm against the _InputArray kind encoding that this cannot accept
// proxies of other kinds.
cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
{
    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
    oclMat *wrapped = static_cast<oclMat *>(src.obj);
    return *wrapped;
}
// Recovers the oclMat that an OutputArray proxy points at.
// Asserts that the proxy's flags have bits in common with OCL_MAT, then
// reinterprets the stored object pointer as an oclMat and returns a reference.
// NOTE(review): the check is a bitwise AND, not an equality test on the kind;
// confirm against the _InputArray kind encoding that this cannot accept
// proxies of other kinds.
cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
{
    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
    oclMat *wrapped = static_cast<oclMat *>(src.obj);
    return *wrapped;
}
void cv::ocl::oclMat::download(cv::Mat &m) const void cv::ocl::oclMat::download(cv::Mat &m) const
{ {
CV_DbgAssert(!this->empty()); CV_DbgAssert(!this->empty());
@ -382,7 +411,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
if( rtype < 0 ) if( rtype < 0 )
rtype = type(); rtype = type();
else else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels()); rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
//int scn = channels(); //int scn = channels();
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);

@ -80,7 +80,7 @@ namespace cv
// provide additional methods for the user to interact with the command queue after a task is fired // provide additional methods for the user to interact with the command queue after a task is fired
static void openCLExecuteKernel_2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3], static void openCLExecuteKernel_2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode) int depth, const char *build_options, FLUSH_MODE finish_mode)
{ {
//construct kernel name //construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
@ -133,7 +133,7 @@ namespace cv
} }
void openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, void openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode)
{ {
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,

@ -43,7 +43,6 @@
// //
//M*/ //M*/
#define CELL_WIDTH 8 #define CELL_WIDTH 8
#define CELL_HEIGHT 8 #define CELL_HEIGHT 8
#define CELLS_PER_BLOCK_X 2 #define CELLS_PER_BLOCK_X 2
@ -51,6 +50,100 @@
#define NTHREADS 256 #define NTHREADS 256
#define CV_PI_F 3.1415926535897932384626433832795f #define CV_PI_F 3.1415926535897932384626433832795f
//----------------------------------------------------------------------------
// Histogram computation
// 12 threads for a cell, 12x4 threads per block
// Use pre-computed gaussian and interp_weight lookup tables if sigma is 4.0f
//
// Accumulates per-block orientation histograms from the gradient magnitude
// (grad) and quantized angle (qangle) buffers and writes them to block_hists.
// Both per-sample weights are read from gauss_w_lut instead of being
// computed: entries [0, 256) hold the gaussian weight and entries from 256
// onwards hold the interpolation weight, each indexed as a 16x16 table.
//
// Parameters:
//   cblock_stride_x/y - stride between consecutive blocks, used to locate
//                       the block's first sample in grad/qangle
//   cnbins            - number of histogram bins per cell
//   cblock_hist_size  - number of floats written per block to block_hists
//   img_block_width   - number of blocks per row of the block grid
//   blocks_in_group   - number of blocks handled by one work-group
//   blocks_total      - total number of blocks; work-items mapped past this
//                       count do the arithmetic but store nothing
//   grad_quadstep     - row step of grad, in floats
//   qangle_step       - row step of qangle, in bytes
//   gauss_w_lut       - lookup table described above
//   block_hists       - output, cblock_hist_size floats per block
//   smem              - local scratch memory; each block in the group uses
//                       cnbins * (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 +
//                       CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) floats
__kernel void compute_hists_lut_kernel(
const int cblock_stride_x, const int cblock_stride_y,
const int cnbins, const int cblock_hist_size, const int img_block_width,
const int blocks_in_group, const int blocks_total,
const int grad_quadstep, const int qangle_step,
__global const float* grad, __global const uchar* qangle,
__global const float* gauss_w_lut,
__global float* block_hists, __local float* smem)
{
// Every run of 24 consecutive local-x ids works on one block.
const int lx = get_local_id(0);
const int lp = lx / 24; /* local group id */
const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */
// Row and column of this block in the grid of img_block_width columns.
const int gidY = gid / img_block_width;
const int gidX = gid - gidY * img_block_width;
// Position inside the block: lidX in [0, 24) splits into a cell column
// (cell_x) and a thread index within the cell row (cell_thread_x in [0, 12));
// the local-y id selects the cell row.
const int lidX = lx - lp * 24;
const int lidY = get_local_id(1);
const int cell_x = lidX / 12;
const int cell_y = lidY;
const int cell_thread_x = lidX - cell_x * 12;
// Per-block scratch area: the partial histograms come first, followed by the
// reduced per-cell histograms (final_hist).
__local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
__local float* final_hist = hists + cnbins *
(CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);
// First sample handled by this work-item; (cell << 2) is cell * 4.
const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
const int offset_y = gidY * cblock_stride_y + (cell_y << 2);
// Work-items whose block index is past blocks_total start from the buffer
// base instead of a computed offset; their result is never stored (see the
// gid < blocks_total guard at the end). Two consecutive values are read per
// position, hence the (offset_x << 1).
__global const float* grad_ptr = (gid < blocks_total) ?
grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
__global const uchar* qangle_ptr = (gid < blocks_total) ?
qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
// This work-item's private column of partial sums; consecutive bins are
// 48 floats apart (presumably 12 threads x 4 cells - confirm against
// CELLS_PER_BLOCK_Y).
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
cell_thread_x;
for (int bin_id = 0; bin_id < cnbins; ++bin_id)
hist[bin_id * 48] = 0.f;
// Horizontal distances used to index the weight tables.
const int dist_x = -4 + cell_thread_x - 4 * cell_x;
const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
const int dist_y_begin = -4 - 4 * lidY;
// Walk 12 rows, adding two weighted votes per row.
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
{
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
grad_ptr += grad_quadstep;
qangle_ptr += qangle_step;
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
// Gaussian weight: first 16x16 table, indexed by the centre distances
// shifted by +8.
int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8);
float gaussian = gauss_w_lut[idx];
// Interpolation weight: second table, starting at entry 256.
idx = (dist_y + 8) * 16 + (dist_x + 8);
float interp_weight = gauss_w_lut[256+idx];
hist[bin.x * 48] += gaussian * interp_weight * vote.x;
hist[bin.y * 48] += gaussian * interp_weight * vote.y;
}
barrier(CLK_LOCAL_MEM_FENCE);
// Reduce the 12 partial sums of each cell, one bin at a time: 12 -> 6 -> 3,
// then thread 0 of the cell adds the last three and stores the cell's bin.
volatile __local float* hist_ = hist;
for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
{
if (cell_thread_x < 6)
hist_[0] += hist_[6];
barrier(CLK_LOCAL_MEM_FENCE);
if (cell_thread_x < 3)
hist_[0] += hist_[3];
// NOTE(review): this barrier is only compiled when CPU is defined; other
// builds read hist_[1] and hist_[2] below without one, which presumably
// relies on these work-items executing in lockstep - confirm.
#ifdef CPU
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if (cell_thread_x == 0)
final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
hist_[0] + hist_[1] + hist_[2];
}
barrier(CLK_LOCAL_MEM_FENCE);
// Copy the finished block histogram out, one float per work-item. Only
// work-items with tid below cblock_hist_size that belong to an existing
// block write.
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
if ((tid < cblock_hist_size) && (gid < blocks_total))
{
__global float* block_hist = block_hists +
(gidY * img_block_width + gidX) * cblock_hist_size;
block_hist[tid] = final_hist[tid];
}
}
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// Histogram computation // Histogram computation
// 12 threads for a cell, 12x4 threads per block // 12 threads for a cell, 12x4 threads per block
@ -125,16 +218,14 @@ __kernel void compute_hists_kernel(
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (cell_thread_x < 3) if (cell_thread_x < 3)
hist_[0] += hist_[3]; hist_[0] += hist_[3];
#ifdef WAVE_SIZE_1 #ifdef CPU
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
#endif #endif
if (cell_thread_x == 0) if (cell_thread_x == 0)
final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
hist_[0] + hist_[1] + hist_[2]; hist_[0] + hist_[1] + hist_[2];
} }
#ifdef WAVE_SIZE_1
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
#endif
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
if ((tid < cblock_hist_size) && (gid < blocks_total)) if ((tid < cblock_hist_size) && (gid < blocks_total))
@ -147,82 +238,107 @@ __kernel void compute_hists_kernel(
//------------------------------------------------------------- //-------------------------------------------------------------
// Normalization of histograms via L2Hys_norm // Normalization of histograms via L2Hys_norm
// // optimized for the case of 9 bins
float reduce_smem(volatile __local float* smem, int size) __kernel void normalize_hists_36_kernel(__global float* block_hists,
const float threshold, __local float *squares)
{ {
unsigned int tid = get_local_id(0); const int tid = get_local_id(0);
float sum = smem[tid]; const int gid = get_global_id(0);
const int bid = tid / 36; /* block-hist id, (0 - 6) */
const int boffset = bid * 36; /* block-hist offset in the work-group */
const int hid = tid - boffset; /* histogram bin id, (0 - 35) */
if (size >= 512) float elem = block_hists[gid];
{ squares[tid] = elem * elem;
if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
}
if (size >= 256) __local float* smem = squares + boffset;
{ float sum = smem[hid];
if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; if (hid < 18)
smem[hid] = sum = sum + smem[hid + 18];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} if (hid < 9)
if (size >= 128) smem[hid] = sum = sum + smem[hid + 9];
{
if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} if (hid < 4)
smem[hid] = sum + smem[hid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
elem = elem / (sqrt(sum) + 3.6f);
elem = min(elem, threshold);
barrier(CLK_LOCAL_MEM_FENCE);
squares[tid] = elem * elem;
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[hid];
if (hid < 18)
smem[hid] = sum = sum + smem[hid + 18];
barrier(CLK_LOCAL_MEM_FENCE);
if (hid < 9)
smem[hid] = sum = sum + smem[hid + 9];
barrier(CLK_LOCAL_MEM_FENCE);
if (hid < 4)
smem[hid] = sum + smem[hid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
block_hists[gid] = elem / (sqrt(sum) + 1e-3f);
}
//-------------------------------------------------------------
// Normalization of histograms via L2Hys_norm
//
float reduce_smem(volatile __local float* smem, int size)
{
unsigned int tid = get_local_id(0);
float sum = smem[tid];
if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE); }
#ifdef CPU
if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE); }
if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1];
barrier(CLK_LOCAL_MEM_FENCE); }
#else
if (tid < 32) if (tid < 32)
{ {
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
#endif
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
#endif
if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
#ifdef WAVE_SIZE_1
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
#endif
if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
} }
#endif
barrier(CLK_LOCAL_MEM_FENCE);
sum = smem[0];
return sum; return sum;
} }
__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width, __kernel void normalize_hists_kernel(
const int nthreads, const int block_hist_size, const int img_block_width,
__global float* block_hists, const float threshold, __local float *squares) __global float* block_hists, const float threshold, __local float *squares)
{ {
const int tid = get_local_id(0); const int tid = get_local_id(0);
const int gidX = get_group_id(0); const int gidX = get_group_id(0);
const int gidY = get_group_id(1); const int gidY = get_group_id(1);
__global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid; __global float* hist = block_hists + (gidY * img_block_width + gidX) *
block_hist_size + tid;
float elem = 0.f; float elem = 0.f;
if (tid < block_hist_size) if (tid < block_hist_size)
@ -249,8 +365,10 @@ __kernel void normalize_hists_kernel(const int nthreads, const int block_hist_si
//--------------------------------------------------------------------- //---------------------------------------------------------------------
// Linear SVM based classification // Linear SVM based classification
// // 48x96 window, 9 bins and default parameters
__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width, // 180 threads, each thread corresponds to a bin in a row
__kernel void classify_hists_180_kernel(
const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
const int img_win_width, const int img_block_width, const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y, const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs, __global const float * block_hists, __global const float* coefs,
@ -260,78 +378,200 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr
const int gidX = get_group_id(0); const int gidX = get_group_id(0);
const int gidY = get_group_id(1); const int gidY = get_group_id(1);
__global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; __global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
float product = 0.f; float product = 0.f;
for (int i = tid; i < cdescr_size; i += NTHREADS)
for (int i = 0; i < cdescr_height; i++)
{ {
int offset_y = i / cdescr_width; product += coefs[i * cdescr_width + tid] *
int offset_x = i - offset_y * cdescr_width; hist[i * img_block_width * cblock_hist_size + tid];
product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
} }
__local float products[NTHREADS]; __local float products[180];
products[tid] = product; products[tid] = product;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) products[tid] = product = product + products[tid + 128]; if (tid < 90) products[tid] = product = product + products[tid + 90];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) products[tid] = product = product + products[tid + 64]; if (tid < 45) products[tid] = product = product + products[tid + 45];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products; volatile __local float* smem = products;
if (tid < 32) #ifdef CPU
if (tid < 13) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 13)
{ {
smem[tid] = product = product + smem[tid + 32]; smem[tid] = product = product + smem[tid + 32];
#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1)
} }
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) if (tid < 16)
{ {
#endif
smem[tid] = product = product + smem[tid + 16]; smem[tid] = product = product + smem[tid + 16];
#ifdef WAVE_SIZE_1 smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4];
smem[tid] = product = product + smem[tid + 2];
} }
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif #endif
smem[tid] = product = product + smem[tid + 8];
#ifdef WAVE_SIZE_1 if (tid == 0){
product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
} }
barrier(CLK_LOCAL_MEM_FENCE); }
if (tid < 4)
//---------------------------------------------------------------------
// Linear SVM based classification
// 64x128 window, 9 bins and default parameters
// 256 threads, 252 of them are used
__kernel void classify_hists_252_kernel(
const int cdescr_width, const int cdescr_height, const int cblock_hist_size,
const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs,
float free_coef, float threshold, __global uchar* labels)
{
const int tid = get_local_id(0);
const int gidX = get_group_id(0);
const int gidY = get_group_id(1);
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
float product = 0.f;
if (tid < cdescr_width)
{ {
#endif for (int i = 0; i < cdescr_height; i++)
smem[tid] = product = product + smem[tid + 4]; product += coefs[i * cdescr_width + tid] *
#ifdef WAVE_SIZE_1 hist[i * img_block_width * cblock_hist_size + tid];
} }
__local float products[NTHREADS];
products[tid] = product;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) products[tid] = product = product + products[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
volatile __local float* smem = products;
#ifdef CPU
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 32)
{ {
#endif smem[tid] = product = product + smem[tid + 32];
smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4];
smem[tid] = product = product + smem[tid + 2]; smem[tid] = product = product + smem[tid + 2];
#ifdef WAVE_SIZE_1
} }
#endif
if (tid == 0){
product = product + smem[tid + 1];
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
}
//---------------------------------------------------------------------
// Linear SVM based classification
// 256 threads
__kernel void classify_hists_kernel(
const int cdescr_size, const int cdescr_width, const int cblock_hist_size,
const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float * block_hists, __global const float* coefs,
float free_coef, float threshold, __global uchar* labels)
{
const int tid = get_local_id(0);
const int gidX = get_group_id(0);
const int gidY = get_group_id(1);
__global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
float product = 0.f;
for (int i = tid; i < cdescr_size; i += NTHREADS)
{
int offset_y = i / cdescr_width;
int offset_x = i - offset_y * cdescr_width;
product += coefs[i] *
hist[offset_y * img_block_width * cblock_hist_size + offset_x];
}
__local float products[NTHREADS];
products[tid] = product;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128) products[tid] = product = product + products[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64) products[tid] = product = product + products[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
volatile __local float* smem = products;
#ifdef CPU
if(tid<32) smem[tid] = product = product + smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<16) smem[tid] = product = product + smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1) if(tid<8) smem[tid] = product = product + smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<4) smem[tid] = product = product + smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if(tid<2) smem[tid] = product = product + smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
#else
if (tid < 32)
{ {
smem[tid] = product = product + smem[tid + 32];
smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4];
smem[tid] = product = product + smem[tid + 2];
}
#endif #endif
if (tid == 0){
smem[tid] = product = product + smem[tid + 1]; smem[tid] = product = product + smem[tid + 1];
}
if (tid == 0)
labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
}
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// Extract descriptors // Extract descriptors
__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width, __kernel void extract_descrs_by_rows_kernel(
const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, const int cblock_hist_size, const int descriptors_quadstep,
const int cdescr_size, const int cdescr_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float* block_hists, __global float* descriptors) __global const float* block_hists, __global float* descriptors)
{ {
int tid = get_local_id(0); int tid = get_local_id(0);
@ -339,10 +579,12 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in
int gidY = get_group_id(1); int gidY = get_group_id(1);
// Get left top corner of the window in src // Get left top corner of the window in src
__global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; __global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
// Get left top corner of the window in dst // Get left top corner of the window in dst
__global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; __global float* descriptor = descriptors +
(gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
// Copy elements from src to dst // Copy elements from src to dst
for (int i = tid; i < cdescr_size; i += NTHREADS) for (int i = tid; i < cdescr_size; i += NTHREADS)
@ -353,19 +595,23 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in
} }
} }
__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, __kernel void extract_descrs_by_cols_kernel(
const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x, const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
const int win_block_stride_y, __global const float* block_hists, __global float* descriptors) const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
__global const float* block_hists, __global float* descriptors)
{ {
int tid = get_local_id(0); int tid = get_local_id(0);
int gidX = get_group_id(0); int gidX = get_group_id(0);
int gidY = get_group_id(1); int gidY = get_group_id(1);
// Get left top corner of the window in src // Get left top corner of the window in src
__global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; __global const float* hist = block_hists + (gidY * win_block_stride_y *
img_block_width + gidX * win_block_stride_x) * cblock_hist_size;
// Get left top corner of the window in dst // Get left top corner of the window in dst
__global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; __global float* descriptor = descriptors +
(gidY * get_num_groups(0) + gidX) * descriptors_quadstep;
// Copy elements from src to dst // Copy elements from src to dst
for (int i = tid; i < cdescr_size; i += NTHREADS) for (int i = tid; i < cdescr_size; i += NTHREADS)
@ -376,14 +622,17 @@ __kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const in
int y = block_idx / cnblocks_win_x; int y = block_idx / cnblocks_win_x;
int x = block_idx - y * cnblocks_win_x; int x = block_idx - y * cnblocks_win_x;
descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] =
hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block];
} }
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// Gradients computation // Gradients computation
__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, __kernel void compute_gradients_8UC4_kernel(
const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step,
const __global uchar4 * img, __global float * grad, __global uchar * qangle, const __global uchar4 * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins) const float angle_scale, const char correct_gamma, const int cnbins)
{ {
@ -426,8 +675,10 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (x < width) if (x < width)
{ {
float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]); float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)],
float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]); sh_row[tid + 2 * (NTHREADS + 2)]);
float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)],
sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
float3 dx; float3 dx;
if (correct_gamma == 1) if (correct_gamma == 1)
@ -482,7 +733,9 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c
} }
} }
__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, __kernel void compute_gradients_8UC1_kernel(
const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step,
__global const uchar * img, __global float * grad, __global uchar * qangle, __global const uchar * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins) const float angle_scale, const char correct_gamma, const int cnbins)
{ {
@ -540,42 +793,3 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c
grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang;
} }
} }
//----------------------------------------------------------------------------
// Resize
// Nearest-pixel resize of a 4-channel 8-bit image; one work-item handles one
// destination pixel (dx, dy). ifx / ify scale a destination coordinate into
// the source image.
__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src,
                                 int dst_offset, int src_offset, int dst_step, int src_step,
                                 int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items that fall outside the destination image write nothing.
    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Round the scaled coordinate to the closest source pixel and clamp it to
    // the last source column / row.
    const int sx = min((int)floor(dx * ifx + 0.5f), src_cols - 1);
    const int sy = min((int)floor(dy * ify + 0.5f), src_rows - 1);

    // Offsets and steps are divided by 4 before indexing the uchar4 buffers
    // (presumably they are passed in bytes -- confirm against the host code).
    const int dst_index = (dst_offset >> 2) + dy * (dst_step >> 2) + dx;
    const int src_index = (src_offset >> 2) + sy * (src_step >> 2) + sx;

    dst[dst_index] = src[src_index];
}
// Nearest-pixel resize of a single-channel 8-bit image; one work-item handles
// one destination pixel (dx, dy). ifx / ify scale a destination coordinate
// into the source image.
__kernel void resize_8UC1_kernel(__global uchar * dst, __global const uchar * src,
                                 int dst_offset, int src_offset, int dst_step, int src_step,
                                 int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
{
    const int dx = get_global_id(0);
    const int dy = get_global_id(1);

    // Work-items that fall outside the destination image write nothing.
    if (dx >= dst_cols || dy >= dst_rows)
        return;

    // Round the scaled coordinate to the closest source pixel and clamp it to
    // the last source column / row.
    const int sx = min((int)floor(dx * ifx + 0.5f), src_cols - 1);
    const int sy = min((int)floor(dy * ify + 0.5f), src_rows - 1);

    // Offsets and steps are used unscaled here, i.e. in units of uchar elements.
    const int dst_index = dst_offset + dy * dst_step + dx;
    const int src_index = src_offset + sy * src_step + sx;

    dst[dst_index] = src[src_index];
}

@ -1,180 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Sen Liu, swjutls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/objdetect.hpp"
#include "precomp.hpp"
#if 0 //def HAVE_OPENCL
using namespace cvtest;
using namespace testing;
using namespace std;
using namespace cv;
extern string workdir;
namespace
{
    // Test parameter wrapping the file name of a cascade XML.
    IMPLEMENT_PARAM_CLASS(CascadeName, std::string);

    // The two frontal-face cascades the Haar suite is instantiated with.
    CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
    CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));

    // Functor that projects a CvAvgComp onto its rectangle; used with
    // std::transform to turn detection results into a vector<Rect>.
    struct getRect
    {
        Rect operator ()(const CvAvgComp &comp) const { return comp.rect; }
    };
}
// Fixture for the Haar cascade tests.
// Parameters: image down-scale factor, detection flags, cascade file name.
PARAM_TEST_CASE(Haar, double, int, CascadeName)
{
    // OpenCL classifiers under test (nestedCascade is not loaded here).
    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
    // CPU reference classifiers (cpunestedCascade is not loaded here).
    cv::CascadeClassifier cpucascade, cpunestedCascade;

    double scale;
    int flags;
    std::string cascadeName;

    // Reads the parameters and loads the same cascade file into the OpenCL and
    // the CPU classifier. A load failure is only reported on stdout; it does
    // not fail the test by itself.
    virtual void SetUp()
    {
        scale = GET_PARAM(0);
        flags = GET_PARAM(1);

        cascadeName = workdir + "../../data/haarcascades/";
        cascadeName.append(GET_PARAM(2));

        // Short-circuits: the CPU cascade is only loaded when the OpenCL one loaded.
        const bool loaded = cascade.load( cascadeName ) && cpucascade.load(cascadeName);
        if (!loaded)
        {
            cout << "ERROR: Could not load classifier cascade" << endl;
        }
    }
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
// Detects faces in lena.jpg with the OpenCL cascade (legacy CvSeq interface)
// and with the CPU cascade, then expects both to find the same number of faces.
TEST_P(Haar, FaceDetect)
{
    const string imgName = workdir + "lena.jpg";
    Mat img = imread(imgName, 1);
    if (img.empty())
    {
        std::cout << "Couldn't read " << imgName << std::endl;
        return;
    }

    // Detector input: grayscale, resized to 1/scale of the original, equalized.
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);
    Mat smallImg(cvRound(img.rows / scale), cvRound(img.cols / scale), CV_8UC1);
    resize(gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR);
    equalizeHist(smallImg, smallImg);

    // OpenCL detection; results come back as a CvSeq of CvAvgComp allocated
    // from 'storage'.
    cv::ocl::oclMat image;
    image.upload(smallImg);
    MemStorage storage(cvCreateMemStorage(0));
    CvSeq *_objects = cascade.oclHaarDetectObjects(image, storage, 1.1, 3,
                                                   flags, Size(30, 30), Size(0, 0));

    // Convert the sequence into a plain vector of rectangles.
    vector<CvAvgComp> vecAvgComp;
    Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
    vector<Rect> oclfaces(vecAvgComp.size());
    std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());

    // CPU reference detection with the same parameters.
    vector<Rect> faces;
    cpucascade.detectMultiScale(smallImg, faces, 1.1, 3, flags, Size(30, 30), Size(0, 0));

    EXPECT_EQ(faces.size(), oclfaces.size());
}
// Detects faces in lena.jpg with the buffered OpenCL cascade
// (OclCascadeClassifierBuf) and with the CPU cascade, and expects both to find
// the same number of faces. The OpenCL detection is run twice on the same
// classifier object to check that it still works after the first run.
TEST_P(Haar, FaceDetectUseBuf)
{
    string imgName = workdir + "lena.jpg";
    Mat img = imread( imgName, 1 );
    if(img.empty())
    {
        std::cout << "Couldn't read " << imgName << std::endl;
        return ;
    }

    // Detector input: grayscale, resized to 1/scale of the original, equalized.
    vector<Rect> faces, oclfaces;
    Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
    // Use the C++ conversion code, matching Haar.FaceDetect, instead of the
    // legacy C constant CV_BGR2GRAY.
    cvtColor( img, gray, COLOR_BGR2GRAY );
    resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
    equalizeHist( smallImg, smallImg );

    cv::ocl::oclMat image;
    image.upload(smallImg);

    // The buffered classifier is local to this test, so it is loaded here
    // rather than in the fixture's SetUp().
    cv::ocl::OclCascadeClassifierBuf cascadebuf;
    if( !cascadebuf.load( cascadeName ) )
    {
        cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
        return;
    }

    // First OpenCL run.
    cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
                                 flags,
                                 Size(30, 30), Size(0, 0) );

    // CPU reference run with the same parameters.
    cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
                                 flags,
                                 Size(30, 30), Size(0, 0) );
    EXPECT_EQ(faces.size(), oclfaces.size());

    // intentionally run ocl facedetect again and check if it still works after the first run
    // (this call leaves the last size argument at its default)
    cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
                                 flags,
                                 Size(30, 30));
    cascadebuf.release();
    EXPECT_EQ(faces.size(), oclfaces.size());
}
// Instantiates the Haar suite for scale 1.0, with and without
// CV_HAAR_SCALE_IMAGE, and for each of the two frontal-face cascades.
INSTANTIATE_TEST_CASE_P(FaceDetect, Haar, Combine(
    Values(1.0),
    Values(CV_HAAR_SCALE_IMAGE, 0),
    Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
#endif // HAVE_OPENCL

@ -1573,6 +1573,47 @@ TEST_P(Convolve, Mat)
} }
} }
//////////////////////////////// ColumnSum //////////////////////////////////////
// Fixture for cv::ocl::columnSum, parameterized by the matrix size.
PARAM_TEST_CASE(ColumnSum, cv::Size)
{
    cv::Size size;
    cv::Mat src;

    virtual void SetUp()
    {
        size = GET_PARAM(0);
    }
};

// Compares cv::ocl::columnSum against a running per-column sum computed on the
// host: every output element must equal the sum of the input elements above it
// in the same column, itself included.
TEST_P(ColumnSum, Accuracy)
{
    // 'input' is uploaded first and then accumulated in place, so after a row
    // has been visited it holds the expected result for that row.
    cv::Mat input = randomMat(size, CV_32FC1);

    cv::ocl::oclMat d_src(input);
    cv::ocl::oclMat d_dst;
    cv::ocl::columnSum(d_src, d_dst);

    cv::Mat dst(d_dst);

    for (int row = 0; row < input.rows; ++row)
    {
        for (int col = 0; col < input.cols; ++col)
        {
            // The first row is its own running sum; later rows add the
            // already-accumulated value directly above.
            if (row > 0)
                input.at<float>(row, col) += input.at<float>(row - 1, col);

            float gold = input.at<float>(row, col);
            float res = dst.at<float>(row, col);
            ASSERT_NEAR(res, gold, 1e-5);
        }
    }
}
/////////////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
ONE_TYPE(CV_8UC1), ONE_TYPE(CV_8UC1),
NULL_TYPE, NULL_TYPE,
@ -1688,7 +1729,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)), Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
Values(0.0, 40.0))); Values(0.0, 40.0)));
//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
// Values(CV_32FC1, CV_32FC1),
// Values(false))); // Values(false) is the reserved parameter
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors // @Authors
// Wenju He, wenju@multicorewareinc.com // Yao Wang, bitwangyaoyao@gmail.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
@ -45,51 +45,61 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/objdetect/objdetect_c.h"
using namespace std; using namespace std;
using namespace cv;
using namespace testing;
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
extern string workdir; extern string workdir;
PARAM_TEST_CASE(HOG, cv::Size, int)
///////////////////// HOG /////////////////////////////
PARAM_TEST_CASE(HOG, Size, int)
{ {
cv::Size winSize; Size winSize;
int type; int type;
Mat img_rgb;
virtual void SetUp() virtual void SetUp()
{ {
winSize = GET_PARAM(0); winSize = GET_PARAM(0);
type = GET_PARAM(1); type = GET_PARAM(1);
img_rgb = readImage(workdir + "../gpu/road.png");
if(img_rgb.empty())
{
std::cout << "Couldn't read road.png" << std::endl;
}
} }
}; };
TEST_P(HOG, GetDescriptors) TEST_P(HOG, GetDescriptors)
{ {
// Load image
cv::Mat img_rgb = readImage(workdir + "lena.jpg");
ASSERT_FALSE(img_rgb.empty());
// Convert image // Convert image
cv::Mat img; Mat img;
switch (type) switch (type)
{ {
case CV_8UC1: case CV_8UC1:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY); cvtColor(img_rgb, img, COLOR_BGR2GRAY);
break; break;
case CV_8UC4: case CV_8UC4:
default: default:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA); cvtColor(img_rgb, img, COLOR_BGR2BGRA);
break; break;
} }
cv::ocl::oclMat d_img(img); ocl::oclMat d_img(img);
// HOGs // HOGs
cv::ocl::HOGDescriptor ocl_hog; ocl::HOGDescriptor ocl_hog;
ocl_hog.gamma_correction = true; ocl_hog.gamma_correction = true;
cv::HOGDescriptor hog; HOGDescriptor hog;
hog.gammaCorrection = true; hog.gammaCorrection = true;
// Compute descriptor // Compute descriptor
cv::ocl::oclMat d_descriptors; ocl::oclMat d_descriptors;
ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL); ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
cv::Mat down_descriptors; Mat down_descriptors;
d_descriptors.download(down_descriptors); d_descriptors.download(down_descriptors);
down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows); down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);
@ -105,45 +115,34 @@ TEST_P(HOG, GetDescriptors)
hog.compute(img_rgb, descriptors, ocl_hog.win_size); hog.compute(img_rgb, descriptors, ocl_hog.win_size);
break; break;
} }
cv::Mat cpu_descriptors(descriptors); Mat cpu_descriptors(descriptors);
EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2);
} }
// Returns true when r1 and r2 agree on x, y, width and height, each to within
// strictly less than 'threshold'.
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
{
    if (abs(r1.x - r2.x) >= threshold)
        return false;
    if (abs(r1.y - r2.y) >= threshold)
        return false;
    if (abs(r1.width - r2.width) >= threshold)
        return false;
    return abs(r1.height - r2.height) < threshold;
}
TEST_P(HOG, Detect) TEST_P(HOG, Detect)
{ {
// Load image
cv::Mat img_rgb = readImage(workdir + "lena.jpg");
ASSERT_FALSE(img_rgb.empty());
// Convert image // Convert image
cv::Mat img; Mat img;
switch (type) switch (type)
{ {
case CV_8UC1: case CV_8UC1:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY); cvtColor(img_rgb, img, COLOR_BGR2GRAY);
break; break;
case CV_8UC4: case CV_8UC4:
default: default:
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA); cvtColor(img_rgb, img, COLOR_BGR2BGRA);
break; break;
} }
cv::ocl::oclMat d_img(img); ocl::oclMat d_img(img);
// HOGs // HOGs
if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128))) if ((winSize != Size(48, 96)) && (winSize != Size(64, 128)))
winSize = cv::Size(64, 128); winSize = Size(64, 128);
cv::ocl::HOGDescriptor ocl_hog(winSize); ocl::HOGDescriptor ocl_hog(winSize);
ocl_hog.gamma_correction = true; ocl_hog.gamma_correction = true;
cv::HOGDescriptor hog; HOGDescriptor hog;
hog.winSize = winSize; hog.winSize = winSize;
hog.gammaCorrection = true; hog.gammaCorrection = true;
@ -165,88 +164,119 @@ TEST_P(HOG, Detect)
} }
// OpenCL detection // OpenCL detection
std::vector<cv::Rect> d_found; std::vector<Rect> d_found;
ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
// CPU detection // CPU detection
std::vector<cv::Rect> found; std::vector<Rect> found;
switch (type) switch (type)
{ {
case CV_8UC1: case CV_8UC1:
hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
break; break;
case CV_8UC4: case CV_8UC4:
default: default:
hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6);
break; break;
} }
// Ground-truth rectangular people window EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0);
cv::Rect win1_64x128(231, 190, 72, 144); }
cv::Rect win2_64x128(621, 156, 97, 194);
cv::Rect win1_48x96(238, 198, 63, 126);
cv::Rect win2_48x96(619, 161, 92, 185);
cv::Rect win3_48x96(488, 136, 56, 112);
// Compare whether ground-truth windows are detected and compare the number of windows detected.
std::vector<int> d_comp(4);
std::vector<int> comp(4);
for(int i = 0; i < (int)d_comp.size(); i++)
{
d_comp[i] = 0;
comp[i] = 0;
}
int threshold = 10;
int val = 32; INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
d_comp[0] = (int)d_found.size(); testing::Values(Size(64, 128), Size(48, 96)),
comp[0] = (int)found.size(); testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
if (winSize == cv::Size(48, 96))
{ #if 0
for(int i = 0; i < (int)d_found.size(); i++) ///////////////////////////// Haar //////////////////////////////
{ IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
if (match_rect(d_found[i], win1_48x96, threshold)) CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
d_comp[1] = val; CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
if (match_rect(d_found[i], win2_48x96, threshold)) struct getRect
d_comp[2] = val; {
if (match_rect(d_found[i], win3_48x96, threshold)) Rect operator ()(const CvAvgComp &e) const
d_comp[3] = val;
}
for(int i = 0; i < (int)found.size(); i++)
{ {
if (match_rect(found[i], win1_48x96, threshold)) return e.rect;
comp[1] = val;
if (match_rect(found[i], win2_48x96, threshold))
comp[2] = val;
if (match_rect(found[i], win3_48x96, threshold))
comp[3] = val;
}
} }
else if (winSize == cv::Size(64, 128)) };
PARAM_TEST_CASE(Haar, int, CascadeName)
{
ocl::OclCascadeClassifier cascade, nestedCascade;
CascadeClassifier cpucascade, cpunestedCascade;
int flags;
std::string cascadeName;
vector<Rect> faces, oclfaces;
Mat img;
ocl::oclMat d_img;
virtual void SetUp()
{ {
for(int i = 0; i < (int)d_found.size(); i++) flags = GET_PARAM(0);
cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1));
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
{ {
if (match_rect(d_found[i], win1_64x128, threshold)) std::cout << "ERROR: Could not load classifier cascade" << std::endl;
d_comp[1] = val; return;
if (match_rect(d_found[i], win2_64x128, threshold))
d_comp[2] = val;
} }
for(int i = 0; i < (int)found.size(); i++) img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE);
if(img.empty())
{ {
if (match_rect(found[i], win1_64x128, threshold)) std::cout << "Couldn't read lena.jpg" << std::endl;
comp[1] = val; return ;
if (match_rect(found[i], win2_64x128, threshold))
comp[2] = val;
} }
equalizeHist(img, img);
d_img.upload(img);
} }
};
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3); TEST_P(Haar, FaceDetect)
{
MemStorage storage(cvCreateMemStorage(0));
CvSeq *_objects;
_objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3,
flags, Size(30, 30), Size(0, 0));
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
cpucascade.detectMultiScale(img, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0));
EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
} }
TEST_P(Haar, FaceDetectUseBuf)
{
ocl::OclCascadeClassifierBuf cascadebuf;
if(!cascadebuf.load(cascadeName))
{
std::cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << std::endl;
return;
}
cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0));
cpucascade.detectMultiScale(img, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0));
// intentionally run ocl facedetect again and check if it still works after the first run
cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3,
flags,
Size(30, 30));
cascadebuf.release();
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
testing::Values(cv::Size(64, 128), cv::Size(48, 96)), }
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar,
Combine(Values(CV_HAAR_SCALE_IMAGE, 0),
Values(cascade_frontalface_alt/*, cascade_frontalface_alt2*/)));
#endif
#endif //HAVE_OPENCL #endif //HAVE_OPENCL

@ -15,7 +15,6 @@
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors // @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang yao@multicorewareinc.com // Yao Wang yao@multicorewareinc.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
@ -56,11 +55,12 @@ using namespace cvtest;
using namespace testing; using namespace testing;
using namespace std; using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, int) PARAM_TEST_CASE(PyrBase, MatType, int)
{ {
int type; int type;
int channels; int channels;
Mat dst_cpu;
oclMat gdst;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); type = GET_PARAM(0);
@ -69,19 +69,19 @@ PARAM_TEST_CASE(PyrDown, MatType, int)
}; };
/////////////////////// PyrDown //////////////////////////
struct PyrDown : PyrBase {};
TEST_P(PyrDown, Mat) TEST_P(PyrDown, Mat)
{ {
for(int j = 0; j < LOOP_TIMES; j++) for(int j = 0; j < LOOP_TIMES; j++)
{ {
cv::Size size(MWIDTH, MHEIGHT); Size size(MWIDTH, MHEIGHT);
cv::RNG &rng = TS::ptr()->get_rng(); Mat src = randomMat(size, CV_MAKETYPE(type, channels));
cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false); oclMat gsrc(src);
cv::ocl::oclMat gsrc(src), gdst; pyrDown(src, dst_cpu);
cv::Mat dst_cpu; pyrDown(gsrc, gdst);
cv::pyrDown(src, dst_cpu);
cv::ocl::pyrDown(gsrc, gdst);
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f); EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f);
} }
@ -90,5 +90,27 @@ TEST_P(PyrDown, Mat)
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4))); Values(CV_8U, CV_32F), Values(1, 3, 4)));
/////////////////////// PyrUp //////////////////////////
struct PyrUp : PyrBase {};
TEST_P(PyrUp, Accuracy)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
Size size(MWIDTH, MHEIGHT);
Mat src = randomMat(size, CV_MAKETYPE(type, channels));
oclMat gsrc(src);
pyrUp(src, dst_cpu);
pyrUp(gsrc, gdst);
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 1e-4f : 1.0));
}
}
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
Values(CV_8U, CV_32F), Values(1, 3, 4)));
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

@ -1,90 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Yao Wang yao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cvtest;
using namespace testing;
using namespace std;
// Fixture for the pyrUp accuracy test, parameterized by depth and channel count.
PARAM_TEST_CASE(PyrUp, MatType, int)
{
    int type;
    int channels;

    virtual void SetUp()
    {
        type = GET_PARAM(0);
        channels = GET_PARAM(1);
    }
};

// Runs pyrUp on the host and on the OpenCL device for LOOP_TIMES random
// inputs and compares the results. The tolerance is 1e-4 for CV_32F and 1.0
// otherwise.
TEST_P(PyrUp, Accuracy)
{
    const Size size(MWIDTH, MHEIGHT);

    for (int iteration = 0; iteration < LOOP_TIMES; ++iteration)
    {
        Mat src = randomMat(size, CV_MAKETYPE(type, channels));

        // Host reference.
        Mat dst_gold;
        pyrUp(src, dst_gold);

        // Device result.
        ocl::oclMat srcMat(src);
        ocl::oclMat dst;
        ocl::pyrUp(srcMat, dst);

        EXPECT_MAT_NEAR(dst_gold, Mat(dst), (type == CV_32F ? 1e-4f : 1.0));
    }
}

// Depths CV_8U and CV_32F, each with 1, 3 and 4 channels.
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine(
    Values(CV_8U, CV_32F),
    Values(1, 3, 4)));
#endif // HAVE_OPENCL

@ -100,12 +100,6 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
} }
/* /*
void showDiff(InputArray gold_, InputArray actual_, double eps) void showDiff(InputArray gold_, InputArray actual_, double eps)
{ {
@ -137,58 +131,7 @@ void showDiff(InputArray gold_, InputArray actual_, double eps)
} }
*/ */
/*
bool supportFeature(const DeviceInfo& info, FeatureSet feature)
{
return TargetArchs::builtWith(feature) && info.supports(feature);
}
const vector<DeviceInfo>& devices()
{
static vector<DeviceInfo> devs;
static bool first = true;
if (first)
{
int deviceCount = getCudaEnabledDeviceCount();
devs.reserve(deviceCount);
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
devs.push_back(info);
}
first = false;
}
return devs;
}
vector<DeviceInfo> devices(FeatureSet feature)
{
const vector<DeviceInfo>& d = devices();
vector<DeviceInfo> devs_filtered;
if (TargetArchs::builtWith(feature))
{
devs_filtered.reserve(d.size());
for (size_t i = 0, size = d.size(); i < size; ++i)
{
const DeviceInfo& info = d[i];
if (info.supports(feature))
devs_filtered.push_back(info);
}
}
return devs_filtered;
}
*/
vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end) vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
{ {
@ -264,3 +207,48 @@ void PrintTo(const Inverse &inverse, std::ostream *os)
(*os) << "direct"; (*os) << "direct";
} }
double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
{
double final_test_result = 0.0;
size_t sz1 = ob1.size();
size_t sz2 = ob2.size();
if(sz1 != sz2)
{
return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
}
else
{
if(sz1==0 && sz2==0)
return 0;
cv::Mat cpu_result(sz, CV_8UC1);
cpu_result.setTo(0);
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
{
cv::Mat cpu_result_roi(cpu_result, *r);
cpu_result_roi.setTo(1);
cpu_result.copyTo(cpu_result);
}
int cpu_area = cv::countNonZero(cpu_result > 0);
cv::Mat gpu_result(sz, CV_8UC1);
gpu_result.setTo(0);
for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
{
cv::Mat gpu_result_roi(gpu_result, *r2);
gpu_result_roi.setTo(1);
gpu_result.copyTo(gpu_result);
}
cv::Mat result_;
multiply(cpu_result, gpu_result, result_);
int result = cv::countNonZero(result_ > 0);
if(cpu_area!=0 && result!=0)
final_test_result = 1.0 - (double)result/(double)cpu_area;
else if(cpu_area==0 && result!=0)
final_test_result = -1;
}
return final_test_result;
}

@ -57,13 +57,12 @@ cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal =
void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
//! return true if device supports specified feature and gpu module was built with support the feature. // This function test if gpu_rst matches cpu_rst.
//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); // If the two vectors are not equal, it will return the difference in vector size
// Else it will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
// The smaller, the better matched
double checkRectSimilarity(cv::Size sz, std::vector<cv::Rect>& ob1, std::vector<cv::Rect>& ob2);
//! return all devices compatible with current gpu module build.
//const std::vector<cv::ocl::DeviceInfo>& devices();
//! return all devices compatible with current gpu module build which support specified feature.
//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
//! read image from testdata folder. //! read image from testdata folder.
cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);

@ -100,34 +100,39 @@ class TestInfo(object):
def dump(self, units="ms"): def dump(self, units="ms"):
print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units) print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units)
def shortName(self):
def getName(self):
pos = self.name.find("/") pos = self.name.find("/")
if pos > 0: if pos > 0:
name = self.name[:pos] return self.name[:pos]
else: return self.name
name = self.name
if self.fixture.endswith(name):
fixture = self.fixture[:-len(name)] def getFixture(self):
if self.fixture.endswith(self.getName()):
fixture = self.fixture[:-len(self.getName())]
else: else:
fixture = self.fixture fixture = self.fixture
if fixture.endswith("_"): if fixture.endswith("_"):
fixture = fixture[:-1] fixture = fixture[:-1]
return fixture
def param(self):
return '::'.join(filter(None, [self.type_param, self.value_param]))
def shortName(self):
name = self.getName()
fixture = self.getFixture()
return '::'.join(filter(None, [name, fixture])) return '::'.join(filter(None, [name, fixture]))
def __str__(self): def __str__(self):
pos = self.name.find("/") name = self.getName()
if pos > 0: fixture = self.getFixture()
name = self.name[:pos]
else:
name = self.name
if self.fixture.endswith(name):
fixture = self.fixture[:-len(name)]
else:
fixture = self.fixture
if fixture.endswith("_"):
fixture = fixture[:-1]
return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param])) return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param]))
def __cmp__(self, other): def __cmp__(self, other):
r = cmp(self.fixture, other.fixture); r = cmp(self.fixture, other.fixture);
if r != 0: if r != 0:

@ -0,0 +1,167 @@
#!/usr/bin/env python
from __future__ import division
import ast
import logging
import numbers
import os, os.path
import re
from argparse import ArgumentParser
from collections import OrderedDict
from glob import glob
from itertools import ifilter
import xlwt
from testlog_parser import parseLogFile
# To build an XLS report you need to arrange your XML files (OpenCV test output)
# in the following way:
#
# "root" --- folder representing the whole XLS document. It contains several
# subfolders --- sheet-paths of the XLS document. Each sheet-path contains its
# own subfolders --- config-paths. Config-paths are the columns of the sheet,
# and they contain XML files --- the output of OpenCV module tests.
# A config-path corresponds to an OpenCV build configuration, including
# options such as NEON, TBB, or GPU being enabled or disabled.
#
# root
# root\sheet_path
# root\sheet_path\configuration1 (column 1)
# root\sheet_path\configuration2 (column 2)
# Matches an image-size test parameter. re.VERBOSE makes the spaces inside the
# pattern insignificant, so this accepts digits, "x", digits (e.g. "640x480").
re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE)
# Matches a data-type test parameter: depth 8/16/32/64, kind U/S/F, the letter
# "C" and a channel count 1-4 (e.g. "8UC1", "32FC3"). Spaces are again ignored
# because of re.VERBOSE.
re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE)
# Cell styles applied by main() while filling a sheet.
# Measured time: number with two decimals.
time_style = xlwt.easyxf(num_format_str='#0.00')
# No result for this configuration: solid gray fill.
no_time_style = xlwt.easyxf('pattern: pattern solid, fore_color gray25')
# Speedup within the neutral band: same formatting as a plain time.
speedup_style = time_style
# Speedup above the "good" threshold: green text.
good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00')
# Speedup below the "bad" threshold: red text.
bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00')
# Speedup cannot be computed because a side is missing or non-numeric.
no_speedup_style = no_time_style
# Speedup computation raised an arithmetic error: solid orange fill.
error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange')
# Header row: bold, centred, top-aligned, wrapped text.
header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True')
def collect_xml(collection, configuration, xml_fullname):
    """Merge the results of one perf-test log into `collection`.

    collection    -- dict mapping module name -> OrderedDict that maps
                     (test short name, test parameters) -> {configuration: result};
                     updated in place.
    configuration -- name of the build configuration the log belongs to.
    xml_fullname  -- path of the XML log; the module name is taken from the
                     part of its file name before the first underscore.

    The stored result is the test's "gmean" value when its status is 'run',
    otherwise the status string itself.
    """
    xml_fname = os.path.split(xml_fullname)[1]

    # Fall back to the extension-less file name when there is no underscore,
    # instead of letting str.index() raise ValueError and abort the whole report.
    module, separator, _ = xml_fname.partition('_')
    if not separator:
        module = os.path.splitext(xml_fname)[0]

    module_tests = collection.setdefault(module, OrderedDict())

    # Sorting the parsed tests fixes the row order within each module.
    for test in sorted(parseLogFile(xml_fullname)):
        test_results = module_tests.setdefault((test.shortName(), test.param()), {})
        test_results[configuration] = test.get("gmean") if test.status == 'run' else test.status
def main():
    """Build an XLS performance report from directories of perf test logs.

    Command line:
        DIR [DIR ...]  one or more sheet directories; each becomes one sheet
        -o/--output    name of the XLS file to write (default 'report.xls')
        -c/--config    optional global configuration file (a Python literal dict)

    For every sheet directory an optional 'sheet.conf' (also a Python literal
    dict) is read and layered over the global configuration. The keys consulted
    are 'configurations', 'sheet_name', 'comparisons' and 'module_colors'.
    """
    arg_parser = ArgumentParser(description='Build an XLS performance report.')
    arg_parser.add_argument('sheet_dirs', nargs='+', metavar='DIR', help='directory containing perf test logs')
    arg_parser.add_argument('-o', '--output', metavar='XLS', default='report.xls', help='name of output file')
    arg_parser.add_argument('-c', '--config', metavar='CONF', help='global configuration file')
    args = arg_parser.parse_args()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

    if args.config is not None:
        with open(args.config) as global_conf_file:
            global_conf = ast.literal_eval(global_conf_file.read())
    else:
        global_conf = {}

    wb = xlwt.Workbook()

    for sheet_path in args.sheet_dirs:
        sheet_conf_path = os.path.join(sheet_path, 'sheet.conf')
        try:
            with open(sheet_conf_path) as sheet_conf_file:
                sheet_conf = ast.literal_eval(sheet_conf_file.read())
        except EnvironmentError:
            # A missing or unreadable sheet.conf is normal: use global settings only.
            sheet_conf = {}
            logging.debug('no sheet.conf for %s', sheet_path)
        except Exception as e:
            # The file exists but is not a valid Python literal. Report that
            # explicitly instead of claiming the file is absent.
            sheet_conf = {}
            logging.warning('ignoring malformed %s: %s', sheet_conf_path, e)

        # Per-sheet settings override the global ones.
        merged_conf = dict(global_conf)
        merged_conf.update(sheet_conf)
        sheet_conf = merged_conf

        if 'configurations' in sheet_conf:
            config_names = sheet_conf['configurations']
        else:
            # No explicit list: every subdirectory of the sheet is a configuration.
            try:
                config_names = [p for p in os.listdir(sheet_path)
                                if os.path.isdir(os.path.join(sheet_path, p))]
            except Exception as e:
                logging.warning('error while determining configuration names for %s: %s', sheet_path, e)
                continue

        collection = {}

        for configuration, configuration_path in \
                [(c, os.path.join(sheet_path, c)) for c in config_names]:
            logging.info('processing %s', configuration_path)
            for xml_fullname in glob(os.path.join(configuration_path, '*.xml')):
                collect_xml(collection, configuration, xml_fullname)

        sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path))))

        # Tall, frozen header row.
        sheet.row(0).height = 800
        sheet.panes_frozen = True
        sheet.remove_splits = True
        sheet.horz_split_pos = 1
        sheet.horz_split_first_visible = 1

        sheet_comparisons = sheet_conf.get('comparisons', [])

        # Five fixed columns, then one per configuration, one spacer column and
        # one per comparison.
        for i, w in enumerate([2000, 15000, 2500, 2000, 15000]
                              + (len(config_names) + 1 + len(sheet_comparisons)) * [3000]):
            sheet.col(i).width = w

        for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters']
                                    + config_names + [None]
                                    + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]):
            sheet.row(0).write(i, caption, header_style)

        row = 1

        module_colors = sheet_conf.get('module_colors', {})
        module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color))
                         for module, color in module_colors.items()}

        for module, tests in sorted(collection.items()):
            for ((test, param), configs) in tests.items():
                sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style))
                sheet.write(row, 1, test)

                # Drop the first and last character of the parameter string and
                # pick out the first entry that looks like an image size / data type.
                param_list = param[1:-1].split(", ")
                sheet.write(row, 2, next((p for p in param_list if re_image_size.match(p)), None))
                sheet.write(row, 3, next((p for p in param_list if re_data_type.match(p)), None))

                sheet.row(row).write(4, param)

                for i, c in enumerate(config_names):
                    if c in configs:
                        sheet.write(row, 5 + i, configs[c], time_style)
                    else:
                        sheet.write(row, 5 + i, None, no_time_style)

                for i, comp in enumerate(sheet_comparisons):
                    cmp_from = configs.get(comp["from"])
                    cmp_to = configs.get(comp["to"])
                    col = 5 + len(config_names) + 1 + i

                    # A result is a number only when the test ran; otherwise it
                    # is a status string (or missing) and no speedup exists.
                    if isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number):
                        try:
                            speedup = cmp_from / cmp_to
                            sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else
                                                           bad_speedup_style if speedup < 0.9 else
                                                           speedup_style)
                        except ArithmeticError:
                            # e.g. division by a zero time
                            sheet.write(row, col, None, error_speedup_style)
                    else:
                        sheet.write(row, col, None, no_speedup_style)

                row += 1
                # Flush buffered rows every 1000 rows.
                if row % 1000 == 0: sheet.flush_row_data()

    wb.save(args.output)

if __name__ == '__main__':
    main()

@ -2,6 +2,10 @@
#include <float.h> #include <float.h>
#include <limits.h> #include <limits.h>
#ifdef HAVE_TEGRA_OPTIMIZATION
#include "tegra.hpp"
#endif
using namespace cv; using namespace cv;
namespace cvtest namespace cvtest
@ -2939,28 +2943,76 @@ MatComparator::operator()(const char* expr1, const char* expr2,
void printVersionInfo(bool useStdOut) void printVersionInfo(bool useStdOut)
{ {
::testing::Test::RecordProperty("CV_VERSION", CV_VERSION); ::testing::Test::RecordProperty("cv_version", CV_VERSION);
if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl; if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
std::string buildInfo( cv::getBuildInformation() ); std::string buildInfo( cv::getBuildInformation() );
size_t pos1 = buildInfo.find("Version control"); size_t pos1 = buildInfo.find("Version control");
size_t pos2 = buildInfo.find("\n", pos1);\ size_t pos2 = buildInfo.find('\n', pos1);
if(pos1 != std::string::npos && pos2 != std::string::npos) if(pos1 != std::string::npos && pos2 != std::string::npos)
{ {
std::string ver( buildInfo.substr(pos1, pos2-pos1) ); size_t value_start = buildInfo.rfind(' ', pos2) + 1;
::testing::Test::RecordProperty("Version_control", ver); std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
if(useStdOut) std::cout << ver << std::endl; ::testing::Test::RecordProperty("cv_vcs_version", ver);
if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl;
} }
pos1 = buildInfo.find("inner version"); pos1 = buildInfo.find("inner version");
pos2 = buildInfo.find("\n", pos1);\ pos2 = buildInfo.find('\n', pos1);
if(pos1 != std::string::npos && pos2 != std::string::npos) if(pos1 != std::string::npos && pos2 != std::string::npos)
{ {
std::string ver( buildInfo.substr(pos1, pos2-pos1) ); size_t value_start = buildInfo.rfind(' ', pos2) + 1;
::testing::Test::RecordProperty("inner_version", ver); std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
if(useStdOut) std::cout << ver << std::endl; ::testing::Test::RecordProperty("cv_inner_vcs_version", ver);
} if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl;
}
#ifdef CV_PARALLEL_FRAMEWORK
::testing::Test::RecordProperty("cv_parallel_framework", CV_PARALLEL_FRAMEWORK);
if (useStdOut)
{
std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl;
}
#endif
std::string cpu_features;
#if CV_SSE
if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
#endif
#if CV_SSE2
if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2";
#endif
#if CV_SSE3
if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3";
#endif
#if CV_SSSE3
if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3";
#endif
#if CV_SSE4_1
if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1";
#endif
#if CV_SSE4_2
if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2";
#endif
#if CV_AVX
if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx";
#endif
#if CV_NEON
cpu_features += " neon"; // NEON is currently not checked at runtime
#endif
cpu_features.erase(0, 1); // erase initial space
::testing::Test::RecordProperty("cv_cpu_features", cpu_features);
if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl;
#ifdef HAVE_TEGRA_OPTIMIZATION
const char * tegra_optimization = tegra::isDeviceSupported() ? "enabled" : "disabled";
::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
#endif
} }
} }

@ -23,10 +23,13 @@ const static Scalar colors[] = { CV_RGB(0,0,255),
CV_RGB(255,128,0), CV_RGB(255,128,0),
CV_RGB(255,255,0), CV_RGB(255,255,0),
CV_RGB(255,0,0), CV_RGB(255,0,0),
CV_RGB(255,0,255)} ; CV_RGB(255,0,255)
} ;
int64 work_begin = 0; int64 work_begin = 0;
int64 work_end = 0; int64 work_end = 0;
string outputName;
static void workBegin() static void workBegin()
{ {
@ -37,34 +40,40 @@ static void workEnd()
work_end += (getTickCount() - work_begin); work_end += (getTickCount() - work_begin);
} }
static double getTime()
static double getTime(){ {
return work_end /((double)cvGetTickFrequency() * 1000.); return work_end /((double)cvGetTickFrequency() * 1000.);
} }
void detect( Mat& img, vector<Rect>& faces, void detect( Mat& img, vector<Rect>& faces,
cv::ocl::OclCascadeClassifierBuf& cascade, ocl::OclCascadeClassifierBuf& cascade,
double scale, bool calTime); double scale, bool calTime);
void detectCPU( Mat& img, vector<Rect>& faces, void detectCPU( Mat& img, vector<Rect>& faces,
CascadeClassifier& cascade, CascadeClassifier& cascade,
double scale, bool calTime); double scale, bool calTime);
void Draw(Mat& img, vector<Rect>& faces, double scale); void Draw(Mat& img, vector<Rect>& faces, double scale);
// This function test if gpu_rst matches cpu_rst. // This function test if gpu_rst matches cpu_rst.
// If the two vectors are not equal, it will return the difference in vector size // If the two vectors are not equal, it will return the difference in vector size
// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst); double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
int main( int argc, const char** argv ) int main( int argc, const char** argv )
{ {
const char* keys = const char* keys =
"{ h | help | false | print help message }" "{ h | help | false | print help message }"
"{ i | input | | specify input image }" "{ i | input | | specify input image }"
"{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }" "{ t | template | haarcascade_frontalface_alt.xml |"
" specify template file path }"
"{ c | scale | 1.0 | scale image }" "{ c | scale | 1.0 | scale image }"
"{ s | use_cpu | false | use cpu or gpu to process the image }"; "{ s | use_cpu | false | use cpu or gpu to process the image }"
"{ o | output | facedetect_output.jpg |"
" specify output image save path(only works when input is images) }";
CommandLineParser cmd(argc, argv, keys); CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help")) if (cmd.get<bool>("help"))
@ -78,9 +87,10 @@ int main( int argc, const char** argv )
bool useCPU = cmd.get<bool>("s"); bool useCPU = cmd.get<bool>("s");
string inputName = cmd.get<string>("i"); string inputName = cmd.get<string>("i");
outputName = cmd.get<string>("o");
string cascadeName = cmd.get<string>("t"); string cascadeName = cmd.get<string>("t");
double scale = cmd.get<double>("c"); double scale = cmd.get<double>("c");
cv::ocl::OclCascadeClassifierBuf cascade; ocl::OclCascadeClassifierBuf cascade;
CascadeClassifier cpu_cascade; CascadeClassifier cpu_cascade;
if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) ) if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
@ -114,9 +124,10 @@ int main( int argc, const char** argv )
return -1; return -1;
} }
cvNamedWindow( "result", 1 ); cvNamedWindow( "result", 1 );
std::vector<cv::ocl::Info> oclinfo; vector<ocl::Info> oclinfo;
int devnums = cv::ocl::getDevice(oclinfo); int devnums = ocl::getDevice(oclinfo);
if( devnums < 1 ) if( devnums < 1 )
{ {
std::cout << "no device found\n"; std::cout << "no device found\n";
@ -139,10 +150,12 @@ int main( int argc, const char** argv )
frame.copyTo( frameCopy ); frame.copyTo( frameCopy );
else else
flip( frame, frameCopy, 0 ); flip( frame, frameCopy, 0 );
if(useCPU){ if(useCPU)
{
detectCPU(frameCopy, faces, cpu_cascade, scale, false); detectCPU(frameCopy, faces, cpu_cascade, scale, false);
} }
else{ else
{
detect(frameCopy, faces, cascade, scale, false); detect(frameCopy, faces, cascade, scale, false);
} }
Draw(frameCopy, faces, scale); Draw(frameCopy, faces, scale);
@ -150,8 +163,10 @@ int main( int argc, const char** argv )
goto _cleanup_; goto _cleanup_;
} }
waitKey(0); waitKey(0);
_cleanup_: _cleanup_:
cvReleaseCapture( &capture ); cvReleaseCapture( &capture );
} }
@ -161,15 +176,18 @@ _cleanup_:
vector<Rect> faces; vector<Rect> faces;
vector<Rect> ref_rst; vector<Rect> ref_rst;
double accuracy = 0.; double accuracy = 0.;
for(int i = 0; i <= LOOP_NUM;i ++) for(int i = 0; i <= LOOP_NUM; i ++)
{ {
cout << "loop" << i << endl; cout << "loop" << i << endl;
if(useCPU){ if(useCPU)
{
detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
} }
else{ else
{
detect(image, faces, cascade, scale, i==0?false:true); detect(image, faces, cascade, scale, i==0?false:true);
if(i == 0){ if(i == 0)
{
detectCPU(image, ref_rst, cpu_cascade, scale, false); detectCPU(image, ref_rst, cpu_cascade, scale, false);
accuracy = checkRectSimilarity(image.size(), ref_rst, faces); accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
} }
@ -189,20 +207,19 @@ _cleanup_:
} }
cvDestroyWindow("result"); cvDestroyWindow("result");
return 0; return 0;
} }
void detect( Mat& img, vector<Rect>& faces, void detect( Mat& img, vector<Rect>& faces,
cv::ocl::OclCascadeClassifierBuf& cascade, ocl::OclCascadeClassifierBuf& cascade,
double scale, bool calTime) double scale, bool calTime)
{ {
cv::ocl::oclMat image(img); ocl::oclMat image(img);
cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
if(calTime) workBegin(); if(calTime) workBegin();
cv::ocl::cvtColor( image, gray, COLOR_BGR2GRAY ); ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
cv::ocl::equalizeHist( smallImg, smallImg ); ocl::equalizeHist( smallImg, smallImg );
cascade.detectMultiScale( smallImg, faces, 1.1, cascade.detectMultiScale( smallImg, faces, 1.1,
3, 0 3, 0
@ -226,6 +243,7 @@ void detectCPU( Mat& img, vector<Rect>& faces,
if(calTime) workEnd(); if(calTime) workEnd();
} }
void Draw(Mat& img, vector<Rect>& faces, double scale) void Draw(Mat& img, vector<Rect>& faces, double scale)
{ {
int i = 0; int i = 0;
@ -239,31 +257,38 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
radius = cvRound((r->width + r->height)*0.25*scale); radius = cvRound((r->width + r->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 ); circle( img, center, radius, color, 3, 8, 0 );
} }
cv::imshow( "result", img ); imshow( "result", img );
imwrite( outputName, img );
} }
double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
double checkRectSimilarity(Size sz, vector<Rect>& ob1, vector<Rect>& ob2)
{ {
double final_test_result = 0.0; double final_test_result = 0.0;
size_t sz1 = ob1.size(); size_t sz1 = ob1.size();
size_t sz2 = ob2.size(); size_t sz2 = ob2.size();
if(sz1 != sz2) if(sz1 != sz2)
{
return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
}
else else
{ {
cv::Mat cpu_result(sz, CV_8UC1); if(sz1==0 && sz2==0)
return 0;
Mat cpu_result(sz, CV_8UC1);
cpu_result.setTo(0); cpu_result.setTo(0);
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++) for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
{ {
cv::Mat cpu_result_roi(cpu_result, *r); Mat cpu_result_roi(cpu_result, *r);
cpu_result_roi.setTo(1); cpu_result_roi.setTo(1);
cpu_result.copyTo(cpu_result); cpu_result.copyTo(cpu_result);
} }
int cpu_area = cv::countNonZero(cpu_result > 0); int cpu_area = countNonZero(cpu_result > 0);
cv::Mat gpu_result(sz, CV_8UC1); Mat gpu_result(sz, CV_8UC1);
gpu_result.setTo(0); gpu_result.setTo(0);
for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
{ {
@ -272,11 +297,13 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
gpu_result.copyTo(gpu_result); gpu_result.copyTo(gpu_result);
} }
cv::Mat result_; Mat result_;
multiply(cpu_result, gpu_result, result_); multiply(cpu_result, gpu_result, result_);
int result = cv::countNonZero(result_ > 0); int result = countNonZero(result_ > 0);
if(cpu_area!=0 && result!=0)
final_test_result = 1.0 - (double)result/(double)cpu_area; final_test_result = 1.0 - (double)result/(double)cpu_area;
else if(cpu_area==0 && result!=0)
final_test_result = -1;
} }
return final_test_result; return final_test_result;
} }

@ -11,59 +11,21 @@
using namespace std; using namespace std;
using namespace cv; using namespace cv;
bool help_showed = false;
class Args
{
public:
Args();
static Args read(int argc, char** argv);
string src;
bool src_is_video;
bool src_is_camera;
int camera_id;
bool write_video;
string dst_video;
double dst_video_fps;
bool make_gray;
bool resize_src;
int width, height;
double scale;
int nlevels;
int gr_threshold;
double hit_threshold;
bool hit_threshold_auto;
int win_width;
int win_stride_width, win_stride_height;
bool gamma_corr;
};
class App class App
{ {
public: public:
App(const Args& s); App(CommandLineParser& cmd);
void run(); void run();
void handleKey(char key); void handleKey(char key);
void hogWorkBegin(); void hogWorkBegin();
void hogWorkEnd(); void hogWorkEnd();
string hogWorkFps() const; string hogWorkFps() const;
void workBegin(); void workBegin();
void workEnd(); void workEnd();
string workFps() const; string workFps() const;
string message() const; string message() const;
// This function test if gpu_rst matches cpu_rst. // This function test if gpu_rst matches cpu_rst.
// If the two vectors are not equal, it will return the difference in vector size // If the two vectors are not equal, it will return the difference in vector size
// Else if will return // Else if will return
@ -74,12 +36,14 @@ public:
private: private:
App operator=(App&); App operator=(App&);
Args args; //Args args;
bool running; bool running;
bool use_gpu; bool use_gpu;
bool make_gray; bool make_gray;
double scale; double scale;
double resize_scale;
int win_width;
int win_stride_width, win_stride_height;
int gr_threshold; int gr_threshold;
int nlevels; int nlevels;
double hit_threshold; double hit_threshold;
@ -87,119 +51,49 @@ private:
int64 hog_work_begin; int64 hog_work_begin;
double hog_work_fps; double hog_work_fps;
int64 work_begin; int64 work_begin;
double work_fps; double work_fps;
};
static void printHelp() string img_source;
{ string vdo_source;
cout << "Histogram of Oriented Gradients descriptor and detector sample.\n" string output;
<< "\nUsage: hog_gpu\n" int camera_id;
<< " (<image>|--video <vide>|--camera <camera_id>) # frames source\n" };
<< " [--make_gray <true/false>] # convert image to gray one or not\n"
<< " [--resize_src <true/false>] # do resize of the source image or not\n"
<< " [--width <int>] # resized image width\n"
<< " [--height <int>] # resized image height\n"
<< " [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
<< " [--scale <double>] # HOG window scale factor\n"
<< " [--nlevels <int>] # max number of HOG window scales\n"
<< " [--win_width <int>] # width of the window (48 or 64)\n"
<< " [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
<< " [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
<< " [--gr_threshold <int>] # merging similar rects constant\n"
<< " [--gamma_correct <int>] # do gamma correction or not\n"
<< " [--write_video <bool>] # write video or not\n"
<< " [--dst_video <path>] # output video path\n"
<< " [--dst_video_fps <double>] # output video fps\n";
help_showed = true;
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
const char* keys =
"{ h | help | false | print help message }"
"{ i | input | | specify input image}"
"{ c | camera | -1 | enable camera capturing }"
"{ v | video | | use video as input }"
"{ g | gray | false | convert image to gray one or not}"
"{ s | scale | 1.0 | resize the image before detect}"
"{ l |larger_win| false | use 64x128 window}"
"{ o | output | | specify output path when input is images}";
CommandLineParser cmd(argc, argv, keys);
App app(cmd);
try try
{ {
if (argc < 2)
printHelp();
Args args = Args::read(argc, argv);
if (help_showed)
return -1;
App app(args);
app.run(); app.run();
} }
catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; } catch (const Exception& e)
catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch(...) { return cout << "unknown exception" << endl, 1; }
return 0;
}
Args::Args()
{
src_is_video = false;
src_is_camera = false;
camera_id = 0;
write_video = false;
dst_video_fps = 24.;
make_gray = false;
resize_src = false;
width = 640;
height = 480;
scale = 1.05;
nlevels = 13;
gr_threshold = 8;
hit_threshold = 1.4;
hit_threshold_auto = true;
win_width = 48;
win_stride_width = 8;
win_stride_height = 8;
gamma_corr = true;
}
Args Args::read(int argc, char** argv)
{
Args args;
for (int i = 1; i < argc; i++)
{ {
if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true"); return cout << "error: " << e.what() << endl, 1;
else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true"); }
else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]); catch (const exception& e)
else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
else if (string(argv[i]) == "--hit_threshold")
{ {
args.hit_threshold = atof(argv[++i]); return cout << "error: " << e.what() << endl, 1;
args.hit_threshold_auto = false;
} }
else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]); catch(...)
else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]); {
else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]); return cout << "unknown exception" << endl, 1;
else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
else if (string(argv[i]) == "--help") printHelp();
else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
else if (args.src.empty()) args.src = argv[i];
else throw runtime_error((string("unknown key: ") + argv[i]));
} }
return args; return 0;
} }
App::App(CommandLineParser& cmd)
App::App(const Args& s)
{ {
args = s;
cout << "\nControls:\n" cout << "\nControls:\n"
<< "\tESC - exit\n" << "\tESC - exit\n"
<< "\tm - change mode GPU <-> CPU\n" << "\tm - change mode GPU <-> CPU\n"
@ -210,55 +104,55 @@ App::App(const Args& s)
<< "\t4/r - increase/decrease hit threshold\n" << "\t4/r - increase/decrease hit threshold\n"
<< endl; << endl;
use_gpu = true;
make_gray = args.make_gray;
scale = args.scale;
gr_threshold = args.gr_threshold;
nlevels = args.nlevels;
if (args.hit_threshold_auto) use_gpu = true;
args.hit_threshold = args.win_width == 48 ? 1.4 : 0.; make_gray = cmd.get<bool>("g");
hit_threshold = args.hit_threshold; resize_scale = cmd.get<double>("s");
win_width = cmd.get<bool>("l") == true ? 64 : 48;
gamma_corr = args.gamma_corr; vdo_source = cmd.get<string>("v");
img_source = cmd.get<string>("i");
output = cmd.get<string>("o");
camera_id = cmd.get<int>("c");
if (args.win_width != 64 && args.win_width != 48) win_stride_width = 8;
args.win_width = 64; win_stride_height = 8;
gr_threshold = 8;
nlevels = 13;
hit_threshold = win_width == 48 ? 1.4 : 0.;
scale = 1.05;
gamma_corr = true;
cout << "Scale: " << scale << endl;
if (args.resize_src)
cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
cout << "Group threshold: " << gr_threshold << endl; cout << "Group threshold: " << gr_threshold << endl;
cout << "Levels number: " << nlevels << endl; cout << "Levels number: " << nlevels << endl;
cout << "Win width: " << args.win_width << endl; cout << "Win width: " << win_width << endl;
cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n"; cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n";
cout << "Hit threshold: " << hit_threshold << endl; cout << "Hit threshold: " << hit_threshold << endl;
cout << "Gamma correction: " << gamma_corr << endl; cout << "Gamma correction: " << gamma_corr << endl;
cout << endl; cout << endl;
} }
void App::run() void App::run()
{ {
std::vector<ocl::Info> oclinfo; vector<ocl::Info> oclinfo;
ocl::getDevice(oclinfo); ocl::getDevice(oclinfo);
running = true; running = true;
cv::VideoWriter video_writer; VideoWriter video_writer;
Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96) Size win_size(win_width, win_width * 2);
Size win_stride(args.win_stride_width, args.win_stride_height); Size win_stride(win_stride_width, win_stride_height);
// Create HOG descriptors and detectors here // Create HOG descriptors and detectors here
vector<float> detector; vector<float> detector;
if (win_size == Size(64, 128)) if (win_size == Size(64, 128))
detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); detector = ocl::HOGDescriptor::getPeopleDetector64x128();
else else
detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); detector = ocl::HOGDescriptor::getPeopleDetector48x96();
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); ocl::HOGDescriptor::DEFAULT_NLEVELS);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector); gpu_hog.setSVMDetector(detector);
cpu_hog.setSVMDetector(detector); cpu_hog.setSVMDetector(detector);
@ -268,29 +162,29 @@ void App::run()
VideoCapture vc; VideoCapture vc;
Mat frame; Mat frame;
if (args.src_is_video) if (vdo_source!="")
{ {
vc.open(args.src.c_str()); vc.open(vdo_source.c_str());
if (!vc.isOpened()) if (!vc.isOpened())
throw runtime_error(string("can't open video file: " + args.src)); throw runtime_error(string("can't open video file: " + vdo_source));
vc >> frame; vc >> frame;
} }
else if (args.src_is_camera) else if (camera_id != -1)
{ {
vc.open(args.camera_id); vc.open(camera_id);
if (!vc.isOpened()) if (!vc.isOpened())
{ {
stringstream msg; stringstream msg;
msg << "can't open camera: " << args.camera_id; msg << "can't open camera: " << camera_id;
throw runtime_error(msg.str()); throw runtime_error(msg.str());
} }
vc >> frame; vc >> frame;
} }
else else
{ {
frame = imread(args.src); frame = imread(img_source);
if (frame.empty()) if (frame.empty())
throw runtime_error(string("can't open image file: " + args.src)); throw runtime_error(string("can't open image file: " + img_source));
} }
Mat img_aux, img, img_to_show; Mat img_aux, img, img_to_show;
@ -308,13 +202,15 @@ void App::run()
else frame.copyTo(img_aux); else frame.copyTo(img_aux);
// Resize image // Resize image
if (args.resize_src) resize(img_aux, img, Size(args.width, args.height)); if (abs(scale-1.0)>0.001)
{
Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale));
resize(img_aux, img, sz);
}
else img = img_aux; else img = img_aux;
img_to_show = img; img_to_show = img;
gpu_hog.nlevels = nlevels; gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels; cpu_hog.nlevels = nlevels;
vector<Rect> found; vector<Rect> found;
// Perform HOG classification // Perform HOG classification
@ -340,6 +236,7 @@ void App::run()
Size(0, 0), scale, gr_threshold); Size(0, 0), scale, gr_threshold);
hogWorkEnd(); hogWorkEnd();
// Draw positive classified windows // Draw positive classified windows
for (size_t i = 0; i < found.size(); i++) for (size_t i = 0; i < found.size(); i++)
{ {
@ -354,16 +251,21 @@ void App::run()
putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencv_gpu_hog", img_to_show); imshow("opencv_gpu_hog", img_to_show);
if (vdo_source!="" || camera_id!=-1) vc >> frame;
if (args.src_is_video || args.src_is_camera) vc >> frame;
workEnd(); workEnd();
if (args.write_video) if (output!="")
{
if (img_source!="") // wirte image
{
imwrite(output, img_to_show);
}
else //write video
{ {
if (!video_writer.isOpened()) if (!video_writer.isOpened())
{ {
video_writer.open(args.dst_video, VideoWriter::fourcc('x','v','i','d'), args.dst_video_fps, video_writer.open(output, VideoWriter::fourcc('x','v','i','d'), 24,
img_to_show.size(), true); img_to_show.size(), true);
if (!video_writer.isOpened()) if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer"); throw std::runtime_error("can't create video writer");
@ -374,13 +276,13 @@ void App::run()
video_writer << img; video_writer << img;
} }
}
handleKey((char)waitKey(3)); handleKey((char)waitKey(3));
} }
} }
} }
void App::handleKey(char key) void App::handleKey(char key)
{ {
switch (key) switch (key)
@ -443,7 +345,10 @@ void App::handleKey(char key)
} }
inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); } inline void App::hogWorkBegin()
{
hog_work_begin = getTickCount();
}
inline void App::hogWorkEnd() inline void App::hogWorkEnd()
{ {
@ -459,8 +364,10 @@ inline string App::hogWorkFps() const
return ss.str(); return ss.str();
} }
inline void App::workBegin()
inline void App::workBegin() { work_begin = getTickCount(); } {
work_begin = getTickCount();
}
inline void App::workEnd() inline void App::workEnd()
{ {
@ -476,6 +383,7 @@ inline string App::workFps() const
return ss.str(); return ss.str();
} }
double App::checkRectSimilarity(Size sz, double App::checkRectSimilarity(Size sz,
std::vector<Rect>& ob1, std::vector<Rect>& ob1,
std::vector<Rect>& ob2) std::vector<Rect>& ob2)
@ -485,12 +393,17 @@ double App::checkRectSimilarity(Size sz,
size_t sz2 = ob2.size(); size_t sz2 = ob2.size();
if(sz1 != sz2) if(sz1 != sz2)
{
return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
}
else else
{ {
if(sz1==0 && sz2==0)
return 0;
cv::Mat cpu_result(sz, CV_8UC1); cv::Mat cpu_result(sz, CV_8UC1);
cpu_result.setTo(0); cpu_result.setTo(0);
for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++) for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
{ {
cv::Mat cpu_result_roi(cpu_result, *r); cv::Mat cpu_result_roi(cpu_result, *r);
@ -499,6 +412,7 @@ double App::checkRectSimilarity(Size sz,
} }
int cpu_area = cv::countNonZero(cpu_result > 0); int cpu_area = cv::countNonZero(cpu_result > 0);
cv::Mat gpu_result(sz, CV_8UC1); cv::Mat gpu_result(sz, CV_8UC1);
gpu_result.setTo(0); gpu_result.setTo(0);
for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
@ -511,10 +425,10 @@ double App::checkRectSimilarity(Size sz,
cv::Mat result_; cv::Mat result_;
multiply(cpu_result, gpu_result, result_); multiply(cpu_result, gpu_result, result_);
int result = cv::countNonZero(result_ > 0); int result = cv::countNonZero(result_ > 0);
if(cpu_area!=0 && result!=0)
final_test_result = 1.0 - (double)result/(double)cpu_area; final_test_result = 1.0 - (double)result/(double)cpu_area;
else if(cpu_area==0 && result!=0)
final_test_result = -1;
} }
return final_test_result; return final_test_result;
} }

@ -24,7 +24,8 @@ static void workEnd()
{ {
work_end += (getTickCount() - work_begin); work_end += (getTickCount() - work_begin);
} }
static double getTime(){ static double getTime()
{
return work_end * 1000. / getTickFrequency(); return work_end * 1000. / getTickFrequency();
} }
@ -100,6 +101,7 @@ int main(int argc, const char* argv[])
"{ camera c | 0 | enable camera capturing }" "{ camera c | 0 | enable camera capturing }"
"{ use_cpu s | false | use cpu or gpu to process the image }" "{ use_cpu s | false | use cpu or gpu to process the image }"
"{ video v | | use video as input }" "{ video v | | use video as input }"
"{ output o | pyrlk_output.jpg| specify output save path when input is images }"
"{ points | 1000 | specify points count [GoodFeatureToTrack] }" "{ points | 1000 | specify points count [GoodFeatureToTrack] }"
"{ min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; "{ min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }";
@ -115,10 +117,10 @@ int main(int argc, const char* argv[])
string fname0 = cmd.get<string>("left"); string fname0 = cmd.get<string>("left");
string fname1 = cmd.get<string>("right"); string fname1 = cmd.get<string>("right");
string vdofile = cmd.get<string>("video"); string vdofile = cmd.get<string>("video");
string outfile = cmd.get<string>("output");
int points = cmd.get<int>("points"); int points = cmd.get<int>("points");
double minDist = cmd.get<double>("min_dist"); double minDist = cmd.get<double>("min_dist");
bool useCPU = cmd.has("s"); bool useCPU = cmd.has("s");
bool useCamera = cmd.has("c");
int inputName = cmd.get<int>("c"); int inputName = cmd.get<int>("c");
oclMat d_nextPts, d_status; oclMat d_nextPts, d_status;
@ -131,21 +133,9 @@ int main(int argc, const char* argv[])
vector<unsigned char> status(points); vector<unsigned char> status(points);
vector<float> err; vector<float> err;
if (frame0.empty() || frame1.empty())
{
useCamera = true;
defaultPicturesFail = true;
VideoCapture capture(inputName);
if (!capture.isOpened())
{
cout << "Can't load input images" << endl;
return -1;
}
}
cout << "Points count : " << points << endl << endl; cout << "Points count : " << points << endl << endl;
if (useCamera) if (frame0.empty() || frame1.empty())
{ {
VideoCapture capture; VideoCapture capture;
Mat frame, frameCopy; Mat frame, frameCopy;
@ -238,7 +228,7 @@ _cleanup_:
else else
{ {
nocamera: nocamera:
for(int i = 0; i <= LOOP_NUM;i ++) for(int i = 0; i <= LOOP_NUM; i ++)
{ {
cout << "loop" << i << endl; cout << "loop" << i << endl;
if (i > 0) workBegin(); if (i > 0) workBegin();
@ -271,8 +261,8 @@ nocamera:
cout << getTime() / LOOP_NUM << " ms" << endl; cout << getTime() / LOOP_NUM << " ms" << endl;
drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0)); drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
imshow("PyrLK [Sparse]", frame0); imshow("PyrLK [Sparse]", frame0);
imwrite(outfile, frame0);
} }
} }
} }

@ -2,11 +2,11 @@
// It loads several images sequentially and tries to find squares in // It loads several images sequentially and tries to find squares in
// each image // each image
#include "opencv2/core/core.hpp" #include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/imgproc/imgproc.hpp" #include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp" #include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp" #include "opencv2/ocl/ocl.hpp"
#include <iostream> #include <iostream>
#include <math.h> #include <math.h>
#include <string.h> #include <string.h>
@ -14,23 +14,50 @@
using namespace cv; using namespace cv;
using namespace std; using namespace std;
static void help() #define ACCURACY_CHECK 1
#if ACCURACY_CHECK
// check if two vectors of vector of points are near or not
// prior assumption is that they are in correct order
static bool checkPoints(
vector< vector<Point> > set1,
vector< vector<Point> > set2,
int maxDiff = 5)
{ {
cout << if(set1.size() != set2.size())
"\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n" {
"memory storage (it's got it all folks) to find\n" return false;
"squares in a list of images pic1-6.png\n" }
"Returns sequence of squares detected on the image.\n"
"the sequence is stored in the specified memory storage\n" for(vector< vector<Point> >::iterator it1 = set1.begin(), it2 = set2.begin();
"Call:\n" it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++)
"./squares\n" {
"Using OpenCV version %s\n" << CV_VERSION << "\n" << endl; vector<Point> pts1 = *it1;
} vector<Point> pts2 = *it2;
if(pts1.size() != pts2.size())
{
return false;
}
for(size_t i = 0; i < pts1.size(); i ++)
{
Point pt1 = pts1[i], pt2 = pts2[i];
if(std::abs(pt1.x - pt2.x) > maxDiff ||
std::abs(pt1.y - pt2.y) > maxDiff)
{
return false;
}
}
}
return true;
}
#endif
int thresh = 50, N = 11; int thresh = 50, N = 11;
const char* wndname = "OpenCL Square Detection Demo"; const char* wndname = "OpenCL Square Detection Demo";
// helper function: // helper function:
// finds a cosine of angle between vectors // finds a cosine of angle between vectors
// from pt0->pt1 and from pt0->pt2 // from pt0->pt1 and from pt0->pt2
@ -43,9 +70,92 @@ static double angle( Point pt1, Point pt2, Point pt0 )
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10); return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
} }
// returns sequence of squares detected on the image. // returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage // the sequence is stored in the specified memory storage
static void findSquares( const Mat& image, vector<vector<Point> >& squares ) static void findSquares( const Mat& image, vector<vector<Point> >& squares )
{
squares.clear();
Mat pyr, timg, gray0(image.size(), CV_8U), gray;
// down-scale and upscale the image to filter out the noise
pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
pyrUp(pyr, timg, image.size());
vector<vector<Point> > contours;
// find squares in every color plane of the image
for( int c = 0; c < 3; c++ )
{
int ch[] = {c, 0};
mixChannels(&timg, 1, &gray0, 1, ch, 1);
// try several threshold levels
for( int l = 0; l < N; l++ )
{
// hack: use Canny instead of zero threshold level.
// Canny helps to catch squares with gradient shading
if( l == 0 )
{
// apply Canny. Take the upper threshold from slider
// and set the lower to 0 (which forces edges merging)
Canny(gray0, gray, 0, thresh, 5);
// dilate canny output to remove potential
// holes between edge segments
dilate(gray, gray, Mat(), Point(-1,-1));
}
else
{
// apply threshold if l!=0:
// tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
}
// find contours and store them all as a list
findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
vector<Point> approx;
// test each contour
for( size_t i = 0; i < contours.size(); i++ )
{
// approximate contour with accuracy proportional
// to the contour perimeter
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
// square contours should have 4 vertices after approximation
// relatively large area (to filter out noisy contours)
// and be convex.
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if( approx.size() == 4 &&
fabs(contourArea(Mat(approx))) > 1000 &&
isContourConvex(Mat(approx)) )
{
double maxCosine = 0;
for( int j = 2; j < 5; j++ )
{
// find the maximum cosine of the angle between joint edges
double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
maxCosine = MAX(maxCosine, cosine);
}
// if cosines of all angles are small
// (all angles are ~90 degree) then write quandrange
// vertices to resultant sequence
if( maxCosine < 0.3 )
squares.push_back(approx);
}
}
}
}
}
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
{ {
squares.clear(); squares.clear();
@ -91,7 +201,6 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE); findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
vector<Point> approx; vector<Point> approx;
// test each contour // test each contour
for( size_t i = 0; i < contours.size(); i++ ) for( size_t i = 0; i < contours.size(); i++ )
{ {
@ -110,7 +219,6 @@ static void findSquares( const Mat& image, vector<vector<Point> >& squares )
isContourConvex(Mat(approx)) ) isContourConvex(Mat(approx)) )
{ {
double maxCosine = 0; double maxCosine = 0;
for( int j = 2; j < 5; j++ ) for( int j = 2; j < 5; j++ )
{ {
// find the maximum cosine of the angle between joint edges // find the maximum cosine of the angle between joint edges
@ -139,40 +247,93 @@ static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
int n = (int)squares[i].size(); int n = (int)squares[i].size();
polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, LINE_AA); polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, LINE_AA);
} }
}
imshow(wndname, image);
// draw both pure-C++ and ocl square results onto a single image
static Mat drawSquaresBoth( const Mat& image,
const vector<vector<Point> >& sqsCPP,
const vector<vector<Point> >& sqsOCL
)
{
Mat imgToShow(Size(image.cols * 2, image.rows), image.type());
Mat lImg = imgToShow(Rect(Point(0, 0), image.size()));
Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size()));
image.copyTo(lImg);
image.copyTo(rImg);
drawSquares(lImg, sqsCPP);
drawSquares(rImg, sqsOCL);
float fontScale = 0.8f;
Scalar white = Scalar::all(255), black = Scalar::all(0);
putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
return imgToShow;
} }
int main(int /*argc*/, char** /*argv*/) int main(int argc, char** argv)
{ {
const char* keys =
"{ i | input | | specify input image }"
"{ o | output | squares_output.jpg | specify output save path}";
CommandLineParser cmd(argc, argv, keys);
string inputName = cmd.get<string>("i");
string outfile = cmd.get<string>("o");
if(inputName.empty())
{
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
//ocl::setBinpath("F:/kernel_bin");
vector<ocl::Info> info; vector<ocl::Info> info;
CV_Assert(ocl::getDevice(info)); CV_Assert(ocl::getDevice(info));
int iterations = 10;
static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
"pic4.png", "pic5.png", "pic6.png", 0 };
help();
namedWindow( wndname, 1 ); namedWindow( wndname, 1 );
vector<vector<Point> > squares; vector<vector<Point> > squares_cpu, squares_ocl;
for( int i = 0; names[i] != 0; i++ ) Mat image = imread(inputName, 1);
{
Mat image = imread(names[i], 1);
if( image.empty() ) if( image.empty() )
{ {
cout << "Couldn't load " << names[i] << endl; cout << "Couldn't load " << inputName << endl;
continue; return -1;
} }
int j = iterations;
int64 t_ocl = 0, t_cpp = 0;
//warm-ups
cout << "warming up ..." << endl;
findSquares(image, squares_cpu);
findSquares_ocl(image, squares_ocl);
#if ACCURACY_CHECK
cout << "Checking ocl accuracy ... " << endl;
cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
#endif
do
{
int64 t_start = cv::getTickCount();
findSquares(image, squares_cpu);
t_cpp += cv::getTickCount() - t_start;
findSquares(image, squares);
drawSquares(image, squares);
int c = waitKey(); t_start = cv::getTickCount();
if( (char)c == 27 ) findSquares_ocl(image, squares_ocl);
break; t_ocl += cv::getTickCount() - t_start;
cout << "run loop: " << j << endl;
} }
while(--j);
cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;
Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
imshow(wndname, result);
imwrite(outfile, result);
waitKey(0);
return 0; return 0;
} }

@ -12,56 +12,45 @@ using namespace cv;
using namespace std; using namespace std;
using namespace ocl; using namespace ocl;
bool help_showed = false;
struct Params
{
Params();
static Params read(int argc, char** argv);
string left;
string right;
string method_str() const
{
switch (method)
{
case BM: return "BM";
case BP: return "BP";
case CSBP: return "CSBP";
}
return "";
}
enum {BM, BP, CSBP} method;
int ndisp; // Max disparity + 1
enum {GPU, CPU} type;
};
struct App struct App
{ {
App(const Params& p); App(CommandLineParser& cmd);
void run(); void run();
void handleKey(char key); void handleKey(char key);
void printParams() const; void printParams() const;
void workBegin() { work_begin = getTickCount(); } void workBegin()
{
work_begin = getTickCount();
}
void workEnd() void workEnd()
{ {
int64 d = getTickCount() - work_begin; int64 d = getTickCount() - work_begin;
double f = getTickFrequency(); double f = getTickFrequency();
work_fps = f / d; work_fps = f / d;
} }
string method_str() const
{
switch (method)
{
case BM:
return "BM";
case BP:
return "BP";
case CSBP:
return "CSBP";
}
return "";
}
string text() const string text() const
{ {
stringstream ss; stringstream ss;
ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left) ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left)
<< setprecision(4) << work_fps; << setprecision(4) << work_fps;
return ss.str(); return ss.str();
} }
private: private:
Params p;
bool running; bool running;
Mat left_src, right_src; Mat left_src, right_src;
@ -74,42 +63,45 @@ private:
int64 work_begin; int64 work_begin;
double work_fps; double work_fps;
};
static void printHelp() string l_img, r_img;
{ string out_img;
cout << "Usage: stereo_match_gpu\n" enum {BM, BP, CSBP} method;
<< "\t--left <left_view> --right <right_view> # must be rectified\n" int ndisp; // Max disparity + 1
<< "\t--method <stereo_match_method> # BM | BP | CSBP\n" enum {GPU, CPU} type;
<< "\t--ndisp <number> # number of disparity levels\n" };
<< "\t--type <device_type> # cpu | CPU | gpu | GPU\n";
help_showed = true;
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
const char* keys =
"{ h | help | false | print help message }"
"{ l | left | | specify left image }"
"{ r | right | | specify right image }"
"{ m | method | BM | specify match method(BM/BP/CSBP) }"
"{ n | ndisp | 64 | specify number of disparity levels }"
"{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }"
"{ o | output | stereo_match_output.jpg | specify output path when input is images}";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
try try
{ {
if (argc < 2) App app(cmd);
{ int flag = CVCL_DEVICE_TYPE_GPU;
printHelp(); if(cmd.get<bool>("s") == true)
return 1; flag = CVCL_DEVICE_TYPE_CPU;
}
Params args = Params::read(argc, argv);
if (help_showed)
return -1;
int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
vector<Info> info; vector<Info> info;
if(getDevice(info, flag) == 0)
if(getDevice(info, flags[args.type]) == 0)
{ {
throw runtime_error("Error: Did not find a valid OpenCL device!"); throw runtime_error("Error: Did not find a valid OpenCL device!");
} }
cout << "Device name:" << info[0].DeviceName[0] << endl; cout << "Device name:" << info[0].DeviceName[0] << endl;
App app(args);
app.run(); app.run();
} }
catch (const exception& e) catch (const exception& e)
@ -119,55 +111,8 @@ int main(int argc, char** argv)
return 0; return 0;
} }
App::App(CommandLineParser& cmd)
Params::Params() : running(false),method(BM)
{
method = BM;
ndisp = 64;
type = GPU;
}
Params Params::read(int argc, char** argv)
{
Params p;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "--left") p.left = argv[++i];
else if (string(argv[i]) == "--right") p.right = argv[++i];
else if (string(argv[i]) == "--method")
{
if (string(argv[i + 1]) == "BM") p.method = BM;
else if (string(argv[i + 1]) == "BP") p.method = BP;
else if (string(argv[i + 1]) == "CSBP") p.method = CSBP;
else throw runtime_error("unknown stereo match method: " + string(argv[i + 1]));
i++;
}
else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]);
else if (string(argv[i]) == "--type")
{
string t(argv[++i]);
if (t == "cpu" || t == "CPU")
{
p.type = CPU;
}
else if (t == "gpu" || t == "GPU")
{
p.type = GPU;
}
else throw runtime_error("unknown device type: " + t);
}
else if (string(argv[i]) == "--help") printHelp();
else throw runtime_error("unknown key: " + string(argv[i]));
}
return p;
}
App::App(const Params& params)
: p(params), running(false)
{ {
cout << "stereo_match_ocl sample\n"; cout << "stereo_match_ocl sample\n";
cout << "\nControls:\n" cout << "\nControls:\n"
@ -180,16 +125,25 @@ App::App(const Params& params)
<< "\t2/w - increase/decrease window size (for BM only)\n" << "\t2/w - increase/decrease window size (for BM only)\n"
<< "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n" << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
<< "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
l_img = cmd.get<string>("l");
r_img = cmd.get<string>("r");
string mstr = cmd.get<string>("m");
if(mstr == "BM") method = BM;
else if(mstr == "BP") method = BP;
else if(mstr == "CSBP") method = CSBP;
else cout << "unknown method!\n";
ndisp = cmd.get<int>("n");
out_img = cmd.get<string>("o");
} }
void App::run() void App::run()
{ {
// Load images // Load images
left_src = imread(p.left); left_src = imread(l_img);
right_src = imread(p.right); right_src = imread(r_img);
if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\""); if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\"");
if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\""); if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\"");
cvtColor(left_src, left, COLOR_BGR2GRAY); cvtColor(left_src, left, COLOR_BGR2GRAY);
cvtColor(right_src, right, COLOR_BGR2GRAY); cvtColor(right_src, right, COLOR_BGR2GRAY);
@ -201,14 +155,15 @@ void App::run()
imshow("right", right); imshow("right", right);
// Set common parameters // Set common parameters
bm.ndisp = p.ndisp; bm.ndisp = ndisp;
bp.ndisp = p.ndisp; bp.ndisp = ndisp;
csbp.ndisp = p.ndisp; csbp.ndisp = ndisp;
cout << endl; cout << endl;
printParams(); printParams();
running = true; running = true;
bool written = false;
while (running) while (running)
{ {
@ -216,9 +171,9 @@ void App::run()
Mat disp; Mat disp;
oclMat d_disp; oclMat d_disp;
workBegin(); workBegin();
switch (p.method) switch (method)
{ {
case Params::BM: case BM:
if (d_left.channels() > 1 || d_right.channels() > 1) if (d_left.channels() > 1 || d_right.channels() > 1)
{ {
cout << "BM doesn't support color images\n"; cout << "BM doesn't support color images\n";
@ -232,25 +187,27 @@ void App::run()
} }
bm(d_left, d_right, d_disp); bm(d_left, d_right, d_disp);
break; break;
case Params::BP: case BP:
bp(d_left, d_right, d_disp); bp(d_left, d_right, d_disp);
break; break;
case Params::CSBP: case CSBP:
csbp(d_left, d_right, d_disp); csbp(d_left, d_right, d_disp);
break; break;
} }
ocl::finish();
workEnd();
// Show results // Show results
d_disp.download(disp); d_disp.download(disp);
if (p.method != Params::BM) workEnd();
if (method != BM)
{ {
disp.convertTo(disp, 0); disp.convertTo(disp, 0);
} }
putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255)); putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
imshow("disparity", disp); imshow("disparity", disp);
if(!written)
{
imwrite(out_img, disp);
written = true;
}
handleKey((char)waitKey(3)); handleKey((char)waitKey(3));
} }
} }
@ -261,19 +218,19 @@ void App::printParams() const
cout << "--- Parameters ---\n"; cout << "--- Parameters ---\n";
cout << "image_size: (" << left.cols << ", " << left.rows << ")\n"; cout << "image_size: (" << left.cols << ", " << left.rows << ")\n";
cout << "image_channels: " << left.channels() << endl; cout << "image_channels: " << left.channels() << endl;
cout << "method: " << p.method_str() << endl cout << "method: " << method_str() << endl
<< "ndisp: " << p.ndisp << endl; << "ndisp: " << ndisp << endl;
switch (p.method) switch (method)
{ {
case Params::BM: case BM:
cout << "win_size: " << bm.winSize << endl; cout << "win_size: " << bm.winSize << endl;
cout << "prefilter_sobel: " << bm.preset << endl; cout << "prefilter_sobel: " << bm.preset << endl;
break; break;
case Params::BP: case BP:
cout << "iter_count: " << bp.iters << endl; cout << "iter_count: " << bp.iters << endl;
cout << "level_count: " << bp.levels << endl; cout << "level_count: " << bp.levels << endl;
break; break;
case Params::CSBP: case CSBP:
cout << "iter_count: " << csbp.iters << endl; cout << "iter_count: " << csbp.iters << endl;
cout << "level_count: " << csbp.levels << endl; cout << "level_count: " << csbp.levels << endl;
break; break;
@ -289,11 +246,13 @@ void App::handleKey(char key)
case 27: case 27:
running = false; running = false;
break; break;
case 'p': case 'P': case 'p':
case 'P':
printParams(); printParams();
break; break;
case 'g': case 'G': case 'g':
if (left.channels() == 1 && p.method != Params::BM) case 'G':
if (left.channels() == 1 && method != BM)
{ {
left = left_src; left = left_src;
right = right_src; right = right_src;
@ -309,23 +268,25 @@ void App::handleKey(char key)
imshow("left", left); imshow("left", left);
imshow("right", right); imshow("right", right);
break; break;
case 'm': case 'M': case 'm':
switch (p.method) case 'M':
switch (method)
{ {
case Params::BM: case BM:
p.method = Params::BP; method = BP;
break; break;
case Params::BP: case BP:
p.method = Params::CSBP; method = CSBP;
break; break;
case Params::CSBP: case CSBP:
p.method = Params::BM; method = BM;
break; break;
} }
cout << "method: " << p.method_str() << endl; cout << "method: " << method_str() << endl;
break; break;
case 's': case 'S': case 's':
if (p.method == Params::BM) case 'S':
if (method == BM)
{ {
switch (bm.preset) switch (bm.preset)
{ {
@ -340,76 +301,80 @@ void App::handleKey(char key)
} }
break; break;
case '1': case '1':
p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8; ndisp == 1 ? ndisp = 8 : ndisp += 8;
cout << "ndisp: " << p.ndisp << endl; cout << "ndisp: " << ndisp << endl;
bm.ndisp = p.ndisp; bm.ndisp = ndisp;
bp.ndisp = p.ndisp; bp.ndisp = ndisp;
csbp.ndisp = p.ndisp; csbp.ndisp = ndisp;
break; break;
case 'q': case 'Q': case 'q':
p.ndisp = max(p.ndisp - 8, 1); case 'Q':
cout << "ndisp: " << p.ndisp << endl; ndisp = max(ndisp - 8, 1);
bm.ndisp = p.ndisp; cout << "ndisp: " << ndisp << endl;
bp.ndisp = p.ndisp; bm.ndisp = ndisp;
csbp.ndisp = p.ndisp; bp.ndisp = ndisp;
csbp.ndisp = ndisp;
break; break;
case '2': case '2':
if (p.method == Params::BM) if (method == BM)
{ {
bm.winSize = min(bm.winSize + 1, 51); bm.winSize = min(bm.winSize + 1, 51);
cout << "win_size: " << bm.winSize << endl; cout << "win_size: " << bm.winSize << endl;
} }
break; break;
case 'w': case 'W': case 'w':
if (p.method == Params::BM) case 'W':
if (method == BM)
{ {
bm.winSize = max(bm.winSize - 1, 2); bm.winSize = max(bm.winSize - 1, 2);
cout << "win_size: " << bm.winSize << endl; cout << "win_size: " << bm.winSize << endl;
} }
break; break;
case '3': case '3':
if (p.method == Params::BP) if (method == BP)
{ {
bp.iters += 1; bp.iters += 1;
cout << "iter_count: " << bp.iters << endl; cout << "iter_count: " << bp.iters << endl;
} }
else if (p.method == Params::CSBP) else if (method == CSBP)
{ {
csbp.iters += 1; csbp.iters += 1;
cout << "iter_count: " << csbp.iters << endl; cout << "iter_count: " << csbp.iters << endl;
} }
break; break;
case 'e': case 'E': case 'e':
if (p.method == Params::BP) case 'E':
if (method == BP)
{ {
bp.iters = max(bp.iters - 1, 1); bp.iters = max(bp.iters - 1, 1);
cout << "iter_count: " << bp.iters << endl; cout << "iter_count: " << bp.iters << endl;
} }
else if (p.method == Params::CSBP) else if (method == CSBP)
{ {
csbp.iters = max(csbp.iters - 1, 1); csbp.iters = max(csbp.iters - 1, 1);
cout << "iter_count: " << csbp.iters << endl; cout << "iter_count: " << csbp.iters << endl;
} }
break; break;
case '4': case '4':
if (p.method == Params::BP) if (method == BP)
{ {
bp.levels += 1; bp.levels += 1;
cout << "level_count: " << bp.levels << endl; cout << "level_count: " << bp.levels << endl;
} }
else if (p.method == Params::CSBP) else if (method == CSBP)
{ {
csbp.levels += 1; csbp.levels += 1;
cout << "level_count: " << csbp.levels << endl; cout << "level_count: " << csbp.levels << endl;
} }
break; break;
case 'r': case 'R': case 'r':
if (p.method == Params::BP) case 'R':
if (method == BP)
{ {
bp.levels = max(bp.levels - 1, 1); bp.levels = max(bp.levels - 1, 1);
cout << "level_count: " << bp.levels << endl; cout << "level_count: " << bp.levels << endl;
} }
else if (p.method == Params::CSBP) else if (method == CSBP)
{ {
csbp.levels = max(csbp.levels - 1, 1); csbp.levels = max(csbp.levels - 1, 1);
cout << "level_count: " << csbp.levels << endl; cout << "level_count: " << csbp.levels << endl;
@ -417,5 +382,3 @@ void App::handleKey(char key)
break; break;
} }
} }

@ -1,48 +1,3 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iostream> #include <iostream>
#include <stdio.h> #include <stdio.h>
#include "opencv2/core/core.hpp" #include "opencv2/core/core.hpp"
@ -62,14 +17,6 @@ const float GOOD_PORTION = 0.15f;
namespace namespace
{ {
void help();
void help()
{
std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
}
int64 work_begin = 0; int64 work_begin = 0;
int64 work_end = 0; int64 work_end = 0;
@ -82,7 +29,8 @@ void workEnd()
{ {
work_end = getTickCount() - work_begin; work_end = getTickCount() - work_begin;
} }
double getTime(){ double getTime()
{
return work_end /((double)getTickFrequency() * 1000.); return work_end /((double)getTickFrequency() * 1000.);
} }
@ -155,8 +103,10 @@ Mat drawGoodMatches(
} }
//-- Get the corners from the image_1 ( the object to be "detected" ) //-- Get the corners from the image_1 ( the object to be "detected" )
std::vector<Point2f> obj_corners(4); std::vector<Point2f> obj_corners(4);
obj_corners[0] = Point(0,0); obj_corners[1] = Point( cpu_img1.cols, 0 ); obj_corners[0] = Point(0,0);
obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = Point( 0, cpu_img1.rows ); obj_corners[1] = Point( cpu_img1.cols, 0 );
obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows );
obj_corners[3] = Point( 0, cpu_img1.rows );
std::vector<Point2f> scene_corners(4); std::vector<Point2f> scene_corners(4);
Mat H = findHomography( obj, scene, RANSAC ); Mat H = findHomography( obj, scene, RANSAC );
@ -186,6 +136,21 @@ Mat drawGoodMatches(
// use cpu findHomography interface to calculate the transformation matrix // use cpu findHomography interface to calculate the transformation matrix
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
const char* keys =
"{ help h | false | print help message }"
"{ left l | | specify left image }"
"{ right r | | specify right image }"
"{ output o | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
"{ use_cpu c | false | use CPU algorithms }"
"{ use_all a | false | use both CPU and GPU algorithms}";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
std::cout << "Avaible options:" << std::endl;
cmd.printMessage();
return 0;
}
std::vector<cv::ocl::Info> info; std::vector<cv::ocl::Info> info;
if(cv::ocl::getDevice(info) == 0) if(cv::ocl::getDevice(info) == 0)
{ {
@ -196,48 +161,32 @@ int main(int argc, char* argv[])
Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
oclMat img1, img2; oclMat img1, img2;
bool useCPU = false; bool useCPU = cmd.get<bool>("c");
bool useGPU = false; bool useGPU = false;
bool useALL = false; bool useALL = cmd.get<bool>("a");
for (int i = 1; i < argc; ++i) std::string outpath = cmd.get<std::string>("o");
{
if (String(argv[i]) == "--left") cpu_img1 = imread(cmd.get<std::string>("l"));
{
cpu_img1 = imread(argv[++i]);
CV_Assert(!cpu_img1.empty()); CV_Assert(!cpu_img1.empty());
cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY); cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
img1 = cpu_img1_grey; img1 = cpu_img1_grey;
}
else if (String(argv[i]) == "--right") cpu_img2 = imread(cmd.get<std::string>("r"));
{
cpu_img2 = imread(argv[++i]);
CV_Assert(!cpu_img2.empty()); CV_Assert(!cpu_img2.empty());
cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY); cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
img2 = cpu_img2_grey; img2 = cpu_img2_grey;
}
else if (String(argv[i]) == "-c") if(useALL)
{
useCPU = true;
useGPU = false;
useALL = false;
}else if(String(argv[i]) == "-g")
{
useGPU = true;
useCPU = false;
useALL = false;
}else if(String(argv[i]) == "-a")
{ {
useALL = true;
useCPU = false; useCPU = false;
useGPU = false; useGPU = false;
} }
else if (String(argv[i]) == "--help") else if(useCPU==false && useALL==false)
{ {
help(); useGPU = true;
return -1;
}
} }
if(!useCPU) if(!useCPU)
{ {
std::cout std::cout
@ -299,7 +248,8 @@ int main(int argc, char* argv[])
surf_time = getTime(); surf_time = getTime();
std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
}else }
else
{ {
//cpu runs //cpu runs
for (int i = 0; i <= LOOP_NUM; i++) for (int i = 0; i <= LOOP_NUM; i++)
@ -372,12 +322,15 @@ int main(int argc, char* argv[])
{ {
namedWindow("cpu surf matches", 0); namedWindow("cpu surf matches", 0);
imshow("cpu surf matches", img_matches); imshow("cpu surf matches", img_matches);
imwrite(outpath, img_matches);
} }
else if(useGPU) else if(useGPU)
{ {
namedWindow("ocl surf matches", 0); namedWindow("ocl surf matches", 0);
imshow("ocl surf matches", img_matches); imshow("ocl surf matches", img_matches);
}else imwrite(outpath, img_matches);
}
else
{ {
namedWindow("cpu surf matches", 0); namedWindow("cpu surf matches", 0);
imshow("cpu surf matches", img_matches); imshow("cpu surf matches", img_matches);

@ -0,0 +1,264 @@
#include <iostream>
#include <vector>
#include <iomanip>
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
#include "opencv2/video/video.hpp"
using namespace std;
using namespace cv;
using namespace cv::ocl;
// Local shorthand for unsigned char.
typedef unsigned char uchar;
// Number of timed iterations in image mode; main() loops LOOP_NUM + 1 times
// and leaves the very first pass (i == 0) out of the measurement.
#define LOOP_NUM 10
// Tick count captured by workBegin() at the start of the current timed section.
int64 work_begin = 0;
// Running total of ticks accumulated by workEnd() over all timed sections
// (despite the name, this is a sum of durations, not an end timestamp).
int64 work_end = 0;
// Starts a timed section by remembering the current tick count in work_begin.
static void workBegin() { work_begin = getTickCount(); }
// Closes a timed section: adds the ticks elapsed since the matching
// workBegin() call to the running total kept in work_end.
static void workEnd()
{
    work_end = work_end + (getTickCount() - work_begin);
}
// Returns the accumulated time of all timed sections in milliseconds
// (ticks * 1000 / ticks-per-second).
static double getTime()
{
    const double accumulatedTicks = static_cast<double>(work_end);
    return accumulatedTicks * 1000. / getTickFrequency();
}
// Restricts x to the interval [a, b].
// Returns a when x is not greater than a, b when x is not less than b,
// and x otherwise. The caller is expected to pass a <= b; this is not checked.
template <typename T> inline T clamp (T x, T a, T b)
{
    return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a));
}

// Linearly maps x from the source range [a, b] onto the target range [c, d].
// x is clamped to [a, b] first, so the result always lies between c and d.
// a and b must differ: the mapping divides by (b - a).
template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
{
    // Explicitly pick the global helper. This file pulls namespace std into
    // scope, and an unqualified call would be ambiguous with std::clamp on
    // toolchains that provide it (C++17 <algorithm>).
    x = ::clamp(x, a, b);
    return c + (d - c) * (x - a) / (b - a);
}
static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
{
float maxDisplacement = 1.0f;
for (int i = 0; i < u.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
for (int j = 0; j < u.cols; ++j)
{
float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
if (d > maxDisplacement)
maxDisplacement = d;
}
}
flowField.create(u.size(), CV_8UC4);
for (int i = 0; i < flowField.rows; ++i)
{
const float* ptr_u = u.ptr<float>(i);
const float* ptr_v = v.ptr<float>(i);
Vec4b* row = flowField.ptr<Vec4b>(i);
for (int j = 0; j < flowField.cols; ++j)
{
row[j][0] = 0;
row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
row[j][3] = 255;
}
}
}
int main(int argc, const char* argv[])
{
static std::vector<Info> ocl_info;
ocl::getDevice(ocl_info);
//if you want to use undefault device, set it here
setDevice(ocl_info[0]);
//set this to save kernel compile time from second time you run
ocl::setBinpath("./");
const char* keys =
"{ h | help | false | print help message }"
"{ l | left | | specify left image }"
"{ r | right | | specify right image }"
"{ o | output | tvl1_output.jpg | specify output save path }"
"{ c | camera | 0 | enable camera capturing }"
"{ s | use_cpu | false | use cpu or gpu to process the image }"
"{ v | video | | use video as input }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
cout << "Usage: pyrlk_optical_flow [options]" << endl;
cout << "Avaible options:" << endl;
cmd.printMessage();
return 0;
}
bool defaultPicturesFail = false;
string fname0 = cmd.get<string>("l");
string fname1 = cmd.get<string>("r");
string vdofile = cmd.get<string>("v");
string outpath = cmd.get<string>("o");
bool useCPU = cmd.get<bool>("s");
bool useCamera = cmd.get<bool>("c");
int inputName = cmd.get<int>("c");
Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
Mat flow, show_flow;
Mat flow_vec[2];
if (frame0.empty() || frame1.empty())
{
useCamera = true;
defaultPicturesFail = true;
VideoCapture capture( inputName );
if (!capture.isOpened())
{
cout << "Can't load input images" << endl;
return -1;
}
}
if (useCamera)
{
VideoCapture capture;
Mat frame, frameCopy;
Mat frame0Gray, frame1Gray;
Mat ptr0, ptr1;
if(vdofile == "")
capture.open( inputName );
else
capture.open(vdofile.c_str());
int c = inputName ;
if(!capture.isOpened())
{
if(vdofile == "")
cout << "Capture from CAM " << c << " didn't work" << endl;
else
cout << "Capture from file " << vdofile << " failed" <<endl;
if (defaultPicturesFail)
{
return -1;
}
goto nocamera;
}
cout << "In capture ..." << endl;
for(int i = 0;; i++)
{
if( !capture.read(frame) )
break;
if (i == 0)
{
frame.copyTo( frame0 );
cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
}
else
{
if (i%2 == 1)
{
frame.copyTo(frame1);
cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
ptr0 = frame0Gray;
ptr1 = frame1Gray;
}
else
{
frame.copyTo(frame0);
cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
ptr0 = frame1Gray;
ptr1 = frame0Gray;
}
if (useCPU)
{
alg->calc(ptr0, ptr1, flow);
split(flow, flow_vec);
}
else
{
oclMat d_flowx, d_flowy;
d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy);
d_flowx.download(flow_vec[0]);
d_flowy.download(flow_vec[1]);
}
if (i%2 == 1)
frame1.copyTo(frameCopy);
else
frame0.copyTo(frameCopy);
getFlowField(flow_vec[0], flow_vec[1], show_flow);
imshow("PyrLK [Sparse]", show_flow);
}
if( waitKey( 10 ) >= 0 )
goto _cleanup_;
}
waitKey(0);
_cleanup_:
capture.release();
}
else
{
nocamera:
oclMat d_flowx, d_flowy;
for(int i = 0; i <= LOOP_NUM; i ++)
{
cout << "loop" << i << endl;
if (i > 0) workBegin();
if (useCPU)
{
alg->calc(frame0, frame1, flow);
split(flow, flow_vec);
}
else
{
d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
d_flowx.download(flow_vec[0]);
d_flowy.download(flow_vec[1]);
}
if (i > 0 && i <= LOOP_NUM)
workEnd();
if (i == LOOP_NUM)
{
if (useCPU)
cout << "average CPU time (noCamera) : ";
else
cout << "average GPU time (noCamera) : ";
cout << getTime() / LOOP_NUM << " ms" << endl;
getFlowField(flow_vec[0], flow_vec[1], show_flow);
imshow("PyrLK [Sparse]", show_flow);
imwrite(outpath, show_flow);
}
}
}
waitKey();
return 0;
}
Loading…
Cancel
Save