Merge branch 'master' of code.opencv.org:opencv

pull/15/merge
Alexander Mordvintesv 13 years ago
commit 4fb15ae1f0
  1. 2
      3rdparty/libtiff/CMakeLists.txt
  2. 8
      CMakeLists.txt
  3. 6
      cmake/OpenCVDetectCUDA.cmake
  4. 9
      cmake/OpenCVGenConfig.cmake
  5. 2
      cmake/OpenCVModule.cmake
  6. 1275
      data/lbpcascades/lbpcascade_profileface.xml
  7. 1279
      data/lbpcascades/lbpcascade_silverware.xml
  8. 2
      ios/cmake/Modules/Platform/iOS.cmake
  9. 2
      modules/core/include/opencv2/core/core.hpp
  10. 4
      modules/core/include/opencv2/core/gpumat.hpp
  11. 13
      modules/core/src/cuda/matrix_operations.cu
  12. 4
      modules/core/src/gpumat.cpp
  13. 2
      modules/features2d/src/features2d_init.cpp
  14. 4
      modules/features2d/test/test_fast.cpp
  15. 6
      modules/features2d/test/test_nearestneighbors.cpp
  16. 30
      modules/features2d/test/test_rotation_and_scale_invariance.cpp
  17. 40
      modules/gpu/CMakeLists.txt
  18. 18
      modules/gpu/doc/object_detection.rst
  19. 1
      modules/gpu/include/opencv2/gpu/gpu.hpp
  20. 359
      modules/gpu/misc/mark_nvidia.py
  21. 125
      modules/gpu/perf/main.cpp
  22. 376
      modules/gpu/perf/perf_calib3d.cpp
  23. 2673
      modules/gpu/perf/perf_core.cpp
  24. 307
      modules/gpu/perf/perf_features2d.cpp
  25. 425
      modules/gpu/perf/perf_filters.cpp
  26. 1896
      modules/gpu/perf/perf_imgproc.cpp
  27. 182
      modules/gpu/perf/perf_labeling.cpp
  28. 20
      modules/gpu/perf/perf_main.cpp
  29. 188
      modules/gpu/perf/perf_matop.cpp
  30. 136
      modules/gpu/perf/perf_objdetect.cpp
  31. 10
      modules/gpu/perf/perf_precomp.hpp
  32. 77
      modules/gpu/perf/perf_utility.hpp
  33. 1010
      modules/gpu/perf/perf_video.cpp
  34. 43
      modules/gpu/perf/utility.cpp
  35. 45
      modules/gpu/perf/utility.hpp
  36. 136
      modules/gpu/perf_cpu/perf_calib3d.cpp
  37. 1388
      modules/gpu/perf_cpu/perf_core.cpp
  38. 1
      modules/gpu/perf_cpu/perf_cpu_precomp.cpp
  39. 32
      modules/gpu/perf_cpu/perf_cpu_precomp.hpp
  40. 187
      modules/gpu/perf_cpu/perf_features2d.cpp
  41. 283
      modules/gpu/perf_cpu/perf_filters.cpp
  42. 771
      modules/gpu/perf_cpu/perf_imgproc.cpp
  43. 158
      modules/gpu/perf_cpu/perf_labeling.cpp
  44. 20
      modules/gpu/perf_cpu/perf_main.cpp
  45. 124
      modules/gpu/perf_cpu/perf_matop.cpp
  46. 74
      modules/gpu/perf_cpu/perf_objdetect.cpp
  47. 220
      modules/gpu/perf_cpu/perf_utility.cpp
  48. 77
      modules/gpu/perf_cpu/perf_utility.hpp
  49. 466
      modules/gpu/perf_cpu/perf_video.cpp
  50. 50
      modules/gpu/src/brute_force_matcher.cpp
  51. 21
      modules/gpu/src/calib3d.cpp
  52. 8
      modules/gpu/src/cascadeclassifier.cpp
  53. 4
      modules/gpu/src/cuda/ccomponetns.cu
  54. 123
      modules/gpu/src/cuda/hough.cu
  55. 385
      modules/gpu/src/cuda/integral_image.cu
  56. 2
      modules/gpu/src/cuda/lbp.cu
  57. 6
      modules/gpu/src/hog.cpp
  58. 44
      modules/gpu/src/hough.cpp
  59. 100
      modules/gpu/src/imgproc.cpp
  60. 65
      modules/gpu/src/match_template.cpp
  61. 32
      modules/gpu/src/nvidia/NCVHaarObjectDetection.hpp
  62. 18
      modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
  63. 4
      modules/gpu/src/opencv2/gpu/device/emulation.hpp
  64. 1
      modules/gpu/src/split_merge.cpp
  65. 42
      modules/gpu/src/video_decoder.cpp
  66. 135
      modules/gpu/test/main.cpp
  67. 4
      modules/gpu/test/main_test_nvidia.h
  68. 6
      modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp
  69. 4
      modules/gpu/test/nvidia/main_nvidia.cpp
  70. 8
      modules/gpu/test/test_calib3d.cpp
  71. 2
      modules/gpu/test/test_color.cpp
  72. 6
      modules/gpu/test/test_copy_make_border.cpp
  73. 6
      modules/gpu/test/test_core.cpp
  74. 6
      modules/gpu/test/test_features2d.cpp
  75. 6
      modules/gpu/test/test_filters.cpp
  76. 8
      modules/gpu/test/test_global_motion.cpp
  77. 6
      modules/gpu/test/test_gpumat.cpp
  78. 80
      modules/gpu/test/test_imgproc.cpp
  79. 15
      modules/gpu/test/test_labeling.cpp
  80. 14
      modules/gpu/test/test_nvidia.cpp
  81. 28
      modules/gpu/test/test_objdetect.cpp
  82. 2
      modules/gpu/test/test_precomp.cpp
  83. 3
      modules/gpu/test/test_precomp.hpp
  84. 2
      modules/gpu/test/test_pyramids.cpp
  85. 2
      modules/gpu/test/test_remap.cpp
  86. 3
      modules/gpu/test/test_resize.cpp
  87. 2
      modules/gpu/test/test_threshold.cpp
  88. 6
      modules/gpu/test/test_video.cpp
  89. 2
      modules/gpu/test/test_warp_affine.cpp
  90. 2
      modules/gpu/test/test_warp_perspective.cpp
  91. 72
      modules/gpu/test/utility.cpp
  92. 19
      modules/gpu/test/utility.hpp
  93. 6
      modules/highgui/CMakeLists.txt
  94. 163
      modules/highgui/include/opencv2/highgui/cap_ios.h
  95. 408
      modules/highgui/src/cap_ios_abstract_camera.mm
  96. 165
      modules/highgui/src/cap_ios_photo_camera.mm
  97. 585
      modules/highgui/src/cap_ios_video_camera.mm
  98. 68
      modules/imgproc/perf/perf_remap.cpp
  99. 32
      modules/imgproc/perf/perf_resize.cpp
  100. 1706
      modules/imgproc/src/imgwarp.cpp
  101. Some files were not shown because too many files have changed in this diff Show More

@ -89,7 +89,7 @@ endif(WIN32)
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-unused-but-set-variable -Wmissing-prototypes -Wmissing-declarations -Wundef -Wunused -Wsign-compare
-Wcast-align -Wshadow -Wno-maybe-uninitialized -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wmissing-declarations -Wunused-parameter /wd4100 /wd4244 /wd4706 /wd4127 /wd4701 /wd4018 /wd4267 /wd4306 /wd4305 /wd4312 /wd4311)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wmissing-declarations -Wunused-parameter /wd4100 /wd4244 /wd4706 /wd4127 /wd4701 /wd4018 /wd4267 /wd4306 /wd4305 /wd4312 /wd4311 /wd4703)
if(UNIX AND (CMAKE_COMPILER_IS_GNUCXX OR CV_ICC))
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")

@ -189,11 +189,11 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF (MSVC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF (MSVC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" OFF IF (CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF (CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )

@ -88,7 +88,11 @@ if(CUDA_FOUND)
if(APPLE)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
endif()
string(REPLACE "-Wsign-promo" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
# disabled because of multiple warnings during building nvcc auto generated files
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GCC_REGEX_VERSION VERSION_GREATER "4.6.0")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-but-set-variable)
endif()
# we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
set(CMAKE_CXX_FLAGS_DEBUG_ ${CMAKE_CXX_FLAGS_DEBUG})

@ -64,9 +64,14 @@ macro(ocv_generate_dependencies_map_configcmake suffix configuration)
string(REGEX REPLACE "${CMAKE_SHARED_LIBRARY_SUFFIX}$" "${OPENCV_LINK_LIBRARY_SUFFIX}" __libname "${__libname}")
endif()
string(REPLACE " " "\\ " __mod_deps "${${__ocv_lib}_MODULE_DEPS_${suffix}}")
string(REPLACE " " "\\ " __ext_deps "${${__ocv_lib}_EXTRA_DEPS_${suffix}}")
string(REPLACE "\"" "\\\"" __mod_deps "${__mod_deps}")
string(REPLACE "\"" "\\\"" __ext_deps "${__ext_deps}")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_LIBNAME_${suffix} \"${__libname}\")\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_DEPS_${suffix} ${${__ocv_lib}_MODULE_DEPS_${suffix}})\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} ${${__ocv_lib}_EXTRA_DEPS_${suffix}})\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_DEPS_${suffix} ${__mod_deps})\n")
set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} ${__ext_deps})\n")
list(APPEND OPENCV_PROCESSED_LIBS ${__ocv_lib})
list(APPEND OPENCV_LIBS_TO_PROCESS ${${__ocv_lib}_MODULE_DEPS_${suffix}})

@ -509,8 +509,6 @@ endmacro()
macro(ocv_add_precompiled_headers the_target)
if("${the_target}" MATCHES "^opencv_test_.*$")
SET(pch_path "test/test_")
elseif("${the_target}" MATCHES "opencv_perf_gpu_cpu")
SET(pch_path "perf_cpu/perf_cpu_")
elseif("${the_target}" MATCHES "^opencv_perf_.*$")
SET(pch_path "perf/perf_")
else()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -42,6 +42,8 @@ set (CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
set (CMAKE_C_FLAGS "")
set (CMAKE_CXX_FLAGS "-headerpad_max_install_names -fvisibility=hidden -fvisibility-inlines-hidden")
set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -fomit-frame-pointer -ffast-math")
if (HAVE_FLAG_SEARCH_PATHS_FIRST)
set (CMAKE_C_LINK_FLAGS "-Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}")
set (CMAKE_CXX_LINK_FLAGS "-Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}")

@ -440,7 +440,7 @@ template<typename _Tp, int m, int n> class CV_EXPORTS Matx
{
public:
typedef _Tp value_type;
typedef Matx<_Tp, MIN(m, n), 1> diag_type;
typedef Matx<_Tp, (m < n ? m : n), 1> diag_type;
typedef Matx<_Tp, m, n> mat_type;
enum { depth = DataDepth<_Tp>::value, rows = m, cols = n, channels = rows*cols,
type = CV_MAKETYPE(depth, channels) };

@ -72,9 +72,11 @@ namespace cv { namespace gpu
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30
};
// Gives information about what GPU archs this OpenCV GPU module was

@ -44,7 +44,7 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
@ -272,7 +272,7 @@ namespace cv { namespace gpu { namespace device
template <typename T, typename D> struct TransformFunctorTraits< Convertor<T, D> > : detail::ConvertTraits< Convertor<T, D> >
{
};
template<typename T, typename D>
void cvt_(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream)
{
@ -282,6 +282,11 @@ namespace cv { namespace gpu { namespace device
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
}
#if defined __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wmissing-declarations"
#endif
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream);
@ -318,4 +323,8 @@ namespace cv { namespace gpu { namespace device
func(src, dst, alpha, beta, stream);
}
#if defined __clang__
# pragma clang diagnostic pop
#endif
}}} // namespace cv { namespace gpu { namespace device

@ -1199,10 +1199,6 @@ namespace
void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
{
NppiSize sz;
sz.width = m.cols;
sz.height = m.rows;
if (mask.empty())
{
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)

@ -59,7 +59,7 @@ CV_INIT_ALGORITHM(BriefDescriptorExtractor, "Feature2D.BRIEF",
CV_INIT_ALGORITHM(FastFeatureDetector, "Feature2D.FAST",
obj.info()->addParam(obj, "threshold", obj.threshold);
obj.info()->addParam(obj, "nonmaxSuppression", obj.nonmaxSuppression);
obj.info()->addParam(obj, "type", obj.type, FastFeatureDetector::TYPE_9_16));
obj.info()->addParam(obj, "type", obj.type));
///////////////////////////////////////////////////////////////////////////////////////////////////////////

@ -75,8 +75,8 @@ void CV_FastTest::run( int )
vector<KeyPoint> keypoints1;
vector<KeyPoint> keypoints2;
FAST(gray1, keypoints1, 30, type);
FAST(gray2, keypoints2, 30, type);
FAST(gray1, keypoints1, 30, true, type);
FAST(gray2, keypoints2, 30, true, type);
for(size_t i = 0; i < keypoints1.size(); ++i)
{

@ -200,7 +200,7 @@ int CV_KDTreeTest_CPP::checkGetPoins( const Mat& data )
int CV_KDTreeTest_CPP::checkFindBoxed()
{
vector<float> min( dims, minValue), max(dims, maxValue);
vector<float> min( dims, static_cast<float>(minValue)), max(dims, static_cast<float>(maxValue));
vector<int> indices;
tr->findOrthoRange( min, max, indices );
// TODO check indices
@ -214,8 +214,8 @@ int CV_KDTreeTest_CPP::findNeighbors( Mat& points, Mat& neighbors )
const int emax = 20;
Mat neighbors2( neighbors.size(), CV_32SC1 );
int j;
vector<float> min(points.cols, minValue);
vector<float> max(points.cols, maxValue);
vector<float> min(points.cols, static_cast<float>(minValue));
vector<float> max(points.cols, static_cast<float>(maxValue));
for( int pi = 0; pi < points.rows; pi++ )
{
// 1st way

@ -54,7 +54,7 @@ static
Mat generateHomography(float angle)
{
// angle - rotation around Oz in degrees
float angleRadian = angle * CV_PI / 180.;
float angleRadian = static_cast<float>(angle * CV_PI / 180);
Mat H = Mat::eye(3, 3, CV_32FC1);
H.at<float>(0,0) = H.at<float>(1,1) = std::cos(angleRadian);
H.at<float>(0,1) = -std::sin(angleRadian);
@ -69,8 +69,8 @@ Mat rotateImage(const Mat& srcImage, float angle, Mat& dstImage, Mat& dstMask)
// angle - rotation around Oz in degrees
float diag = std::sqrt(static_cast<float>(srcImage.cols * srcImage.cols + srcImage.rows * srcImage.rows));
Mat LUShift = Mat::eye(3, 3, CV_32FC1); // left up
LUShift.at<float>(0,2) = -srcImage.cols/2;
LUShift.at<float>(1,2) = -srcImage.rows/2;
LUShift.at<float>(0,2) = static_cast<float>(-srcImage.cols/2);
LUShift.at<float>(1,2) = static_cast<float>(-srcImage.rows/2);
Mat RDShift = Mat::eye(3, 3, CV_32FC1); // right down
RDShift.at<float>(0,2) = diag/2;
RDShift.at<float>(1,2) = diag/2;
@ -114,7 +114,7 @@ void scaleKeyPoints(const vector<KeyPoint>& src, vector<KeyPoint>& dst, float sc
static
float calcCirclesIntersectArea(const Point2f& p0, float r0, const Point2f& p1, float r1)
{
float c = norm(p0 - p1), sqr_c = c * c;
float c = static_cast<float>(norm(p0 - p1)), sqr_c = c * c;
float sqr_r0 = r0 * r0;
float sqr_r1 = r1 * r1;
@ -125,7 +125,7 @@ float calcCirclesIntersectArea(const Point2f& p0, float r0, const Point2f& p1, f
float minR = std::min(r0, r1);
float maxR = std::max(r0, r1);
if(c + minR <= maxR)
return CV_PI * minR * minR;
return static_cast<float>(CV_PI * minR * minR);
float cos_halfA0 = (sqr_r0 + sqr_c - sqr_r1) / (2 * r0 * c);
float cos_halfA1 = (sqr_r1 + sqr_c - sqr_r0) / (2 * r1 * c);
@ -133,15 +133,15 @@ float calcCirclesIntersectArea(const Point2f& p0, float r0, const Point2f& p1, f
float A0 = 2 * acos(cos_halfA0);
float A1 = 2 * acos(cos_halfA1);
return 0.5 * sqr_r0 * (A0 - sin(A0)) +
0.5 * sqr_r1 * (A1 - sin(A1));
return 0.5f * sqr_r0 * (A0 - sin(A0)) +
0.5f * sqr_r1 * (A1 - sin(A1));
}
static
float calcIntersectRatio(const Point2f& p0, float r0, const Point2f& p1, float r1)
{
float intersectArea = calcCirclesIntersectArea(p0, r0, p1, r1);
float unionArea = CV_PI * (r0 * r0 + r1 * r1) - intersectArea;
float unionArea = static_cast<float>(CV_PI) * (r0 * r0 + r1 * r1) - intersectArea;
return intersectArea / unionArea;
}
@ -160,7 +160,7 @@ void matchKeyPoints(const vector<KeyPoint>& keypoints0, const Mat& H,
matches.clear();
vector<uchar> usedMask(keypoints1.size(), 0);
for(size_t i0 = 0; i0 < keypoints0.size(); i0++)
for(int i0 = 0; i0 < static_cast<int>(keypoints0.size()); i0++)
{
int nearestPointIndex = -1;
float maxIntersectRatio = 0.f;
@ -176,7 +176,7 @@ void matchKeyPoints(const vector<KeyPoint>& keypoints0, const Mat& H,
if(intersectRatio > maxIntersectRatio)
{
maxIntersectRatio = intersectRatio;
nearestPointIndex = i1;
nearestPointIndex = static_cast<int>(i1);
}
}
@ -222,7 +222,7 @@ protected:
const int maxAngle = 360, angleStep = 15;
for(int angle = 0; angle < maxAngle; angle += angleStep)
{
Mat H = rotateImage(image0, angle, image1, mask1);
Mat H = rotateImage(image0, static_cast<float>(angle), image1, mask1);
vector<KeyPoint> keypoints1;
featureDetector->detect(image1, keypoints1, mask1);
@ -339,10 +339,10 @@ protected:
const int maxAngle = 360, angleStep = 15;
for(int angle = 0; angle < maxAngle; angle += angleStep)
{
Mat H = rotateImage(image0, angle, image1, mask1);
Mat H = rotateImage(image0, static_cast<float>(angle), image1, mask1);
vector<KeyPoint> keypoints1;
rotateKeyPoints(keypoints0, H, angle, keypoints1);
rotateKeyPoints(keypoints0, H, static_cast<float>(angle), keypoints1);
Mat descriptors1;
descriptorExtractor->compute(image1, keypoints1, descriptors1);
@ -457,7 +457,7 @@ protected:
keyPointMatchesCount++;
// Check does this inlier have consistent sizes
const float maxSizeDiff = 0.8;//0.9f; // grad
const float maxSizeDiff = 0.8f;//0.9f; // grad
float size0 = keypoints0[matches[m].trainIdx].size;
float size1 = osiKeypoints1[matches[m].queryIdx].size;
CV_Assert(size0 > 0 && size1 > 0);
@ -545,7 +545,7 @@ protected:
resize(image0, image1, Size(), 1./scale, 1./scale);
vector<KeyPoint> keypoints1;
scaleKeyPoints(keypoints0, keypoints1, 1./scale);
scaleKeyPoints(keypoints0, keypoints1, 1.0f/scale);
Mat descriptors1;
descriptorExtractor->compute(image1, keypoints1, descriptors1);

@ -111,43 +111,3 @@ ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
FILES "Src" ${test_srcs}
${nvidia})
ocv_add_perf_tests()
set(perf_cpu_path "${CMAKE_CURRENT_SOURCE_DIR}/perf_cpu")
if(BUILD_PERF_TESTS AND EXISTS "${perf_cpu_path}")
# opencv_highgui is required for imread/imwrite
set(perf_deps ${the_module} opencv_ts opencv_highgui opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree)
ocv_check_dependencies(${perf_deps})
if(OCV_DEPENDENCIES_FOUND)
set(the_target "opencv_perf_gpu_cpu")
ocv_module_include_directories(${perf_deps} "${perf_cpu_path}")
if(NOT OPENCV_PERF_${the_module}_CPU_SOURCES)
file(GLOB perf_srcs "${perf_cpu_path}/*.cpp")
file(GLOB perf_hdrs "${perf_cpu_path}/*.hpp" "${perf_cpu_path}/*.h")
source_group("Src" FILES ${perf_srcs})
source_group("Include" FILES ${perf_hdrs})
set(OPENCV_PERF_${the_module}_CPU_SOURCES ${perf_srcs} ${perf_hdrs})
endif()
add_executable(${the_target} ${OPENCV_PERF_${the_module}_CPU_SOURCES})
target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${perf_deps} ${OPENCV_LINKER_LIBS})
# Additional target properties
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
)
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "tests performance")
endif()
ocv_add_precompiled_headers(${the_target})
else(OCV_DEPENDENCIES_FOUND)
#TODO: warn about unsatisfied dependencies
endif(OCV_DEPENDENCIES_FOUND)
endif()

@ -204,7 +204,7 @@ gpu::CascadeClassifier_GPU
--------------------------
.. ocv:class:: gpu::CascadeClassifier_GPU
Cascade classifier class used for object detection. ::
Cascade classifier class used for object detection. Supports HAAR and LBP cascades. ::
class CV_EXPORTS CascadeClassifier_GPU
{
@ -219,6 +219,7 @@ Cascade classifier class used for object detection. ::
/* Returns number of detected objects */
int detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size());
int detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
/* Finds only the largest object. Special mode if training is required.*/
bool findLargestObject;
@ -233,11 +234,11 @@ Cascade classifier class used for object detection. ::
gpu::CascadeClassifier_GPU::CascadeClassifier_GPU
-----------------------------------------------------
Loads the classifier from a file.
Loads the classifier from a file. The cascade type is detected automatically from the constructor parameter.
.. ocv:function:: gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename)
:param filename: Name of the file from which the classifier is loaded. Only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported.
:param filename: Name of the file from which the classifier is loaded. For HAAR, only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported; for LBP, only the new type of OpenCV XML cascade is supported.
@ -255,8 +256,7 @@ Loads the classifier from a file. The previous content is destroyed.
.. ocv:function:: bool gpu::CascadeClassifier_GPU::load(const string& filename)
:param filename: Name of the file from which the classifier is loaded. Only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported.
:param filename: Name of the file from which the classifier is loaded. For HAAR, only the old ``haar`` classifier (trained by the ``haar`` training application) and NVIDIA's ``nvbin`` are supported; for LBP, only the new type of OpenCV XML cascade is supported.
gpu::CascadeClassifier_GPU::release
@ -273,13 +273,17 @@ Detects objects of different sizes in the input image.
.. ocv:function:: int gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size())
.. ocv:function:: int gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4)
:param image: Matrix of type ``CV_8U`` containing an image where objects should be detected.
:param objectsBuf: Buffer to store detected objects (rectangles). If it is empty, it is allocated with the default size. If not empty, the function searches not more than N objects, where ``N = sizeof(objectsBuf's data)/sizeof(cv::Rect)``.
:param scaleFactor: Value to specify how much the image size is reduced at each image scale.
:param maxObjectSize: Maximum possible object size. Objects larger than that are ignored. Used by the second signature; supported only for LBP cascades.
:param scaleFactor: Parameter specifying how much the image size is reduced at each image scale.
:param minNeighbors: Value to specify how many neighbours each candidate rectangle has to retain.
:param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it.
:param minSize: Minimum possible object size. Objects smaller than that are ignored.

@ -820,6 +820,7 @@ private:
int nLayers_;
};
//! HoughLines
CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
CV_EXPORTS void HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta);

@ -1,255 +1,234 @@
import sys, re
spaces = '[\s]*'
symbols = '[\s\w\d,.=:|]*'
symbols = '[\s\w\d,.:|]*'
def pattern1(prefix, test):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + '\)' + spaces)
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + '\)' + spaces)
def pattern2(prefix, test, cvtype):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + cvtype + symbols + '\)' + spaces)
def pattern2(prefix, test, param1):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + '\)' + spaces)
def pattern3(prefix, test, cvtype, param1):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + cvtype + symbols + param1 + symbols + '\)' + spaces)
def pattern3(prefix, test, param1, param2):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + param2 + symbols + '\)' + spaces)
def pattern4(prefix, test, cvtype, param1, param2):
return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + cvtype + symbols + param1 + symbols + param2 + symbols + '\)' + spaces)
def pattern4(prefix, test, param1, param2, param3):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + param2 + symbols + param3 + symbols + '\)' + spaces)
def pattern5(prefix, test, param1, param2, param3, param5):
return re.compile(spaces + prefix + '_' + test + '::' + symbols + '::' + '\(' + symbols + param1 + symbols + param2 + symbols + param3 + symbols + param4 + symbols + '\)' + spaces)
npp_patterns = [
##############################################################
# Core
# Core/Add_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Add_Mat', '8U'),
pattern2('Core', 'Add_Mat', '16U'),
pattern2('Core', 'Add_Mat', '32F'),
# Core/Add_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Add_Scalar', '8U'),
pattern2('Core', 'Add_Scalar', '16U'),
pattern2('Core', 'Add_Scalar', '32F'),
# Core/Subtract_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Subtract_Mat', '8U'),
pattern2('Core', 'Subtract_Mat', '16U'),
pattern2('Core', 'Subtract_Mat', '32F'),
# Core/Subtract_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Subtract_Scalar', '8U'),
pattern2('Core', 'Subtract_Scalar', '16U'),
pattern2('Core', 'Subtract_Scalar', '32F'),
# Core/Multiply_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Multiply_Mat', '8U'),
pattern2('Core', 'Multiply_Mat', '16U'),
pattern2('Core', 'Multiply_Mat', '32F'),
# Core/Multiply_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Multiply_Scalar', '8U'),
pattern2('Core', 'Multiply_Scalar', '16U'),
pattern2('Core', 'Multiply_Scalar', '32F'),
# Core/Divide_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Divide_Mat', '8U'),
pattern2('Core', 'Divide_Mat', '16U'),
pattern2('Core', 'Divide_Mat', '32F'),
# Core/Divide_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'Divide_Scalar', '8U'),
pattern2('Core', 'Divide_Scalar', '16U'),
pattern2('Core', 'Divide_Scalar', '32F'),
# Core/AbsDiff_Mat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiff_Mat', '8U'),
pattern2('Core', 'AbsDiff_Mat', '16U'),
pattern2('Core', 'AbsDiff_Mat', '32F'),
# Core/AbsDiff_Scalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiff_Scalar', '8U'),
pattern2('Core', 'AbsDiff_Scalar', '16U'),
pattern2('Core', 'AbsDiff_Scalar', '32F'),
# Core/Abs
# Core_AddMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AddMat', '8U'),
pattern2('Core', 'AddMat', '16U'),
pattern2('Core', 'AddMat', '32F'),
# Core_AddScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AddScalar', '8U'),
pattern2('Core', 'AddScalar', '16U'),
pattern2('Core', 'AddScalar', '32F'),
# Core_SubtractMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'SubtractMat', '8U'),
pattern2('Core', 'SubtractMat', '16U'),
pattern2('Core', 'SubtractMat', '32F'),
# Core_SubtractScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'SubtractScalar', '8U'),
pattern2('Core', 'SubtractScalar', '16U'),
pattern2('Core', 'SubtractScalar', '32F'),
# Core_MultiplyMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'MultiplyMat', '8U'),
pattern2('Core', 'MultiplyMat', '16U'),
pattern2('Core', 'MultiplyMat', '32F'),
# Core_MultiplyScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'MultiplyScalar', '8U'),
pattern2('Core', 'MultiplyScalar', '16U'),
pattern2('Core', 'MultiplyScalar', '32F'),
# Core_DivideMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'DivideMat', '8U'),
pattern2('Core', 'DivideMat', '16U'),
pattern2('Core', 'DivideMat', '32F'),
# Core_DivideScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'DivideScalar', '8U'),
pattern2('Core', 'DivideScalar', '16U'),
pattern2('Core', 'DivideScalar', '32F'),
# Core_AbsDiffMat (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiffMat', '8U'),
pattern2('Core', 'AbsDiffMat', '16U'),
pattern2('Core', 'AbsDiffMat', '32F'),
# Core_AbsDiffScalar (CV_8U | CV_16U | CV_32F)
pattern2('Core', 'AbsDiffScalar', '8U'),
pattern2('Core', 'AbsDiffScalar', '16U'),
pattern2('Core', 'AbsDiffScalar', '32F'),
# Core_Abs
pattern1('Core', 'Abs'),
# Core/Sqr
# Core_Sqr
pattern1('Core', 'Sqr'),
# Core/Sqrt
# Core_Sqrt
pattern1('Core', 'Sqrt'),
# Core/Log
# Core_Log
pattern1('Core', 'Log'),
# Core/Exp
# Core_Exp
pattern1('Core', 'Exp'),
# Core/Bitwise_And_Scalar
pattern1('Core', 'Bitwise_And_Scalar'),
# Core_BitwiseAndScalar
pattern1('Core', 'BitwiseAndScalar'),
# Core/Bitwise_Or_Scalar
pattern1('Core', 'Bitwise_Or_Scalar'),
# Core_BitwiseOrScalar
pattern1('Core', 'BitwiseOrScalar'),
# Core/Bitwise_Xor_Scalar
pattern1('Core', 'Bitwise_Xor_Scalar'),
# Core_BitwiseXorScalar
pattern1('Core', 'BitwiseXorScalar'),
# Core/RShift
# Core_RShift
pattern1('Core', 'RShift'),
# Core/LShift
# Core_LShift
pattern1('Core', 'LShift'),
# Core/Transpose
# Core_Transpose
pattern1('Core', 'Transpose'),
# Core/Flip
# Core_Flip
pattern1('Core', 'Flip'),
# Core/LUT_OneChannel
pattern1('Core', 'LUT_OneChannel'),
# Core_LutOneChannel
pattern1('Core', 'LutOneChannel'),
# Core/LUT_MultiChannel
pattern1('Core', 'LUT_MultiChannel'),
# Core_LutMultiChannel
pattern1('Core', 'LutMultiChannel'),
# Core/Magnitude_Complex
pattern1('Core', 'Magnitude_Complex'),
# Core_MagnitudeComplex
pattern1('Core', 'MagnitudeComplex'),
# Core/Magnitude_Sqr_Complex
pattern1('Core', 'Magnitude_Sqr_Complex'),
# Core_MagnitudeSqrComplex
pattern1('Core', 'MagnitudeSqrComplex'),
# Core/MeanStdDev
# Core_MeanStdDev
pattern1('Core', 'MeanStdDev'),
# Core/NormDiff
# Core_NormDiff
pattern1('Core', 'NormDiff'),
##############################################################
# Filters
# Filters/Blur
# Filters_Blur
pattern1('Filters', 'Blur'),
# Filters/Erode
# Filters_Erode
pattern1('Filters', 'Erode'),
# Filters/Dilate
# Filters_Dilate
pattern1('Filters', 'Dilate'),
# Filters/MorphologyEx
# Filters_MorphologyEx
pattern1('Filters', 'MorphologyEx'),
##############################################################
# ImgProc
# ImgProc/Resize (8UC1 | 8UC4, INTER_NEAREST | INTER_LINEAR)
pattern3('ImgProc', 'Resize', '8UC1', 'INTER_NEAREST'),
pattern3('ImgProc', 'Resize', '8UC4', 'INTER_NEAREST'),
pattern3('ImgProc', 'Resize', '8UC1', 'INTER_LINEAR'),
pattern3('ImgProc', 'Resize', '8UC4', 'INTER_LINEAR'),
# ImgProc/Resize (8UC4, INTER_CUBIC)
pattern3('ImgProc', 'Resize', '8UC4', 'INTER_CUBIC'),
# ImgProc/WarpAffine (8UC1 | 8UC3 | 8UC4 | 32FC1 | 32FC3 | 32FC4, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpAffine', '8UC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8UC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32FC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc/WarpPerspective (8UC1 | 8UC3 | 8UC4 | 32FC1 | 32FC3 | 32FC4, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpPerspective', '8UC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8UC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC1', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC1', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC1', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC3', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC3', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC3', 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC4', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC4', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32FC4', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc/CopyMakeBorder (8UC1 | 8UC4 | 32SC1 | 32FC1, BORDER_CONSTANT)
pattern3('ImgProc', 'CopyMakeBorder', '8UC1', 'BORDER_CONSTANT'),
pattern3('ImgProc', 'CopyMakeBorder', '8UC4', 'BORDER_CONSTANT'),
pattern3('ImgProc', 'CopyMakeBorder', '32SC1', 'BORDER_CONSTANT'),
pattern3('ImgProc', 'CopyMakeBorder', '32FC1', 'BORDER_CONSTANT'),
# ImgProc/Threshold (32F, THRESH_TRUNC)
# ImgProc_Resize (8U, 1 | 4, INTER_NEAREST | INTER_LINEAR)
pattern4('ImgProc', 'Resize', '8U', '1', 'INTER_NEAREST'),
pattern4('ImgProc', 'Resize', '8U', '4', 'INTER_NEAREST'),
pattern4('ImgProc', 'Resize', '8U', '1', 'INTER_LINEAR'),
pattern4('ImgProc', 'Resize', '8U', '4', 'INTER_LINEAR'),
# ImgProc_Resize (8U, 4, INTER_CUBIC)
pattern4('ImgProc', 'Resize', '8U', '4', 'INTER_CUBIC'),
# ImgProc_WarpAffine (8U | 32F, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpAffine', '8U' , 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8U' , 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '8U' , 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32F', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32F', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpAffine', '32F', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc_WarpPerspective (8U | 32F, INTER_NEAREST | INTER_LINEAR | INTER_CUBIC, BORDER_CONSTANT)
pattern4('ImgProc', 'WarpPerspective', '8U' , 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8U' , 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '8U' , 'INTER_CUBIC', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32F', 'INTER_NEAREST', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32F', 'INTER_LINEAR', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'WarpPerspective', '32F', 'INTER_CUBIC', 'BORDER_CONSTANT'),
# ImgProc_CopyMakeBorder (8UC1 | 8UC4 | 32SC1 | 32FC1, BORDER_CONSTANT)
pattern4('ImgProc', 'CopyMakeBorder', '8U' , '1', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'CopyMakeBorder', '8U' , '4', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'CopyMakeBorder', '32S', '1', 'BORDER_CONSTANT'),
pattern4('ImgProc', 'CopyMakeBorder', '32F', '1', 'BORDER_CONSTANT'),
# ImgProc_Threshold (32F, THRESH_TRUNC)
pattern3('ImgProc', 'Threshold', '32F', 'THRESH_TRUNC'),
# ImgProc/Integral_Sqr
pattern1('ImgProc', 'Integral_Sqr'),
# ImgProc_IntegralSqr
pattern1('ImgProc', 'IntegralSqr'),
# ImgProc/HistEven_OneChannel
pattern1('ImgProc', 'HistEven_OneChannel'),
# ImgProc_HistEvenOneChannel
pattern1('ImgProc', 'HistEvenOneChannel'),
# ImgProc/HistEven_FourChannel
pattern1('ImgProc', 'HistEven_FourChannel'),
# ImgProc_HistEvenFourChannel
pattern1('ImgProc', 'HistEvenFourChannel'),
# ImgProc/Rotate
# ImgProc_Rotate
pattern1('ImgProc', 'Rotate'),
# ImgProc/SwapChannels
# ImgProc_SwapChannels
pattern1('ImgProc', 'SwapChannels'),
# ImgProc/AlphaComp
# ImgProc_AlphaComp
pattern1('ImgProc', 'AlphaComp'),
# ImgProc/ImagePyramid_build
pattern1('ImgProc', 'ImagePyramid_build'),
# ImgProc_ImagePyramidBuild
pattern1('ImgProc', 'ImagePyramidBuild'),
# ImgProc_ImagePyramidGetLayer
pattern1('ImgProc', 'ImagePyramidGetLayer'),
# ImgProc/ImagePyramid_getLayer
pattern1('ImgProc', 'ImagePyramid_getLayer'),
##############################################################
# MatOp
# MatOp/SetTo (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern2('MatOp', 'SetTo', '8UC4'),
pattern2('MatOp', 'SetTo', '16UC1'),
pattern2('MatOp', 'SetTo', '16UC4'),
pattern2('MatOp', 'SetTo', '32FC1'),
pattern2('MatOp', 'SetTo', '32FC4'),
# MatOp/SetToMasked (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern2('MatOp', 'SetToMasked', '8UC4'),
pattern2('MatOp', 'SetToMasked', '16UC1'),
pattern2('MatOp', 'SetToMasked', '16UC4'),
pattern2('MatOp', 'SetToMasked', '32FC1'),
pattern2('MatOp', 'SetToMasked', '32FC4'),
# MatOp/CopyToMasked (8UC1 | 8UC3 | 8UC4 | 16UC1 | 16UC3 | 16UC4 | 32FC1 | 32FC3 | 32FC4)
pattern2('MatOp', 'CopyToMasked', '8UC1'),
pattern2('MatOp', 'CopyToMasked', '8UC3'),
pattern2('MatOp', 'CopyToMasked', '8UC4'),
pattern2('MatOp', 'CopyToMasked', '16UC1'),
pattern2('MatOp', 'CopyToMasked', '16UC3'),
pattern2('MatOp', 'CopyToMasked', '16UC4'),
pattern2('MatOp', 'CopyToMasked', '32FC1'),
pattern2('MatOp', 'CopyToMasked', '32FC3'),
pattern2('MatOp', 'CopyToMasked', '32FC4'),
# MatOp_SetTo (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern3('MatOp', 'SetTo', '8U' , '4'),
pattern3('MatOp', 'SetTo', '16U', '1'),
pattern3('MatOp', 'SetTo', '16U', '4'),
pattern3('MatOp', 'SetTo', '32F', '1'),
pattern3('MatOp', 'SetTo', '32F', '4'),
# MatOp_SetToMasked (8UC4 | 16UC1 | 16UC4 | 32FC1 | 32FC4)
pattern3('MatOp', 'SetToMasked', '8U' , '4'),
pattern3('MatOp', 'SetToMasked', '16U', '1'),
pattern3('MatOp', 'SetToMasked', '16U', '4'),
pattern3('MatOp', 'SetToMasked', '32F', '1'),
pattern3('MatOp', 'SetToMasked', '32F', '4'),
# MatOp_CopyToMasked (8UC1 | 8UC3 | 8UC4 | 16UC1 | 16UC3 | 16UC4 | 32FC1 | 32FC3 | 32FC4)
pattern3('MatOp', 'CopyToMasked', '8U' , '1'),
pattern3('MatOp', 'CopyToMasked', '8U' , '3'),
pattern3('MatOp', 'CopyToMasked', '8U' , '4'),
pattern3('MatOp', 'CopyToMasked', '16U', '1'),
pattern3('MatOp', 'CopyToMasked', '16U', '3'),
pattern3('MatOp', 'CopyToMasked', '16U', '4'),
pattern3('MatOp', 'CopyToMasked', '32F', '1'),
pattern3('MatOp', 'CopyToMasked', '32F', '3'),
pattern3('MatOp', 'CopyToMasked', '32F', '4'),
]
cublasPattern = pattern1('Core', 'GEMM')
@ -260,7 +239,7 @@ if __name__ == "__main__":
inputFile = open(sys.argv[1], 'r')
lines = inputFile.readlines()
inputFile.close()
for i in range(len(lines)):
if cublasPattern.match(lines[i]):

@ -0,0 +1,125 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
// Prints a one-line banner to stdout naming the host OS family and whether
// this is a 64-bit or 32-bit build, followed by a blank line.
//
// The platform is chosen at compile time from predefined compiler macros:
// _WIN32 / _WIN64 on Windows, and _LP64 to distinguish 64-bit from 32-bit
// builds on Linux and Apple targets. On any other platform nothing is printed.
void printOsInfo()
{
#if defined _WIN32
#   if defined _WIN64
    std::cout << "OS: Windows x64 \n" << std::endl;
#   else
    std::cout << "OS: Windows x32 \n" << std::endl;
#   endif
// __linux__ is predefined in every language mode, whereas the bare `linux`
// spelling is omitted when compiling in strict ISO mode (e.g. -std=c++11
// instead of -std=gnu++11). Testing only `linux` silently dropped the banner
// on such builds, so both spellings are accepted here.
#elif defined __linux__ || defined linux
#   if defined _LP64
    std::cout << "OS: Linux x64 \n" << std::endl;
#   else
    std::cout << "OS: Linux x32 \n" << std::endl;
#   endif
#elif defined __APPLE__
#   if defined _LP64
    std::cout << "OS: Apple x64 \n" << std::endl;
#   else
    std::cout << "OS: Apple x32 \n" << std::endl;
#   endif
#endif
}
// Prints a summary of the CUDA environment to stdout.
//
// Without HAVE_CUDA only a single "built without CUDA support" line is
// printed. With HAVE_CUDA the output lists the driver and runtime versions,
// the GPU architectures the module was compiled for (CUDA_ARCH_BIN /
// CUDA_ARCH_PTX), the number of CUDA-enabled devices, and for each device its
// name, compute capability, multiprocessor count, total and free memory, and
// a warning when the device is not compatible with this build.
void printCudaInfo()
{
#ifndef HAVE_CUDA
    std::cout << "OpenCV was built without CUDA support \n" << std::endl;
#else
    // Start from 0 and fall back to 0 on failure so that a failed query never
    // prints an indeterminate value.
    int driver = 0;
    if (cudaDriverGetVersion(&driver) != cudaSuccess)
        driver = 0;

    std::cout << "CUDA Driver version: " << driver << '\n';
    std::cout << "CUDA Runtime version: " << CUDART_VERSION << '\n';
    std::cout << std::endl;

    std::cout << "GPU module was compiled for the following GPU archs:" << std::endl;
    std::cout << " BIN: " << CUDA_ARCH_BIN << '\n';
    std::cout << " PTX: " << CUDA_ARCH_PTX << '\n';
    std::cout << std::endl;

    const int deviceCount = getCudaEnabledDeviceCount();
    std::cout << "CUDA device count: " << deviceCount << '\n';
    std::cout << std::endl;

    // Whole mebibytes via one integer division. This yields the same number as
    // truncating bytes -> KB -> MB in two steps, but without narrowing the
    // intermediate kilobyte count to int.
    // NOTE(review): assumes totalMemory()/freeMemory() return an unsigned
    // integer byte count (size_t) - confirm against the DeviceInfo declaration.
    const size_t bytesPerMb = 1024 * 1024;

    for (int i = 0; i < deviceCount; ++i)
    {
        DeviceInfo info(i);

        std::cout << "Device [" << i << "] \n";
        std::cout << "\t Name: " << info.name() << '\n';
        std::cout << "\t Compute capability: " << info.majorVersion() << '.' << info.minorVersion()<< '\n';
        std::cout << "\t Multi Processor Count: " << info.multiProcessorCount() << '\n';
        std::cout << "\t Total memory: " << (info.totalMemory() / bytesPerMb) << " Mb \n";
        std::cout << "\t Free memory: " << (info.freeMemory() / bytesPerMb) << " Mb \n";
        if (!info.isCompatible())
            std::cout << "\t !!! This device is NOT compatible with current GPU module build \n";
        std::cout << std::endl;
    }
#endif
}
int main(int argc, char** argv)
{
CommandLineParser cmd(argc, (const char**) argv,
"{ print_info_only | print_info_only | false | Print information about system and exit }"
"{ device | device | 0 | Device on which tests will be executed }"
"{ cpu | cpu | false | Run tests on cpu }"
);
printOsInfo();
printCudaInfo();
if (cmd.get<bool>("print_info_only"))
return 0;
int device = cmd.get<int>("device");
bool cpu = cmd.get<bool>("cpu");
#ifndef HAVE_CUDA
cpu = true;
#endif
if (cpu)
{
runOnGpu = false;
cout << "Run tests on CPU \n" << endl;
}
else
{
runOnGpu = true;
if (device < 0 || device >= getCudaEnabledDeviceCount())
{
cerr << "Incorrect device index - " << device << endl;
return -1;
}
DeviceInfo info(device);
if (!info.isCompatible())
{
cerr << "Device " << device << " [" << info.name() << "] is NOT compatible with current GPU module build" << endl;
return -1;
}
setDevice(device);
cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
}
InitGoogleTest(&argc, argv);
perf::TestBase::Init(argc, argv);
return RUN_ALL_TESTS();
}

@ -1,219 +1,263 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// StereoBM
GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(make_pair<string, string>("gpu/perf/aloe.jpg", "gpu/perf/aloeR.jpg")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
declare.time(5.0);
cv::Mat img_l_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_l_host.empty());
const cv::Mat imgLeft = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
cv::Mat img_r_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_r_host.empty());
const cv::Mat imgRight = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
cv::gpu::StereoBM_GPU bm(0, 256);
cv::gpu::GpuMat img_l(img_l_host);
cv::gpu::GpuMat img_r(img_r_host);
cv::gpu::GpuMat dst;
const int preset = 0;
const int ndisp = 256;
bm(img_l, img_r, dst);
if (runOnGpu)
{
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
declare.time(5.0);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bm(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
TEST_CYCLE()
{
d_bm(d_imgLeft, d_imgRight, d_dst);
}
}
else
{
bm(img_l, img_r, dst);
cv::StereoBM bm(preset, ndisp);
cv::Mat dst;
bm(imgLeft, imgRight, dst);
TEST_CYCLE()
{
bm(imgLeft, imgRight, dst);
}
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
GPU_PERF_TEST_1(StereoBeliefPropagation, cv::gpu::DeviceInfo)
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(make_pair<string, string>("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
declare.time(10.0);
cv::Mat img_l_host = readImage("gpu/stereobp/aloe-L.png");
ASSERT_FALSE(img_l_host.empty());
const cv::Mat imgLeft = readImage(GetParam().first);
ASSERT_FALSE(imgLeft.empty());
cv::Mat img_r_host = readImage("gpu/stereobp/aloe-R.png");
ASSERT_FALSE(img_r_host.empty());
const cv::Mat imgRight = readImage(GetParam().second);
ASSERT_FALSE(imgRight.empty());
cv::gpu::StereoBeliefPropagation bp(64);
cv::gpu::GpuMat img_l(img_l_host);
cv::gpu::GpuMat img_r(img_r_host);
cv::gpu::GpuMat dst;
const int ndisp = 64;
bp(img_l, img_r, dst);
if (runOnGpu)
{
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
declare.time(10.0);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
TEST_CYCLE()
d_bp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_bp(d_imgLeft, d_imgRight, d_dst);
}
}
else
{
bp(img_l, img_r, dst);
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
GPU_PERF_TEST_1(StereoConstantSpaceBP, cv::gpu::DeviceInfo)
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(make_pair<string, string>("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
declare.time(10.0);
cv::Mat img_l_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_l_host.empty());
const cv::Mat imgLeft = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
cv::Mat img_r_host = readImage("gpu/stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_r_host.empty());
const cv::Mat imgRight = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
cv::gpu::StereoConstantSpaceBP csbp(128);
cv::gpu::GpuMat img_l(img_l_host);
cv::gpu::GpuMat img_r(img_r_host);
cv::gpu::GpuMat dst;
const int ndisp = 128;
csbp(img_l, img_r, dst);
if (runOnGpu)
{
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
declare.time(10.0);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_csbp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
TEST_CYCLE()
{
d_csbp(d_imgLeft, d_imgRight, d_dst);
}
}
else
{
csbp(img_l, img_r, dst);
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// DisparityBilateralFilter
GPU_PERF_TEST_1(DisparityBilateralFilter, cv::gpu::DeviceInfo)
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(make_pair<string, string>("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
const cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const cv::Mat disp = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp.empty());
cv::Mat img_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
const int ndisp = 128;
cv::Mat disp_host = readImage("gpu/stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp_host.empty());
if (runOnGpu)
{
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
cv::gpu::DisparityBilateralFilter f(128);
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat disp(disp_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat d_dst;
f(disp, img, dst);
d_filter(d_disp, d_img, d_dst);
TEST_CYCLE()
TEST_CYCLE()
{
d_filter(d_disp, d_img, d_dst);
}
}
else
{
f(disp, img, dst);
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, DisparityBilateralFilter, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// TransformPoints
IMPLEMENT_PARAM_CLASS(Count, int)
DEF_PARAM_TEST_1(Count, int);
GPU_PERF_TEST(TransformPoints, cv::gpu::DeviceInfo, Count)
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const int count = GetParam();
int count = GET_PARAM(1);
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
cv::Mat src_host(1, count, CV_32FC3);
fill(src_host, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::gpu::GpuMat src(src_host);
cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::transformPoints(src, rvec, tvec, dst);
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
}
}
else
{
cv::gpu::transformPoints(src, rvec, tvec, dst);
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// ProjectPoints
GPU_PERF_TEST(ProjectPoints, cv::gpu::DeviceInfo, Count)
PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const int count = GetParam();
int count = GET_PARAM(1);
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
cv::Mat src_host(1, count, CV_32FC3);
fill(src_host, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
cv::gpu::GpuMat src(src_host);
cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
}
}
else
{
cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
cv::Mat dst;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
TEST_CYCLE()
{
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
}
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(10.0);
int count = GET_PARAM(1);
const int count = GetParam();
cv::Mat object(1, count, CV_32FC3);
fill(object, -100, 100);
fillRandom(object, -100, 100);
cv::Mat camera_mat(3, 3, CV_32FC1);
fill(camera_mat, 0.5, 1);
fillRandom(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold(1, 3, CV_32FC1);
fill(rvec_gold, 0, 1);
fillRandom(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
fill(tvec_gold, 0, 1);
fillRandom(tvec_gold, 0, 1);
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
@ -221,82 +265,92 @@ GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
cv::Mat rvec;
cv::Mat tvec;
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
declare.time(3.0);
TEST_CYCLE()
if (runOnGpu)
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
}
else
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::Combine(
ALL_DEVICES,
testing::Values<Count>(5000, 10000, 20000)));
TEST_CYCLE()
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
}
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth)
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
cv::Size size = GET_PARAM(1);
int depth = GET_PARAM(2);
cv::Mat src_host(size, depth);
fill(src_host, 5.0, 30.0);
cv::Mat src(size, depth);
fillRandom(src, 5.0, 30.0);
cv::Mat Q(4, 4, CV_32FC1);
fill(Q, 0.1, 1.0);
fillRandom(Q, 0.1, 1.0);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::reprojectImageTo3D(src, dst, Q);
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
}
}
else
{
cv::gpu::reprojectImageTo3D(src, dst, Q);
cv::Mat dst;
cv::reprojectImageTo3D(src, dst, Q);
TEST_CYCLE()
{
cv::reprojectImageTo3D(src, dst, Q);
}
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, ReprojectImageTo3D, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values<MatDepth>(CV_8U, CV_16S)));
//////////////////////////////////////////////////////////////////////
// DrawColorDisp
GPU_PERF_TEST(DrawColorDisp, cv::gpu::DeviceInfo, cv::Size, MatDepth)
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
cv::Mat src(size, type);
fillRandom(src, 0, 255);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::drawColorDisp(src, dst, 255);
cv::gpu::drawColorDisp(d_src, d_dst, 255);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::drawColorDisp(d_src, d_dst, 255);
}
}
else
{
cv::gpu::drawColorDisp(src, dst, 255);
FAIL();
}
}
INSTANTIATE_TEST_CASE_P(Calib3D, DrawColorDisp, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16S))));
#endif
} // namespace

File diff suppressed because it is too large Load Diff

@ -1,209 +1,278 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SURF
GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
DEF_PARAM_TEST_1(Image, string);
cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.jpg"))
{
declare.time(50.0);
cv::gpu::SURF_GPU surf;
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat keypoints, descriptors;
if (runOnGpu)
{
cv::gpu::SURF_GPU d_surf;
surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
declare.time(2.0);
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
TEST_CYCLE()
{
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
}
else
{
surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
cv::SURF surf;
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
surf(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
surf(img, cv::noArray(), keypoints, descriptors);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, SURF, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// FAST
GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.jpg"))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
cv::gpu::FAST_GPU fast(20);
if (runOnGpu)
{
cv::gpu::FAST_GPU d_fast(20);
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat keypoints;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints;
fast(img, cv::gpu::GpuMat(), keypoints);
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
TEST_CYCLE()
TEST_CYCLE()
{
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
}
}
else
{
fast(img, cv::gpu::GpuMat(), keypoints);
std::vector<cv::KeyPoint> keypoints;
cv::FAST(img, keypoints, 20);
TEST_CYCLE()
{
keypoints.clear();
cv::FAST(img, keypoints, 20);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, FAST, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// ORB
GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.jpg"))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
if (runOnGpu)
{
cv::gpu::ORB_GPU d_orb(4000);
cv::gpu::ORB_GPU orb(4000);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat keypoints, descriptors;
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
TEST_CYCLE()
{
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
}
else
{
orb(img, cv::gpu::GpuMat(), keypoints, descriptors);
cv::ORB orb(4000);
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
orb(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
orb(img, cv::noArray(), keypoints, descriptors);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, ORB, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_match
// BFMatch
IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, DescriptorSize, NormType)
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
int desc_size = GET_PARAM(1);
int normType = GET_PARAM(2);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query_host(3000, desc_size, type);
fill(query_host, 0.0, 10.0);
cv::Mat train_host(3000, desc_size, type);
fill(train_host, 0.0, 10.0);
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::gpu::BFMatcher_GPU matcher(normType);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
cv::gpu::GpuMat query(query_host);
cv::gpu::GpuMat train(train_host);
cv::gpu::GpuMat trainIdx, distance;
if (runOnGpu)
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
matcher.matchSingle(query, train, trainIdx, distance);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance;
declare.time(3.0);
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE()
TEST_CYCLE()
{
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
}
}
else
{
matcher.matchSingle(query, train, trainIdx, distance);
cv::BFMatcher matcher(normType);
std::vector<cv::DMatch> matches;
matcher.match(query, train, matches);
TEST_CYCLE()
{
matcher.match(query, train, matches);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_knnMatch
// BFKnnMatch
IMPLEMENT_PARAM_CLASS(K, int)
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, DescriptorSize, K, NormType)
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
Values(64, 128, 256),
Values(2, 3),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(30.0);
int desc_size = GET_PARAM(1);
int k = GET_PARAM(2);
int normType = GET_PARAM(3);
int desc_size = GET_PARAM(0);
int k = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query_host(3000, desc_size, type);
fill(query_host, 0.0, 10.0);
cv::Mat train_host(3000, desc_size, type);
fill(train_host, 0.0, 10.0);
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::gpu::BFMatcher_GPU matcher(normType);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
cv::gpu::GpuMat query(query_host);
cv::gpu::GpuMat train(train_host);
cv::gpu::GpuMat trainIdx, distance, allDist;
if (runOnGpu)
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
declare.time(3.0);
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE()
TEST_CYCLE()
{
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
}
}
else
{
matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(query, train, matches, k);
TEST_CYCLE()
{
matcher.knnMatch(query, train, matches, k);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(K(2), K(3)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_radiusMatch
// BFRadiusMatch
// Benchmark of brute-force radius matching (max distance 2.0):
// cv::gpu::BFMatcher_GPU::radiusMatchSingle versus cv::BFMatcher::radiusMatch on two
// 3000 x desc_size descriptor matrices filled with values from the 0.0..1.0 range arguments.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, DescriptorSize, NormType)
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(30.0);
int desc_size = GET_PARAM(1);
int normType = GET_PARAM(2);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
// Hamming distance is measured on 8-bit descriptors, the other norms on float ones.
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query_host(3000, desc_size, type);
fill(query_host, 0.0, 1.0);
cv::Mat train_host(3000, desc_size, type);
fill(train_host, 0.0, 1.0);
cv::Mat query(3000, desc_size, type);
fillRandom(query, 0.0, 1.0);
cv::gpu::BFMatcher_GPU matcher(normType);
cv::Mat train(3000, desc_size, type);
fillRandom(train, 0.0, 1.0);
cv::gpu::GpuMat query(query_host);
cv::gpu::GpuMat train(train_host);
cv::gpu::GpuMat trainIdx, nMatches, distance;
// GPU path: upload the descriptors, call the matcher once, then repeat the call
// inside the TEST_CYCLE() body.
if (runOnGpu)
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
declare.time(3.0);
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
TEST_CYCLE()
TEST_CYCLE()
{
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
}
}
// CPU path: the same measurement with the host cv::BFMatcher.
else
{
matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(query, train, matches, 2.0);
TEST_CYCLE()
{
matcher.radiusMatch(query, train, matches, 2.0);
}
}
}
INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
ALL_DEVICES,
testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
#endif
} // namespace

@ -1,308 +1,379 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// Blur
IMPLEMENT_PARAM_CLASS(KernelSize, int)
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
// Benchmark of box blur with a ksize x ksize kernel: cv::gpu::blur versus cv::blur.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::blur(src, dst, cv::Size(ksize, ksize));
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
}
}
// CPU path: the same measurement with cv::blur.
else
{
cv::gpu::blur(src, dst, cv::Size(ksize, ksize));
cv::Mat dst;
cv::blur(src, dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::blur(src, dst, cv::Size(ksize, ksize));
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Blur, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7))));
//////////////////////////////////////////////////////////////////////
// Sobel
// Benchmark of the Sobel derivative (dx = 1, dy = 1, output depth -1) for several
// kernel sizes: cv::gpu::Sobel (with an explicit buffer) versus cv::Sobel.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Sobel(src, dst, -1, 1, 1, buf, ksize);
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
}
}
// CPU path: the same measurement with cv::Sobel.
else
{
cv::gpu::Sobel(src, dst, -1, 1, 1, buf, ksize);
cv::Mat dst;
cv::Sobel(src, dst, -1, 1, 1, ksize);
TEST_CYCLE()
{
cv::Sobel(src, dst, -1, 1, 1, ksize);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Sobel, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Scharr
// Benchmark of the Scharr derivative (dx = 1, dy = 0, output depth -1):
// cv::gpu::Scharr (with an explicit buffer) versus cv::Scharr.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Scharr(src, dst, -1, 1, 0, buf);
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
}
}
// CPU path: the same measurement with cv::Scharr.
else
{
cv::gpu::Scharr(src, dst, -1, 1, 0, buf);
cv::Mat dst;
cv::Scharr(src, dst, -1, 1, 0);
TEST_CYCLE()
{
cv::Scharr(src, dst, -1, 1, 0);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Scharr, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1))));
//////////////////////////////////////////////////////////////////////
// GaussianBlur
// Benchmark of Gaussian blur (ksize x ksize kernel, sigma argument 0.5):
// cv::gpu::GaussianBlur (with an explicit buffer) versus cv::GaussianBlur.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::GaussianBlur(src, dst, cv::Size(ksize, ksize), buf, 0.5);
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
}
}
// CPU path: the same measurement with cv::GaussianBlur.
else
{
cv::gpu::GaussianBlur(src, dst, cv::Size(ksize, ksize), buf, 0.5);
cv::Mat dst;
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
TEST_CYCLE()
{
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, GaussianBlur, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Laplacian
// Benchmark of the Laplacian filter (output depth -1, kernel size 1 or 3):
// cv::gpu::Laplacian versus cv::Laplacian.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::Laplacian(src, dst, -1, ksize);
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
}
}
// CPU path: the same measurement with cv::Laplacian.
else
{
cv::gpu::Laplacian(src, dst, -1, ksize);
cv::Mat dst;
cv::Laplacian(src, dst, -1, ksize);
TEST_CYCLE()
{
cv::Laplacian(src, dst, -1, ksize);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KernelSize(1), KernelSize(3))));
//////////////////////////////////////////////////////////////////////
// Erode
// Benchmark of erosion with a 3x3 rectangular structuring element:
// cv::gpu::erode (with an explicit buffer) versus cv::erode.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::erode(src, dst, ker, buf);
cv::gpu::erode(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::erode(d_src, d_dst, ker, d_buf);
}
}
// CPU path: the same measurement with cv::erode.
else
{
cv::gpu::erode(src, dst, ker, buf);
cv::Mat dst;
cv::erode(src, dst, ker);
TEST_CYCLE()
{
cv::erode(src, dst, ker);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Erode, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// Dilate
// Benchmark of dilation with a 3x3 rectangular structuring element:
// cv::gpu::dilate (with an explicit buffer) versus cv::dilate.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::dilate(src, dst, ker, buf);
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
}
}
// CPU path: the same measurement with cv::dilate.
else
{
cv::gpu::dilate(src, dst, ker, buf);
cv::Mat dst;
cv::dilate(src, dst, ker);
TEST_CYCLE()
{
cv::dilate(src, dst, ker);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Dilate, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// MorphologyEx
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS testing::Values(MorphOp(cv::MORPH_OPEN), MorphOp(cv::MORPH_CLOSE), MorphOp(cv::MORPH_GRADIENT), MorphOp(cv::MORPH_TOPHAT), MorphOp(cv::MORPH_BLACKHAT))
#define ALL_MORPH_OPS ValuesIn(MorphOp::all())
// Benchmark of compound morphology (operation chosen by the MorphOp parameter, 3x3
// rectangular structuring element): cv::gpu::morphologyEx (with two explicit buffers)
// versus cv::morphologyEx.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), ALL_MORPH_OPS))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int morphOp = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int morphOp = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
cv::gpu::GpuMat buf1;
cv::gpu::GpuMat buf2;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf1;
cv::gpu::GpuMat d_buf2;
cv::gpu::morphologyEx(src, dst, morphOp, ker, buf1, buf2);
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
}
}
// CPU path: the same measurement with cv::morphologyEx.
else
{
cv::gpu::morphologyEx(src, dst, morphOp, ker, buf1, buf2);
cv::Mat dst;
cv::morphologyEx(src, dst, morphOp, ker);
TEST_CYCLE()
{
cv::morphologyEx(src, dst, morphOp, ker);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, MorphologyEx, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
ALL_MORPH_OPS));
//////////////////////////////////////////////////////////////////////
// Filter2D
// Benchmark of generic 2-D convolution with a ksize x ksize CV_32FC1 kernel:
// cv::gpu::filter2D versus cv::filter2D (output depth -1).
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
declare.time(20.0);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
int ksize = GET_PARAM(3);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src_host(size, type);
fill(src_host, 0.0, 255.0);
cv::Mat src(size, type);
fillRandom(src);
// The kernel stays a host cv::Mat in both the GPU and the CPU call.
cv::Mat kernel(ksize, ksize, CV_32FC1);
fill(kernel, 0.0, 1.0);
fillRandom(kernel, 0.0, 1.0);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
// GPU path: upload the source, call the filter once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::filter2D(src, dst, -1, kernel);
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
TEST_CYCLE()
TEST_CYCLE()
{
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
}
}
// CPU path: the same measurement with cv::filter2D.
else
{
cv::gpu::filter2D(src, dst, -1, kernel);
cv::Mat dst;
cv::filter2D(src, dst, -1, kernel);
TEST_CYCLE()
{
cv::filter2D(src, dst, -1, kernel);
}
}
}
INSTANTIATE_TEST_CASE_P(Filters, Filter2D, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
#endif
} // namespace

File diff suppressed because it is too large Load Diff

@ -1,75 +1,141 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
DEF_PARAM_TEST_1(Image, string);
// Host-side connected-component labelling used as the CPU reference of the labelling
// benchmark. operator() flood-fills the image with an explicit stack: a pixel joins its
// right/left/bottom/top neighbour's component when InInterval accepts the pair of
// intensities, and every component gets the next integer label starting from 0.
// NOTE(review): this span looks like a diff rendering. The GPU_PERF_TEST line and the
// devInfo / readImage / GpuMat / connectivityMask / labelComponents statements scattered
// through the struct appear to be the removed body of the old ConnectedComponents test,
// interleaved with the added struct without +/- markers; it does not compile as shown.
GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size)
struct GreedyLabeling
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
// Pixel coordinate stored on the flood-fill stack.
struct dot
{
int x;
int y;
static dot make(int i, int j)
{
dot d; d.x = i; d.y = j;
return d;
}
};
// Predicate on two pixel values: true when a - b lies in [lo, hi].
// Note that the constructor negates its first argument (lo = -_lo).
struct InInterval
{
InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {};
const int lo, hi;
bool operator() (const unsigned char a, const unsigned char b) const
{
int d = a - b;
return lo <= d && d <= hi;
}
private:
// Declared but not defined: assignment is disabled (the members are const).
InInterval& operator=(const InInterval&);
};
// Allocates one stack slot per pixel and a label matrix initialised to -1.
// NOTE(review): stack is a raw new[] released in the destructor, yet no copy constructor
// or copy assignment is declared, so copying a GreedyLabeling would delete[] the same
// buffer twice.
GreedyLabeling(cv::Mat img)
: image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {stack = new dot[image.cols * image.rows];}
~GreedyLabeling(){delete[] stack;}
// Labels every pixel of `image` into `labels`. The data is accessed as int, and the
// callers visible here pass the CV_32SC1 _labels member. cv::Mat is taken by value;
// presumably the copy shares the pixel buffer with the caller's matrix — confirm.
void operator() (cv::Mat labels) const
{
labels.setTo(cv::Scalar::all(-1));
// With (0, 2) the predicate accepts 0 <= a - b <= 2.
InInterval inInt(0, 2);
int cc = -1;
int* dist_labels = (int*)labels.data;
int pitch = static_cast<int>(labels.step1());
unsigned char* source = (unsigned char*)image.data;
int width = image.cols;
int height = image.rows;
for (int j = 0; j < image.rows; ++j)
for (int i = 0; i < image.cols; ++i)
{
// Already labelled by an earlier flood fill.
if (dist_labels[j * pitch + i] != -1) continue;
cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE);
dot* top = stack;
dot p = dot::make(i, j);
cc++;
// cv::threshold(image, image, 150, 255, CV_THRESH_BINARY);
dist_labels[j * pitch + i] = cc;
cv::gpu::GpuMat mask;
mask.create(image.rows, image.cols, CV_8UC1);
// Flood fill from the seed: process p, push its unlabelled matching neighbours, pop the next p.
while (top >= stack)
{
int* dl = &dist_labels[p.y * pitch + p.x];
unsigned char* sp = &source[p.y * image.step1() + p.x];
cv::gpu::GpuMat components;
components.create(image.rows, image.cols, CV_32SC1);
dl[0] = cc;
cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
//right
if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1]))
*top++ = dot::make(p.x + 1, p.y);
ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
//left
if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1]))
*top++ = dot::make(p.x - 1, p.y);
//bottom
if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()]))
*top++ = dot::make(p.x, p.y + 1);
//top
if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-static_cast<int>(image.step1())]))
*top++ = dot::make(p.x, p.y - 1);
// NOTE(review): when the stack is empty (top == stack) this decrements top to
// stack - 1 and reads that element before the loop condition ends the fill, i.e. an
// out-of-bounds read one slot before the allocation.
p = *--top;
}
}
}
cv::Mat image;
cv::Mat _labels;
dot* stack;
};
// Benchmark of connected-component labelling on a grayscale image:
// cv::gpu::labelComponents on a mask produced by cv::gpu::connectivityMask (GPU)
// versus the host GreedyLabeling functor (CPU).
// NOTE(review): this span looks like a diff rendering; the stray TEST_CYCLE() /
// labelComponents / closing-brace / INSTANTIATE_TEST_CASE_P lines appear to be the
// removed tail of the old ConnectedComponents test interleaved without +/- markers,
// so the block does not compile as shown.
PERF_TEST_P(Image, Labeling_ConnectedComponents, Values<string>("gpu/labeling/aloe-disp.png"))
{
declare.time(1.0);
TEST_CYCLE()
cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
// GPU path: build the connectivity mask once, then measure only labelComponents.
if (runOnGpu)
{
cv::gpu::labelComponents(mask, components);
cv::gpu::GpuMat mask;
mask.create(image.rows, image.cols, CV_8UC1);
cv::gpu::GpuMat components;
components.create(image.rows, image.cols, CV_32SC1);
cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
TEST_CYCLE()
{
cv::gpu::labelComponents(mask, components);
}
}
}
// CPU path: run the host labelling into its own _labels matrix.
else
{
GreedyLabeling host(image);
INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262))));
host(host._labels);
declare.time(1.0);
TEST_CYCLE()
{
host(host._labels);
}
}
}
#endif
} // namespace

@ -1,20 +0,0 @@
#include "perf_precomp.hpp"
#ifndef HAVE_CUDA
// Built without CUDA: there is nothing to benchmark, so report that and exit successfully.
int main()
{
    printf("OpenCV was built without CUDA support\n");
    return 0;
}
#else
// Entry point of the GPU performance tests: initialise Google Test first, then the
// OpenCV perf framework (both receive the command line), and run every registered test.
int main(int argc, char **argv)
{
    testing::InitGoogleTest(&argc, argv);
    perf::TestBase::Init(argc, argv);

    const int exitCode = RUN_ALL_TESTS();
    return exitCode;
}
#endif

@ -1,141 +1,169 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SetTo
// Benchmark of filling a whole matrix with the scalar (1, 2, 3, 4):
// cv::gpu::GpuMat::setTo versus cv::Mat::setTo.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(1, 3, 4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
// The PERF_TEST_P parameters carry depth and channel count separately; combine them.
int type = CV_MAKE_TYPE(depth, channels);
cv::gpu::GpuMat src(size, type);
cv::Scalar val(1, 2, 3, 4);
src.setTo(val);
// GPU path: call setTo once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(size, type);
d_src.setTo(val);
TEST_CYCLE()
TEST_CYCLE()
{
d_src.setTo(val);
}
}
// CPU path: the same measurement on a host cv::Mat.
else
{
cv::Mat src(size, type);
src.setTo(val);
TEST_CYCLE()
{
src.setTo(val);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// SetToMasked
// Benchmark of masked scalar assignment with a CV_8UC1 mask:
// cv::gpu::GpuMat::setTo(val, mask) versus cv::Mat::setTo(val, mask).
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(1, 3, 4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
// The PERF_TEST_P parameters carry depth and channel count separately; combine them.
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask_host(size, CV_8UC1);
fill(mask_host, 0, 2);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
cv::gpu::GpuMat src(src_host);
cv::Scalar val(1, 2, 3, 4);
cv::gpu::GpuMat mask(mask_host);
src.setTo(val, mask);
// GPU path: upload source and mask, call setTo once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
d_src.setTo(val, d_mask);
TEST_CYCLE()
TEST_CYCLE()
{
d_src.setTo(val, d_mask);
}
}
// CPU path: the same measurement on the host matrices.
else
{
src.setTo(val, mask);
TEST_CYCLE()
{
src.setTo(val, mask);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// CopyToMasked
// Benchmark of masked copy with a CV_8UC1 mask:
// cv::gpu::GpuMat::copyTo(dst, mask) versus cv::Mat::copyTo(dst, mask).
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(1, 3, 4)))
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
cv::Size size = GET_PARAM(1);
int type = GET_PARAM(2);
// The PERF_TEST_P parameters carry depth and channel count separately; combine them.
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src_host(size, type);
fill(src_host, 0, 255);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask_host(size, CV_8UC1);
fill(mask_host, 0, 2);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat mask(mask_host);
cv::gpu::GpuMat dst;
// GPU path: upload source and mask, call copyTo once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat d_dst;
src.copyTo(dst, mask);
d_src.copyTo(d_dst, d_mask);
TEST_CYCLE()
TEST_CYCLE()
{
d_src.copyTo(d_dst, d_mask);
}
}
// CPU path: the same measurement on the host matrices.
else
{
cv::Mat dst;
src.copyTo(dst, mask);
TEST_CYCLE()
{
src.copyTo(dst, mask);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// ConvertTo
// Benchmark of depth conversion from depth1 to depth2 with the arguments 0.5 and 1.0
// passed after the target depth: cv::gpu::GpuMat::convertTo versus cv::Mat::convertTo.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST / INSTANTIATE_TEST_CASE_P version with the added DEF_PARAM_TEST /
// PERF_TEST_P version (no +/- markers), hence the duplicated declarations and
// TEST_CYCLE() lines; it does not compile as shown.
GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
{
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
cv::Size size = GET_PARAM(1);
int depth1 = GET_PARAM(2);
int depth2 = GET_PARAM(3);
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
{
cv::Size size = GET_PARAM(0);
int depth1 = GET_PARAM(1);
int depth2 = GET_PARAM(2);
cv::Mat src_host(size, depth1);
fill(src_host, 0, 255);
cv::Mat src(size, depth1);
fillRandom(src);
cv::gpu::GpuMat src(src_host);
cv::gpu::GpuMat dst;
// GPU path: upload the source, call convertTo once, then repeat it inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
src.convertTo(dst, depth2, 0.5, 1.0);
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
TEST_CYCLE()
TEST_CYCLE()
{
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
}
}
// CPU path: the same measurement on the host matrix.
else
{
cv::Mat dst;
src.convertTo(dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
src.convertTo(dst, depth2, 0.5, 1.0);
}
}
}
INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
ALL_DEVICES,
GPU_TYPICAL_MAT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F))));
#endif
} // namespace

@ -1,85 +1,131 @@
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace testing;
namespace {
///////////////////////////////////////////////////////////////
// HOG
// Benchmark of HOG multi-scale detection on a grayscale image:
// cv::gpu::HOGDescriptor versus cv::HOGDescriptor, both configured with
// cv::gpu::HOGDescriptor::getDefaultPeopleDetector().
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST_1 / INSTANTIATE_TEST_CASE_P version with the added DEF_PARAM_TEST_1 /
// PERF_TEST_P version (no +/- markers), hence the duplicated declarations and
// TEST_CYCLE() lines; it does not compile as shown.
GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
DEF_PARAM_TEST_1(Image, string);
cv::Mat img_host = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::gpu::GpuMat img(img_host);
std::vector<cv::Rect> found_locations;
cv::gpu::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
// GPU path: upload the image, detect once, then repeat the detection inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
// CPU path: the same measurement with cv::HOGDescriptor.
else
{
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
///////////////////////////////////////////////////////////////
// HaarClassifier
// Benchmark of Haar cascade multi-scale detection on a grayscale image:
// cv::gpu::CascadeClassifier_GPU versus cv::CascadeClassifier. In the PERF_TEST_P
// version the parameter is an (image path, cascade path) pair.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST_1 / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
cv::Mat img_host = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
// GPU path: load the cascade, upload the image, detect once, then repeat inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::CascadeClassifier_GPU cascade;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_objects_buffer;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
d_cascade.detectMultiScale(d_img, d_objects_buffer);
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat objects_buffer;
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_objects_buffer);
}
}
// CPU path: the same measurement with cv::CascadeClassifier.
// NOTE(review): this branch loads the cascade from a hard-coded path rather than
// GetParam().second as the GPU branch does; the literal equals the only parameter value.
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
cascade.detectMultiScale(img, objects_buffer);
std::vector<cv::Rect> rects;
TEST_CYCLE()
{
cascade.detectMultiScale(img, objects_buffer);
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, HaarClassifier, ALL_DEVICES);
///////////////////////////////////////////////////////////////
// LBP cascade
//===================== LBP cascade ==========================//
// Benchmark of LBP cascade multi-scale detection on a grayscale image:
// cv::gpu::CascadeClassifier_GPU versus cv::CascadeClassifier. In the PERF_TEST_P
// version the parameter is an (image path, cascade path) pair.
// NOTE(review): this span looks like a diff rendering that interleaves the removed
// GPU_PERF_TEST_1 / INSTANTIATE_TEST_CASE_P version with the added PERF_TEST_P version
// (no +/- markers), hence the duplicated declarations and TEST_CYCLE() lines; it does
// not compile as shown.
GPU_PERF_TEST_1(LBPClassifier, cv::gpu::DeviceInfo)
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
cv::gpu::DeviceInfo devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::Mat img_host = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img_host.empty());
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
// GPU path: load the cascade, upload the image, detect once, then repeat inside TEST_CYCLE().
if (runOnGpu)
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_gpu_rects;
cv::gpu::GpuMat img(img_host);
cv::gpu::GpuMat gpu_rects;
cv::gpu::CascadeClassifier_GPU cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
d_cascade.detectMultiScale(d_img, d_gpu_rects);
cascade.detectMultiScale(img, gpu_rects);
TEST_CYCLE()
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_gpu_rects);
}
}
// CPU path: the same measurement with cv::CascadeClassifier.
// NOTE(review): this branch loads the cascade from a hard-coded path rather than
// GetParam().second as the GPU branch does; the literal equals the only parameter value.
else
{
cascade.detectMultiScale(img, gpu_rects);
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
}
}
INSTANTIATE_TEST_CASE_P(ObjDetect, LBPClassifier, ALL_DEVICES);
#endif
} // namespace

@ -11,6 +11,10 @@
#include "cvconfig.h"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#endif
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
@ -18,8 +22,12 @@
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
#include "perf_utility.hpp"
#include "utility.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined

@ -1,77 +0,0 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
// Fills matrix m using the bounds a and b; the definition lives in the
// accompanying source file, not in this header.
void fill(cv::Mat& m, double a, double b);
// Bring the perf framework's matrix type/depth parameter wrappers into scope.
using perf::MatType;
using perf::MatDepth;
// Printable enum wrappers used as test parameters.
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
// Plain parameter bundle for colour-conversion tests.
// NOTE(review): scn / dcn are presumably the source and destination channel
// counts and `code` the conversion code -- confirm against the tests using it.
struct CvtColorInfo
{
    int scn;
    int dcn;
    int code;

    // Every field defaults to 0; the constructor is explicit so a bare int
    // is never silently converted into a CvtColorInfo.
    explicit CvtColorInfo(int scn_ = 0, int dcn_ = 0, int code_ = 0)
        : scn(scn_)
        , dcn(dcn_)
        , code(code_)
    {
    }
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
// IMPLEMENT_PARAM_CLASS(name, type)
// Defines a small wrapper class `name` that stores one value of `type`.
// The wrapper is implicitly constructible from `type` (default: type()) and
// implicitly convertible back to it. A PrintTo overload is also generated
// that writes "<name> = <value>" to the given stream, so the parameter gets
// a readable label when printed.
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << " = " << testing::PrintToString(static_cast< type >(param)); \
}
// Integer wrapper used as a "number of channels" test parameter.
IMPLEMENT_PARAM_CLASS(Channels, int)
// Declared inside cv::gpu, the namespace of DeviceInfo itself.
namespace cv { namespace gpu
{
// Writes a description of the device to *os (defined in the source file).
void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os);
}}
// GPU_PERF_TEST(name, T0, T1, ...)
// Declares a fixture `name` derived from perf::TestBaseWithParam whose
// parameter is std::tr1::tuple<T0, T1, ...>, registers a TEST_P case called
// `perf` that forwards to RunPerfTestBody(), and then opens the definition of
// name::PerfTestBody(). The braces written after the macro invocation become
// that function's body.
#define GPU_PERF_TEST(name, ...) \
struct name : perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
// GPU_PERF_TEST_1(name, param_type)
// Single-parameter variant of GPU_PERF_TEST: the fixture's parameter is
// `param_type` itself rather than a tuple. It registers a TEST_P case called
// `perf` and opens the definition of name::PerfTestBody(), whose body follows
// the macro invocation.
#define GPU_PERF_TEST_1(name, param_type) \
struct name : perf::TestBaseWithParam< param_type > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
// Matrix sizes shared by the size-parameterised tests.
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz1080p, cv::Size(1800, 1500))
// Loads an image by file name; `flags` defaults to colour loading.
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
// Returns the list of devices the tests are instantiated over.
const std::vector<cv::gpu::DeviceInfo>& devices();
// Parameter generator covering every entry returned by devices().
#define ALL_DEVICES testing::ValuesIn(devices())
// Extracts the k-th element of the current tuple-valued test parameter.
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

File diff suppressed because it is too large Load Diff

@ -4,12 +4,19 @@ using namespace std;
using namespace cv;
using namespace cv::gpu;
void fill(Mat& m, double a, double b)
bool runOnGpu = true;
// Fills m with uniformly distributed values bounded by a and b.
// The generator is re-created with the same fixed seed on every call, so the
// produced sequence is identical from call to call.
void fillRandom(Mat& m, double a, double b)
{
    RNG generator(123456789);

    const Scalar lowerBound = Scalar::all(a);
    const Scalar upperBound = Scalar::all(b);

    generator.fill(m, RNG::UNIFORM, lowerBound, upperBound);
}
// Resolves fileName through the perf framework's data path and loads it
// with the given imread flags.
Mat readImage(const string& fileName, int flags)
{
    const string fullPath = perf::TestBase::getDataPath(fileName);
    return imread(fullPath, flags);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
@ -184,37 +191,3 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
*os << str[info.code];
}
void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
{
*os << info.name();
}
// Resolves fileName through the perf framework's data path and loads it
// with the given imread flags.
Mat readImage(const string& fileName, int flags)
{
    const string fullPath = perf::TestBase::getDataPath(fileName);
    return imread(fullPath, flags);
}
// Returns the CUDA devices for which DeviceInfo::isCompatible() holds.
// The list is built on the first call and the same vector is returned on
// every later call.
const vector<DeviceInfo>& devices()
{
    static vector<DeviceInfo> compatible;
    static bool initialized = false;

    if (initialized)
        return compatible;

    const int total = getCudaEnabledDeviceCount();
    compatible.reserve(total);

    for (int idx = 0; idx < total; ++idx)
    {
        DeviceInfo candidate(idx);
        if (!candidate.isCompatible())
            continue;
        compatible.push_back(candidate);
    }

    // Set only after the scan has finished, as in the original flow.
    initialized = true;
    return compatible;
}

@ -0,0 +1,45 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ts/ts_perf.hpp"
// Global switch read by the tests to choose between their GPU and CPU code
// paths; defined in the accompanying source file.
extern bool runOnGpu;
// Fills m with random values bounded by a and b (defaults: 0 and 255).
void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
// Loads an image by file name; `flags` defaults to colour loading.
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
// Bring the perf framework's matrix type/depth parameter wrappers into scope.
using perf::MatType;
using perf::MatDepth;
// Printable enum wrappers used as test parameters, each followed (where
// defined) by a generator covering all of its values.
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
// Plain parameter bundle for colour-conversion tests.
// NOTE(review): scn / dcn are presumably the source and destination channel
// counts and `code` the conversion code -- confirm against the tests using it.
struct CvtColorInfo
{
    int scn;
    int dcn;
    int code;

    // Every field defaults to 0; the constructor is explicit so a bare int
    // is never silently converted into a CvtColorInfo.
    explicit CvtColorInfo(int scn_ = 0, int dcn_ = 0, int code_ = 0)
        : scn(scn_)
        , dcn(dcn_)
        , code(code_)
    {
    }
};
// Printer for CvtColorInfo parameters (defined in the source file).
void PrintTo(const CvtColorInfo& info, std::ostream* os);
// Extracts the k-th element of the current tuple-valued test parameter.
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
// DEF_PARAM_TEST names a fixture type whose parameter is a tuple of the
// listed types; DEF_PARAM_TEST_1 does the same for a single, non-tuple type.
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
// Commonly used fixture types.
DEF_PARAM_TEST_1(Sz, cv::Size);
typedef perf::Size_MatType Sz_Type;
DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth);
DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, int);
// Matrix sizes shared by the size-parameterised tests.
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz720p, perf::sz1080p)
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

@ -1,136 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// StereoBM
// Times cv::StereoBM on the aloe left/right image pair. The DeviceInfo
// parameter only drives instantiation; the body never reads it.

GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
{
    cv::Mat left = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(left.empty());

    cv::Mat right = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(right.empty());

    cv::StereoBM matcher(0, 256);
    cv::Mat disparity;

    // One call ahead of the measured loop.
    matcher(left, right, disparity);

    declare.time(5.0);

    TEST_CYCLE()
    {
        matcher(left, right, disparity);
    }
}

INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// ProjectPoints
// Times cv::projectPoints on a row of random 3-channel float points.

IMPLEMENT_PARAM_CLASS(Count, int)

GPU_PERF_TEST(ProjectPoints, cv::gpu::DeviceInfo, Count)
{
    const int pointCount = GET_PARAM(1);

    cv::Mat points(1, pointCount, CV_32FC3);
    fill(points, -100, 100);

    // Rotation, translation and camera matrix are all-ones placeholders.
    cv::Mat rotation = cv::Mat::ones(1, 3, CV_32FC1);
    cv::Mat translation = cv::Mat::ones(1, 3, CV_32FC1);
    cv::Mat cameraMatrix = cv::Mat::ones(3, 3, CV_32FC1);

    cv::Mat projected;

    // One call ahead of the measured loop.
    cv::projectPoints(points, rotation, translation, cameraMatrix, cv::noArray(), projected);

    TEST_CYCLE()
    {
        cv::projectPoints(points, rotation, translation, cameraMatrix, cv::noArray(), projected);
    }
}

INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::Combine(
    ALL_DEVICES,
    testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
// Times cv::solvePnPRansac. The 2D image points are produced by projecting
// random object points with a random reference pose.

GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
{
    const int pointCount = GET_PARAM(1);

    cv::Mat objectPoints(1, pointCount, CV_32FC3);
    fill(objectPoints, -100, 100);

    // Random camera matrix with the off-diagonal entries (0,1), (1,0),
    // (2,0) and (2,1) forced to zero.
    cv::Mat cameraMatrix(3, 3, CV_32FC1);
    fill(cameraMatrix, 0.5, 1);
    cameraMatrix.at<float>(0, 1) = 0.f;
    cameraMatrix.at<float>(1, 0) = 0.f;
    cameraMatrix.at<float>(2, 0) = 0.f;
    cameraMatrix.at<float>(2, 1) = 0.f;

    cv::Mat distCoeffs(1, 8, CV_32F, cv::Scalar::all(0));

    std::vector<cv::Point2f> projected;

    cv::Mat rvecRef(1, 3, CV_32FC1);
    fill(rvecRef, 0, 1);

    cv::Mat tvecRef(1, 3, CV_32FC1);
    fill(tvecRef, 0, 1);

    cv::projectPoints(objectPoints, rvecRef, tvecRef, cameraMatrix, distCoeffs, projected);

    // Matrix header laid over the vector's storage.
    cv::Mat imagePoints(1, pointCount, CV_32FC2, &projected[0]);

    cv::Mat rvecOut;
    cv::Mat tvecOut;

    // One call ahead of the measured loop.
    cv::solvePnPRansac(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvecOut, tvecOut);

    declare.time(10.0);

    TEST_CYCLE()
    {
        cv::solvePnPRansac(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvecOut, tvecOut);
    }
}

INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::Combine(
    ALL_DEVICES,
    testing::Values<Count>(5000, 10000, 20000)));
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
// Times cv::reprojectImageTo3D on a random input matrix and a random 4x4 Q.

GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matDepth = GET_PARAM(2);

    cv::Mat disparity(frameSize, matDepth);
    fill(disparity, 5.0, 30.0);

    cv::Mat Q(4, 4, CV_32FC1);
    fill(Q, 0.1, 1.0);

    cv::Mat points3d;

    // One call ahead of the measured loop.
    cv::reprojectImageTo3D(disparity, points3d, Q);

    TEST_CYCLE()
    {
        cv::reprojectImageTo3D(disparity, points3d, Q);
    }
}

INSTANTIATE_TEST_CASE_P(Calib3D, ReprojectImageTo3D, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values<MatDepth>(CV_8U, CV_16S)));
#endif

File diff suppressed because it is too large Load Diff

@ -1 +0,0 @@
#include "perf_cpu_precomp.hpp"

@ -1,32 +0,0 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# pragma GCC diagnostic ignored "-Wmissing-prototypes" //OSX
#endif
#ifndef __OPENCV_PERF_CPU_PRECOMP_HPP__
#define __OPENCV_PERF_CPU_PRECOMP_HPP__
#include <cstdio>
#include <iostream>
#include "cvconfig.h"
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
#include "perf_utility.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif

@ -1,187 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// SURF
// Times cv::SURF keypoint detection plus descriptor extraction on aloe.jpg.

GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
{
    cv::Mat image = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::SURF detector;

    std::vector<cv::KeyPoint> points;
    cv::Mat descr;

    // One call ahead of the measured loop.
    detector(image, cv::noArray(), points, descr);

    declare.time(50.0);

    TEST_CYCLE()
    {
        // Clear the keypoints left by the previous iteration.
        points.clear();
        detector(image, cv::noArray(), points, descr);
    }
}

INSTANTIATE_TEST_CASE_P(Features2D, SURF, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// FAST
// Times cv::FAST with threshold 20 on aloe.jpg.

GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
{
    cv::Mat image = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    std::vector<cv::KeyPoint> points;

    // One call ahead of the measured loop.
    cv::FAST(image, points, 20);

    TEST_CYCLE()
    {
        // Clear the keypoints left by the previous iteration.
        points.clear();
        cv::FAST(image, points, 20);
    }
}

INSTANTIATE_TEST_CASE_P(Features2D, FAST, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// ORB
// Times cv::ORB (constructed with 4000) on aloe.jpg.

GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
{
    cv::Mat image = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    cv::ORB detector(4000);

    std::vector<cv::KeyPoint> points;
    cv::Mat descr;

    // One call ahead of the measured loop.
    detector(image, cv::noArray(), points, descr);

    TEST_CYCLE()
    {
        // Clear the keypoints left by the previous iteration.
        points.clear();
        detector(image, cv::noArray(), points, descr);
    }
}

INSTANTIATE_TEST_CASE_P(Features2D, ORB, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_match
// Times cv::BFMatcher::match on two random 3000-row descriptor sets.

IMPLEMENT_PARAM_CLASS(DescriptorSize, int)

GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, DescriptorSize, NormType)
{
    const int descrLen = GET_PARAM(1);
    const int norm = GET_PARAM(2);

    // Hamming distance uses 8-bit descriptors, the other norms use floats.
    int descrType = CV_32F;
    if (norm == cv::NORM_HAMMING)
        descrType = CV_8U;

    cv::Mat queryDescr(3000, descrLen, descrType);
    fill(queryDescr, 0.0, 10.0);

    cv::Mat trainDescr(3000, descrLen, descrType);
    fill(trainDescr, 0.0, 10.0);

    cv::BFMatcher bf(norm);

    std::vector<cv::DMatch> found;

    // One call ahead of the measured loop.
    bf.match(queryDescr, trainDescr, found);

    declare.time(20.0);

    TEST_CYCLE()
    {
        bf.match(queryDescr, trainDescr, found);
    }
}

INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
    ALL_DEVICES,
    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_knnMatch
// Times cv::BFMatcher::knnMatch on two random 3000-row descriptor sets.

IMPLEMENT_PARAM_CLASS(K, int)

GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, DescriptorSize, K, NormType)
{
    const int descrLen = GET_PARAM(1);
    const int neighbours = GET_PARAM(2);
    const int norm = GET_PARAM(3);

    // Hamming distance uses 8-bit descriptors, the other norms use floats.
    int descrType = CV_32F;
    if (norm == cv::NORM_HAMMING)
        descrType = CV_8U;

    cv::Mat queryDescr(3000, descrLen, descrType);
    fill(queryDescr, 0.0, 10.0);

    cv::Mat trainDescr(3000, descrLen, descrType);
    fill(trainDescr, 0.0, 10.0);

    cv::BFMatcher bf(norm);

    std::vector< std::vector<cv::DMatch> > found;

    // One call ahead of the measured loop.
    bf.knnMatch(queryDescr, trainDescr, found, neighbours);

    declare.time(30.0);

    TEST_CYCLE()
    {
        bf.knnMatch(queryDescr, trainDescr, found, neighbours);
    }
}

INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
    ALL_DEVICES,
    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
    testing::Values(K(2), K(3)),
    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
//////////////////////////////////////////////////////////////////////
// BruteForceMatcher_radiusMatch
// Times cv::BFMatcher::radiusMatch (radius 2.0) on two random 3000-row
// descriptor sets.

GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, DescriptorSize, NormType)
{
    const int descrLen = GET_PARAM(1);
    const int norm = GET_PARAM(2);

    // Hamming distance uses 8-bit descriptors, the other norms use floats.
    int descrType = CV_32F;
    if (norm == cv::NORM_HAMMING)
        descrType = CV_8U;

    cv::Mat queryDescr(3000, descrLen, descrType);
    fill(queryDescr, 0.0, 1.0);

    cv::Mat trainDescr(3000, descrLen, descrType);
    fill(trainDescr, 0.0, 1.0);

    cv::BFMatcher bf(norm);

    std::vector< std::vector<cv::DMatch> > found;

    // One call ahead of the measured loop.
    bf.radiusMatch(queryDescr, trainDescr, found, 2.0);

    declare.time(30.0);

    TEST_CYCLE()
    {
        bf.radiusMatch(queryDescr, trainDescr, found, 2.0);
    }
}

INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
    ALL_DEVICES,
    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
#endif

@ -1,283 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
// Kernel-size parameter shared by the filter tests in this file.
IMPLEMENT_PARAM_CLASS(KernelSize, int)

//////////////////////////////////////////////////////////////////////
// Blur
// Times cv::blur with a square ksize x ksize kernel on random data.

GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int kernel = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::blur(input, output, cv::Size(kernel, kernel));

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::blur(input, output, cv::Size(kernel, kernel));
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Blur, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
    testing::Values(KernelSize(3), KernelSize(5), KernelSize(7))));
//////////////////////////////////////////////////////////////////////
// Sobel
// Times cv::Sobel (dx = 1, dy = 1, same output depth) on random data.

GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int kernel = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::Sobel(input, output, -1, 1, 1, kernel);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::Sobel(input, output, -1, 1, 1, kernel);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Sobel, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
    testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Scharr
// Times cv::Scharr (dx = 1, dy = 0, same output depth) on random data.

GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::Scharr(input, output, -1, 1, 0);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::Scharr(input, output, -1, 1, 0);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Scharr, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1))));
//////////////////////////////////////////////////////////////////////
// GaussianBlur
// Times cv::GaussianBlur with a square kernel and sigma 0.5 on random data.

GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int kernel = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::GaussianBlur(input, output, cv::Size(kernel, kernel), 0.5);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::GaussianBlur(input, output, cv::Size(kernel, kernel), 0.5);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, GaussianBlur, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
    testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
//////////////////////////////////////////////////////////////////////
// Laplacian
// Times cv::Laplacian (same output depth) on random data.

GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int kernel = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::Laplacian(input, output, -1, kernel);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::Laplacian(input, output, -1, kernel);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Laplacian, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
    testing::Values(KernelSize(1), KernelSize(3))));
//////////////////////////////////////////////////////////////////////
// Erode
// Times cv::erode with a 3x3 rectangular structuring element.

GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::erode(input, output, element);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::erode(input, output, element);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Erode, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// Dilate
// Times cv::dilate with a 3x3 rectangular structuring element.

GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::dilate(input, output, element);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::dilate(input, output, element);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Dilate, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
//////////////////////////////////////////////////////////////////////
// MorphologyEx
// Times cv::morphologyEx for each morphological operation with a 3x3
// rectangular structuring element.

CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS testing::Values(MorphOp(cv::MORPH_OPEN), MorphOp(cv::MORPH_CLOSE), MorphOp(cv::MORPH_GRADIENT), MorphOp(cv::MORPH_TOPHAT), MorphOp(cv::MORPH_BLACKHAT))

GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int operation = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat output;

    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    // One call ahead of the measured loop.
    cv::morphologyEx(input, output, operation, element);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::morphologyEx(input, output, operation, element);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, MorphologyEx, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
    ALL_MORPH_OPS));
//////////////////////////////////////////////////////////////////////
// Filter2D
// Times cv::filter2D (same output depth) with a random float kernel.

GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int kernelSide = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0.0, 255.0);

    cv::Mat weights(kernelSide, kernelSide, CV_32FC1);
    fill(weights, 0.0, 1.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::filter2D(input, output, -1, weights);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::filter2D(input, output, -1, weights);
    }
}

INSTANTIATE_TEST_CASE_P(Filters, Filter2D, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
    testing::Values(KernelSize(3), KernelSize(5), KernelSize(7), KernelSize(9), KernelSize(11), KernelSize(13), KernelSize(15))));
#endif

@ -1,771 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// Remap
// Times cv::remap with random x/y coordinate maps.

GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int interp = GET_PARAM(3);
    const int border = GET_PARAM(4);

    cv::Mat input(frameSize, matType);
    fill(input, 0, 255);

    // Map values are drawn from [0, width) and [0, height) respectively.
    cv::Mat mapX(frameSize, CV_32FC1);
    fill(mapX, 0, frameSize.width);

    cv::Mat mapY(frameSize, CV_32FC1);
    fill(mapY, 0, frameSize.height);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::remap(input, output, mapX, mapY, interp, border);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::remap(input, output, mapX, mapY, interp, border);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// Resize
// Times cv::resize with the same scale factor along both axes.

IMPLEMENT_PARAM_CLASS(Scale, double)

GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Scale)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int interp = GET_PARAM(3);
    const double factor = GET_PARAM(4);

    cv::Mat input(frameSize, matType);
    fill(input, 0, 255);

    cv::Mat output;

    // An empty cv::Size() is passed as dsize; the factors are given explicitly.
    cv::resize(input, output, cv::Size(), factor, factor, interp);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::resize(input, output, cv::Size(), factor, factor, interp);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR),
                    Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)),
    testing::Values(Scale(0.5), Scale(0.3), Scale(2.0))));
// ResizeArea
// Times cv::resize with INTER_AREA for downscaling factors on large images.
GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int interp = cv::INTER_AREA;
    const double factor = GET_PARAM(3);

    cv::Mat hostInput(frameSize, matType);
    fill(hostInput, 0, 255);

    cv::Mat input(hostInput);
    cv::Mat output;

    // One call ahead of the measured loop.
    cv::resize(input, output, cv::Size(), factor, factor, interp);

    declare.time(1.0);

    TEST_CYCLE()
    {
        cv::resize(input, output, cv::Size(), factor, factor, interp);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine(
    ALL_DEVICES,
    testing::Values(perf::sz1080p, cv::Size(4096, 2048)),
    testing::Values(MatType(CV_8UC1)/*, MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/),
    testing::Values(Scale(0.2),Scale(0.1),Scale(0.05))));
//////////////////////////////////////////////////////////////////////
// WarpAffine
// Times cv::warpAffine with a 45-degree rotation plus a horizontal shift of
// half the source width.

GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
{
    const cv::Size size = GET_PARAM(1);
    const int type = GET_PARAM(2);
    const int interpolation = GET_PARAM(3);
    const int borderMode = GET_PARAM(4);

    cv::Mat src(size, type);
    fill(src, 0, 255);

    cv::Mat dst;

    const double alpha = CV_PI / 4;

    // The shift keeps the original integer division. The explicit cast avoids
    // an int -> double narrowing conversion inside the brace initialiser below,
    // which C++11 list-initialisation rejects.
    const double shiftX = static_cast<double>(src.cols / 2);

    double mat[2][3] = { {std::cos(alpha), -std::sin(alpha), shiftX},
                         {std::sin(alpha),  std::cos(alpha), 0.0} };

    // Matrix header over the local array; no data is copied.
    cv::Mat M(2, 3, CV_64F, (void*) mat);

    // One call ahead of the measured loop.
    cv::warpAffine(src, dst, M, size, interpolation, borderMode);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::warpAffine(src, dst, M, size, interpolation, borderMode);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// WarpPerspective
// Times cv::warpPerspective with a 3x3 matrix built from a 45-degree
// rotation, a horizontal shift of half the source width and a (0, 0, 1)
// last row.

GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
{
    const cv::Size size = GET_PARAM(1);
    const int type = GET_PARAM(2);
    const int interpolation = GET_PARAM(3);
    const int borderMode = GET_PARAM(4);

    cv::Mat src(size, type);
    fill(src, 0, 255);

    cv::Mat dst;

    const double alpha = CV_PI / 4;

    // The shift keeps the original integer division. The explicit cast avoids
    // an int -> double narrowing conversion inside the brace initialiser below,
    // which C++11 list-initialisation rejects.
    const double shiftX = static_cast<double>(src.cols / 2);

    double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), shiftX},
                         {std::sin(alpha),  std::cos(alpha), 0.0},
                         {0.0,              0.0,             1.0} };

    // Matrix header over the local array; no data is copied.
    cv::Mat M(3, 3, CV_64F, (void*) mat);

    // One call ahead of the measured loop.
    cv::warpPerspective(src, dst, M, size, interpolation, borderMode);

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// CopyMakeBorder
// Times cv::copyMakeBorder adding a 5-pixel border on every side.

GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, BorderMode)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);
    const int border = GET_PARAM(3);

    cv::Mat input(frameSize, matType);
    fill(input, 0, 255);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::copyMakeBorder(input, output, 5, 5, 5, 5, border);

    TEST_CYCLE()
    {
        cv::copyMakeBorder(input, output, 5, 5, 5, 5, border);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
//////////////////////////////////////////////////////////////////////
// Threshold
// Times cv::threshold (thresh = 100, maxval = 255) for every threshold type.

CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))

GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, MatDepth, ThreshOp)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matDepth = GET_PARAM(2);
    const int operation = GET_PARAM(3);

    cv::Mat input(frameSize, matDepth);
    fill(input, 0, 255);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::threshold(input, output, 100.0, 255.0, operation);

    TEST_CYCLE()
    {
        cv::threshold(input, output, 100.0, 255.0, operation);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
    ALL_THRESH_OPS));
//////////////////////////////////////////////////////////////////////
// Integral
// Times cv::integral on a random 8-bit single-channel image.

GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
{
    const cv::Size frameSize = GET_PARAM(1);

    cv::Mat input(frameSize, CV_8UC1);
    fill(input, 0, 255);

    cv::Mat sums;

    // One call ahead of the measured loop.
    cv::integral(input, sums);

    TEST_CYCLE()
    {
        cv::integral(input, sums);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// HistEven_OneChannel
// Times cv::calcHist for a 30-bin histogram of channel 0 over [0, 180).

GPU_PERF_TEST(HistEven_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int matDepth = GET_PARAM(2);

    cv::Mat input(frameSize, matDepth);
    fill(input, 0, 255);

    int binCount = 30;
    float binRange[] = {0.0f, 180.0f};

    cv::Mat histogram;

    int histSizes[] = {binCount};
    const float* allRanges[] = {binRange};
    int channelIdx[] = {0};

    // One call ahead of the measured loop; an empty Mat means "no mask".
    cv::calcHist(&input, 1, channelIdx, cv::Mat(), histogram, 1, histSizes, allRanges);

    TEST_CYCLE()
    {
        cv::calcHist(&input, 1, channelIdx, cv::Mat(), histogram, 1, histSizes, allRanges);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_OneChannel, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S))));
//////////////////////////////////////////////////////////////////////
// EqualizeHist
// Times cv::equalizeHist on a random 8-bit single-channel image.

GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
{
    const cv::Size frameSize = GET_PARAM(1);

    cv::Mat input(frameSize, CV_8UC1);
    fill(input, 0, 255);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::equalizeHist(input, output);

    TEST_CYCLE()
    {
        cv::equalizeHist(input, output);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES));
//////////////////////////////////////////////////////////////////////
// Canny
// Times cv::Canny (thresholds 50 / 100) on perf/1280x1024.jpg.

IMPLEMENT_PARAM_CLASS(AppertureSize, int)
IMPLEMENT_PARAM_CLASS(L2gradient, bool)

GPU_PERF_TEST(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient)
{
    const int aperture = GET_PARAM(1);
    const bool l2Gradient = GET_PARAM(2);

    cv::Mat input = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(input.empty());

    cv::Mat edges;

    // One call ahead of the measured loop.
    cv::Canny(input, edges, 50.0, 100.0, aperture, l2Gradient);

    TEST_CYCLE()
    {
        cv::Canny(input, edges, 50.0, 100.0, aperture, l2Gradient);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
    ALL_DEVICES,
    testing::Values(AppertureSize(3), AppertureSize(5)),
    testing::Values(L2gradient(false), L2gradient(true))));
//////////////////////////////////////////////////////////////////////
// MeanShiftFiltering
// Times cv::pyrMeanShiftFiltering (sp = 50, sr = 50) on cones.png.

GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
{
    // Loaded with readImage's default flags.
    cv::Mat input = readImage("gpu/meanshift/cones.png");
    ASSERT_FALSE(input.empty());

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::pyrMeanShiftFiltering(input, output, 50, 50);

    declare.time(15.0);

    TEST_CYCLE()
    {
        cv::pyrMeanShiftFiltering(input, output, 50, 50);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////
// Convolve
// Times cv::filter2D with an all-ones float image and square template.

IMPLEMENT_PARAM_CLASS(KSize, int)
IMPLEMENT_PARAM_CLASS(Ccorr, bool)

GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
{
    const cv::Size frameSize = GET_PARAM(1);
    const int templSide = GET_PARAM(2);
    const bool ccorr = GET_PARAM(3);

    // NOTE(review): the instantiation below also generates Ccorr(true), and
    // every such instance fails this assertion -- confirm this is intended.
    ASSERT_FALSE(ccorr);

    cv::Mat input(frameSize, CV_32FC1);
    input.setTo(1.0);

    cv::Mat templ(templSide, templSide, CV_32FC1);
    templ.setTo(1.0);

    cv::Mat output;

    // One call ahead of the measured loop.
    cv::filter2D(input, output, input.depth(), templ);

    declare.time(10.0);

    TEST_CYCLE()
    {
        cv::filter2D(input, output, input.depth(), templ);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(KSize(3), KSize(9), KSize(17), KSize(27), KSize(32), KSize(64)),
    testing::Values(Ccorr(false), Ccorr(true))));
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate_8U
// Times cv::matchTemplate on random 8-bit images for every comparison method.

CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))

IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size)

GPU_PERF_TEST(MatchTemplate_8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
{
    const cv::Size size = GET_PARAM(1);
    const cv::Size templ_size = GET_PARAM(2);
    const int cn = GET_PARAM(3);
    const int method = GET_PARAM(4);

    cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn));
    fill(image, 0, 255);

    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn));
    fill(templ, 0, 255);

    cv::Mat dst;

    // One call ahead of the measured loop.
    cv::matchTemplate(image, templ, dst, method);

    TEST_CYCLE()
    {
        cv::matchTemplate(image, templ, dst, method);
    }
}   // no trailing ';' here: it would be a stray empty declaration

INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_8U, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
    testing::Values(Channels(1), Channels(3), Channels(4)),
    ALL_TEMPLATE_METHODS));
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate_32F
// Times cv::matchTemplate on random float images for TM_SQDIFF and TM_CCORR.

GPU_PERF_TEST(MatchTemplate_32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
{
    const cv::Size size = GET_PARAM(1);
    const cv::Size templ_size = GET_PARAM(2);
    const int cn = GET_PARAM(3);
    const int method = GET_PARAM(4);

    cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn));
    fill(image, 0, 255);

    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn));
    fill(templ, 0, 255);

    cv::Mat dst;

    // One call ahead of the measured loop.
    cv::matchTemplate(image, templ, dst, method);

    TEST_CYCLE()
    {
        cv::matchTemplate(image, templ, dst, method);
    }
}   // no trailing ';' here: it would be a stray empty declaration

INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_32F, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
    testing::Values(Channels(1), Channels(3), Channels(4)),
    testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
//////////////////////////////////////////////////////////////////////
// MulSpectrums
CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)

// Times cv::mulSpectrums on two CV_32FC2 matrices of the requested size.
GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
{
    const cv::Size matSize = GET_PARAM(1);
    const int dftFlag = GET_PARAM(2);

    cv::Mat lhs(matSize, CV_32FC2);
    fill(lhs, 0, 100);

    cv::Mat rhs(matSize, CV_32FC2);
    fill(rhs, 0, 100);

    cv::Mat product;

    // Untimed warm-up call.
    cv::mulSpectrums(lhs, rhs, product, dftFlag);

    TEST_CYCLE()
    {
        cv::mulSpectrums(lhs, rhs, product, dftFlag);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
//////////////////////////////////////////////////////////////////////
// Dft
// Times cv::dft on a CV_32FC2 matrix with the requested DFT flags.
GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size, DftFlags)
{
    const cv::Size matSize = GET_PARAM(1);
    const int dftFlag = GET_PARAM(2);

    cv::Mat input(matSize, CV_32FC2);
    fill(input, 0, 100);

    cv::Mat output;

    // Untimed warm-up call.
    cv::dft(input, output, dftFlag);

    declare.time(10.0);

    TEST_CYCLE()
    {
        cv::dft(input, output, dftFlag);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))));
//////////////////////////////////////////////////////////////////////
// CornerHarris
IMPLEMENT_PARAM_CLASS(BlockSize, int)
IMPLEMENT_PARAM_CLASS(ApertureSize, int)

// Times cv::cornerHarris on the aloe-L test image for each combination of
// matrix type, border mode, block size and aperture size.
GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
{
    const int matType = GET_PARAM(1);
    const int border = GET_PARAM(2);
    const int block = GET_PARAM(3);
    const int aperture = GET_PARAM(4);

    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    // Float input is scaled by 1/255; other types keep the raw values.
    const double scale = (matType == CV_32F) ? 1.0 / 255.0 : 1.0;
    img.convertTo(img, matType, scale);

    cv::Mat response;
    double k = 0.5;

    // Untimed warm-up call.
    cv::cornerHarris(img, response, block, aperture, k, border);

    TEST_CYCLE()
    {
        cv::cornerHarris(img, response, block, aperture, k, border);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
    ALL_DEVICES,
    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
//////////////////////////////////////////////////////////////////////
// CornerMinEigenVal
// Times cv::cornerMinEigenVal on the aloe-L test image for each combination
// of matrix type, border mode, block size and aperture size.
GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
{
    const int matType = GET_PARAM(1);
    const int border = GET_PARAM(2);
    const int block = GET_PARAM(3);
    const int aperture = GET_PARAM(4);

    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    // Float input is scaled by 1/255; other types keep the raw values.
    const double scale = (matType == CV_32F) ? 1.0 / 255.0 : 1.0;
    img.convertTo(img, matType, scale);

    cv::Mat response;

    // Untimed warm-up call.
    cv::cornerMinEigenVal(img, response, block, aperture, border);

    TEST_CYCLE()
    {
        cv::cornerMinEigenVal(img, response, block, aperture, border);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
    ALL_DEVICES,
    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
//////////////////////////////////////////////////////////////////////
// PyrDown
// Times cv::pyrDown for each matrix size and type.
GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size matSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat input(matSize, matType);
    fill(input, 0, 255);

    cv::Mat output;

    // Untimed warm-up call.
    cv::pyrDown(input, output);

    TEST_CYCLE()
    {
        cv::pyrDown(input, output);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
//////////////////////////////////////////////////////////////////////
// PyrUp
// Times cv::pyrUp for each matrix size and type.
GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size matSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat input(matSize, matType);
    fill(input, 0, 255);

    cv::Mat output;

    // Untimed warm-up call.
    cv::pyrUp(input, output);

    TEST_CYCLE()
    {
        cv::pyrUp(input, output);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
//////////////////////////////////////////////////////////////////////
// CvtColor
// Times cv::cvtColor for each (size, depth, conversion) combination.
// The source has info.scn channels; info.dcn is passed as the destination
// channel count and info.code as the conversion code.
GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, CvtColorInfo)
{
    const cv::Size matSize = GET_PARAM(1);
    const int matDepth = GET_PARAM(2);
    const CvtColorInfo conversion = GET_PARAM(3);

    const int srcType = CV_MAKETYPE(matDepth, conversion.scn);
    cv::Mat input(matSize, srcType);
    fill(input, 0, 255);

    cv::Mat output;

    // Untimed warm-up call.
    cv::cvtColor(input, output, conversion.code, conversion.dcn);

    TEST_CYCLE()
    {
        cv::cvtColor(input, output, conversion.code, conversion.dcn);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
    testing::Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
                    CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
                    CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
                    CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
                    CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
                    CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
                    CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
                    CvtColorInfo(3, 3, cv::COLOR_HLS2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2Lab),
                    CvtColorInfo(3, 3, cv::COLOR_RGB2Lab),
                    CvtColorInfo(3, 3, cv::COLOR_BGR2Luv),
                    CvtColorInfo(3, 3, cv::COLOR_RGB2Luv),
                    CvtColorInfo(3, 3, cv::COLOR_Lab2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_Lab2RGB),
                    CvtColorInfo(3, 3, cv::COLOR_Luv2BGR),
                    CvtColorInfo(3, 3, cv::COLOR_Luv2RGB),
                    CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
                    CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
                    CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
                    CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
                    CvtColorInfo(4, 4, cv::COLOR_RGBA2mRGBA))));
//////////////////////////////////////////////////////////////////////
// HoughLines
IMPLEMENT_PARAM_CLASS(DoSort, bool)

// Times cv::HoughLines on a synthetic binary image covered with random
// line segments. The DoSort parameter is part of the fixture signature but
// is not read by this body.
GPU_PERF_TEST(HoughLines, cv::gpu::DeviceInfo, cv::Size, DoSort)
{
    declare.time(30.0);

    const cv::Size imageSize = GET_PARAM(1);

    const float rho = 1.0f;
    const float theta = static_cast<float>(CV_PI / 180.0f);
    const int threshold = 300;

    // Fixed seed keeps the generated image identical between runs.
    cv::RNG rng(123456789);

    cv::Mat src(imageSize, CV_8UC1, cv::Scalar::all(0));

    const int numLines = rng.uniform(500, 2000);
    for (int lineIdx = 0; lineIdx < numLines; ++lineIdx)
    {
        cv::Point p1(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
        cv::Point p2(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
        cv::line(src, p1, p2, cv::Scalar::all(255), 2);
    }

    std::vector<cv::Vec2f> detected;

    // Untimed warm-up call.
    cv::HoughLines(src, detected, rho, theta, threshold);

    TEST_CYCLE()
    {
        cv::HoughLines(src, detected, rho, theta, threshold);
    }
}

INSTANTIATE_TEST_CASE_P(ImgProc, HoughLines, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(DoSort(false), DoSort(true))));
#endif

@ -1,158 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
// Host-side 4-connected component labeling by stack-based flood fill.
//
// operator() writes a component index (0, 1, 2, ...) for every pixel into the
// supplied CV_32SC1 label matrix. Two neighbouring pixels are joined when the
// InInterval predicate accepts their intensity difference.
//
// NOTE(review): the code indexes image.data as single-byte pixels, so it
// presumably expects an 8-bit single-channel image - confirm against callers.
struct GreedyLabeling
{
    // Pixel coordinate kept on the flood-fill stack.
    struct dot
    {
        int x;
        int y;

        static dot make(int i, int j)
        {
            dot d; d.x = i; d.y = j;
            return d;
        }
    };

    // Predicate: true when (a - b) lies in the closed interval [-_lo, _hi].
    struct InInterval
    {
        InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {}

        const int lo, hi;

        bool operator() (const unsigned char a, const unsigned char b) const
        {
            int d = a - b;
            return lo <= d && d <= hi;
        }
    };

    // Keeps a reference to img, allocates the label matrix (filled with -1)
    // and a stack with one slot per pixel.
    GreedyLabeling(cv::Mat img)
    : image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)), stack(new dot[image.cols * image.rows]) {}

    ~GreedyLabeling() { delete[] stack; }

    // Labels every pixel of `image` into `labels` (same size, CV_32SC1).
    void operator() (cv::Mat labels) const
    {
        labels.setTo(cv::Scalar::all(-1));
        InInterval inInt(0, 2);

        int cc = -1;

        int* dist_labels = (int*)labels.data;
        const int pitch = static_cast<int>(labels.step1());

        const unsigned char* source = image.data;
        // Signed row stride: negating the unsigned result of step1() would
        // wrap around instead of producing a negative offset.
        const int srcStep = static_cast<int>(image.step1());

        const int width = image.cols;
        const int height = image.rows;

        for (int j = 0; j < height; ++j)
            for (int i = 0; i < width; ++i)
            {
                if (dist_labels[j * pitch + i] != -1) continue;

                // Start a new component at the first unlabeled pixel.
                ++cc;
                dist_labels[j * pitch + i] = cc;

                dot* top = stack;
                *top++ = dot::make(i, j);

                // Pixels are labeled when pushed, so each pixel enters the
                // stack at most once and width * height slots always suffice.
                // The loop stops on an empty stack without ever reading
                // stack[-1].
                while (top != stack)
                {
                    const dot p = *--top;

                    int* dl = &dist_labels[p.y * pitch + p.x];
                    const unsigned char* sp = &source[p.y * srcStep + p.x];

                    //right
                    if (p.x < (width - 1) && dl[+1] == -1 && inInt(sp[0], sp[+1]))
                    {
                        dl[+1] = cc;
                        *top++ = dot::make(p.x + 1, p.y);
                    }

                    //left
                    if (p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1]))
                    {
                        dl[-1] = cc;
                        *top++ = dot::make(p.x - 1, p.y);
                    }

                    //bottom
                    if (p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+srcStep]))
                    {
                        dl[+pitch] = cc;
                        *top++ = dot::make(p.x, p.y + 1);
                    }

                    //top
                    if (p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-srcStep]))
                    {
                        dl[-pitch] = cc;
                        *top++ = dot::make(p.x, p.y - 1);
                    }
                }
            }
    }

    cv::Mat image;
    cv::Mat _labels;
    dot* stack;

private:
    // Not copyable: a copy would share `stack` and delete it twice.
    GreedyLabeling(const GreedyLabeling&);
    GreedyLabeling& operator=(const GreedyLabeling&);
};
}
// Times the host GreedyLabeling reference on the aloe-disp test image.
// The cv::Size parameter is part of the fixture signature but is not read.
GPU_PERF_TEST(ConnectedComponents, cv::gpu::DeviceInfo, cv::Size)
{
    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
    cv::gpu::setDevice(devInfo.deviceID());

    cv::Mat image = readImage("gpu/labeling/aloe-disp.png", cv::IMREAD_GRAYSCALE);
    // Fail early when the test data is missing instead of timing an empty image.
    ASSERT_FALSE(image.empty());

    GreedyLabeling host(image);

    // Untimed warm-up call.
    host(host._labels);

    declare.time(1.0);

    TEST_CYCLE()
    {
        host(host._labels);
    }
}

INSTANTIATE_TEST_CASE_P(Labeling, ConnectedComponents, testing::Combine(ALL_DEVICES, testing::Values(cv::Size(261, 262))));
#endif

@ -1,20 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA

// Initializes Google Test and the perf framework, then runs every test.
int main(int argc, char** argv)
{
    testing::InitGoogleTest(&argc, argv);
    perf::TestBase::Init(argc, argv);

    const int status = RUN_ALL_TESTS();
    return status;
}

#else

// Build without CUDA: print a notice and exit successfully.
int main()
{
    printf("OpenCV was built without CUDA support\n");
    return 0;
}

#endif

@ -1,124 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////////////////////
// SetTo
// Times cv::Mat::setTo with a constant scalar for each size and type.
GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size matSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat target(matSize, matType);
    cv::Scalar value(1, 2, 3, 4);

    // Untimed warm-up call.
    target.setTo(value);

    TEST_CYCLE()
    {
        target.setTo(value);
    }
}

INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// SetToMasked
// Times the masked form of cv::Mat::setTo for each size and type.
GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size matSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat target(matSize, matType);
    fill(target, 0, 255);

    cv::Mat mask(matSize, CV_8UC1);
    fill(mask, 0, 2);

    cv::Scalar value(1, 2, 3, 4);

    // Untimed warm-up call.
    target.setTo(value, mask);

    TEST_CYCLE()
    {
        target.setTo(value, mask);
    }
}

INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// CopyToMasked
// Times the masked form of cv::Mat::copyTo for each size and type.
GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
{
    const cv::Size matSize = GET_PARAM(1);
    const int matType = GET_PARAM(2);

    cv::Mat input(matSize, matType);
    fill(input, 0, 255);

    cv::Mat mask(matSize, CV_8UC1);
    fill(mask, 0, 2);

    cv::Mat output;

    // Untimed warm-up call.
    input.copyTo(output, mask);

    TEST_CYCLE()
    {
        input.copyTo(output, mask);
    }
}

INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
//////////////////////////////////////////////////////////////////////
// ConvertTo
// Times cv::Mat::convertTo (scale 0.5, shift 1.0) between every pair of
// source and destination depths.
GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
{
    const cv::Size matSize = GET_PARAM(1);
    const int srcDepth = GET_PARAM(2);
    const int dstDepth = GET_PARAM(3);

    cv::Mat input(matSize, srcDepth);
    fill(input, 0, 255);

    cv::Mat output;

    // Untimed warm-up call.
    input.convertTo(output, dstDepth, 0.5, 1.0);

    TEST_CYCLE()
    {
        input.convertTo(output, dstDepth, 0.5, 1.0);
    }
}

INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
    ALL_DEVICES,
    GPU_TYPICAL_MAT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F))));
#endif

@ -1,74 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
///////////////////////////////////////////////////////////////
// HOG
// Times cv::HOGDescriptor::detectMultiScale on the road test image.
// The SVM coefficients are taken from the gpu module's default people
// detector.
GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
{
    cv::Mat frame = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame.empty());

    std::vector<cv::Rect> detections;

    cv::HOGDescriptor descriptor;
    descriptor.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());

    // Untimed warm-up call.
    descriptor.detectMultiScale(frame, detections);

    TEST_CYCLE()
    {
        descriptor.detectMultiScale(frame, detections);
    }
}

INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
///////////////////////////////////////////////////////////////
// HaarClassifier
// Times cv::CascadeClassifier::detectMultiScale with the frontal-face Haar
// cascade on a 640x480 test image.
GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
{
    cv::Mat frame = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame.empty());

    cv::CascadeClassifier classifier;
    ASSERT_TRUE(classifier.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));

    std::vector<cv::Rect> detections;

    // Untimed warm-up call.
    classifier.detectMultiScale(frame, detections);

    TEST_CYCLE()
    {
        classifier.detectMultiScale(frame, detections);
    }
}

INSTANTIATE_TEST_CASE_P(ObjDetect, HaarClassifier, ALL_DEVICES);
//===================== LBP cascade ==========================//
// Times cv::CascadeClassifier::detectMultiScale with the frontal-face LBP
// cascade on a 640x480 test image.
GPU_PERF_TEST_1(LBPClassifier, cv::gpu::DeviceInfo)
{
    cv::Mat frame = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame.empty());

    cv::CascadeClassifier classifier;
    ASSERT_TRUE(classifier.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));

    std::vector<cv::Rect> detections;

    // Untimed warm-up call.
    classifier.detectMultiScale(frame, detections);

    TEST_CYCLE()
    {
        classifier.detectMultiScale(frame, detections);
    }
}

INSTANTIATE_TEST_CASE_P(ObjDetect, LBPClassifier, ALL_DEVICES);
#endif

@ -1,220 +0,0 @@
#include "perf_cpu_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
// Fills m with uniformly distributed values parameterised by a and b.
// The generator is re-created with a fixed seed on every call, so equal
// arguments always yield equal contents.
void fill(Mat& m, double a, double b)
{
    RNG generator(123456789);

    generator.fill(m, RNG::UNIFORM, a, b);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
{
"BGR2BGRA",
"BGRA2BGR",
"BGR2RGBA",
"RGBA2BGR",
"BGR2RGB",
"BGRA2RGBA",
"BGR2GRAY",
"RGB2GRAY",
"GRAY2BGR",
"GRAY2BGRA",
"BGRA2GRAY",
"RGBA2GRAY",
"BGR2BGR565",
"RGB2BGR565",
"BGR5652BGR",
"BGR5652RGB",
"BGRA2BGR565",
"RGBA2BGR565",
"BGR5652BGRA",
"BGR5652RGBA",
"GRAY2BGR565",
"BGR5652GRAY",
"BGR2BGR555",
"RGB2BGR555",
"BGR5552BGR",
"BGR5552RGB",
"BGRA2BGR555",
"RGBA2BGR555",
"BGR5552BGRA",
"BGR5552RGBA",
"GRAY2BGR555",
"BGR5552GRAY",
"BGR2XYZ",
"RGB2XYZ",
"XYZ2BGR",
"XYZ2RGB",
"BGR2YCrCb",
"RGB2YCrCb",
"YCrCb2BGR",
"YCrCb2RGB",
"BGR2HSV",
"RGB2HSV",
"",
"",
"BGR2Lab",
"RGB2Lab",
"BayerBG2BGR",
"BayerGB2BGR",
"BayerRG2BGR",
"BayerGR2BGR",
"BGR2Luv",
"RGB2Luv",
"BGR2HLS",
"RGB2HLS",
"HSV2BGR",
"HSV2RGB",
"Lab2BGR",
"Lab2RGB",
"Luv2BGR",
"Luv2RGB",
"HLS2BGR",
"HLS2RGB",
"BayerBG2BGR_VNG",
"BayerGB2BGR_VNG",
"BayerRG2BGR_VNG",
"BayerGR2BGR_VNG",
"BGR2HSV_FULL",
"RGB2HSV_FULL",
"BGR2HLS_FULL",
"RGB2HLS_FULL",
"HSV2BGR_FULL",
"HSV2RGB_FULL",
"HLS2BGR_FULL",
"HLS2RGB_FULL",
"LBGR2Lab",
"LRGB2Lab",
"LBGR2Luv",
"LRGB2Luv",
"Lab2LBGR",
"Lab2LRGB",
"Luv2LBGR",
"Luv2LRGB",
"BGR2YUV",
"RGB2YUV",
"YUV2BGR",
"YUV2RGB",
"BayerBG2GRAY",
"BayerGB2GRAY",
"BayerRG2GRAY",
"BayerGR2GRAY",
//YUV 4:2:0 formats family
"YUV2RGB_NV12",
"YUV2BGR_NV12",
"YUV2RGB_NV21",
"YUV2BGR_NV21",
"YUV2RGBA_NV12",
"YUV2BGRA_NV12",
"YUV2RGBA_NV21",
"YUV2BGRA_NV21",
"YUV2RGB_YV12",
"YUV2BGR_YV12",
"YUV2RGB_IYUV",
"YUV2BGR_IYUV",
"YUV2RGBA_YV12",
"YUV2BGRA_YV12",
"YUV2RGBA_IYUV",
"YUV2BGRA_IYUV",
"YUV2GRAY_420",
//YUV 4:2:2 formats family
"YUV2RGB_UYVY",
"YUV2BGR_UYVY",
"YUV2RGB_VYUY",
"YUV2BGR_VYUY",
"YUV2RGBA_UYVY",
"YUV2BGRA_UYVY",
"YUV2RGBA_VYUY",
"YUV2BGRA_VYUY",
"YUV2RGB_YUY2",
"YUV2BGR_YUY2",
"YUV2RGB_YVYU",
"YUV2BGR_YVYU",
"YUV2RGBA_YUY2",
"YUV2BGRA_YUY2",
"YUV2RGBA_YVYU",
"YUV2BGRA_YVYU",
"YUV2GRAY_UYVY",
"YUV2GRAY_YUY2",
// alpha premultiplication
"RGBA2mRGBA",
"mRGBA2RGBA",
"COLORCVT_MAX"
};
*os << str[info.code];
}
void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
{
*os << info.name();
}
// Loads an image given a path relative to the perf test data directory.
// Returns whatever imread returns for the resolved path.
Mat readImage(const string& fileName, int flags)
{
    const string fullPath = perf::TestBase::getDataPath(fileName);
    return imread(fullPath, flags);
}
// Returns the list of CUDA devices for which isCompatible() is true.
// The list is built on the first call and cached in a function-local static.
const vector<DeviceInfo>& devices()
{
    static vector<DeviceInfo> compatible;
    static bool initialized = false;

    if (initialized)
        return compatible;

    const int total = getCudaEnabledDeviceCount();
    compatible.reserve(total);

    for (int id = 0; id < total; ++id)
    {
        DeviceInfo candidate(id);
        if (!candidate.isCompatible())
            continue;

        compatible.push_back(candidate);
    }

    initialized = true;
    return compatible;
}

@ -1,77 +0,0 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
// Fills m with random values parameterised by a and b.
void fill(cv::Mat& m, double a, double b);
// Make the perf framework's matrix type/depth parameter wrappers usable
// without qualification in the test files.
using perf::MatType;
using perf::MatDepth;
// Enum parameter types used by the tests. CV_ENUM is provided by the perf
// test framework (its definition is not part of this header).
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
// Test parameter describing one colour conversion case: a source channel
// count, a destination channel count and a conversion code.
struct CvtColorInfo
{
    int scn;    // source channel count
    int dcn;    // destination channel count
    int code;   // conversion code

    // All fields default to 0; explicit to avoid implicit int conversions.
    explicit CvtColorInfo(int scn_ = 0, int dcn_ = 0, int code_ = 0)
        : scn(scn_)
        , dcn(dcn_)
        , code(code_)
    {
    }
};

// gtest printer for CvtColorInfo parameters.
void PrintTo(const CvtColorInfo& info, std::ostream* os);
// IMPLEMENT_PARAM_CLASS(name, type)
// Declares a class `name` that wraps a single value of `type`:
//  - constructible implicitly from `type` (default: a value-initialized `type`),
//  - implicitly convertible back to `type`,
//  - with a PrintTo overload that prints "name = <value>" for gtest output.
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << " = " << testing::PrintToString(static_cast< type >(param)); \
}
// Integer test parameter holding a channel count.
IMPLEMENT_PARAM_CLASS(Channels, int)
// gtest printer for DeviceInfo, declared in the type's own namespace.
namespace cv { namespace gpu
{
void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os);
}}
// GPU_PERF_TEST(name, types...)
// Declares a fixture `name` deriving from perf::TestBaseWithParam over a
// std::tr1::tuple of the given parameter types, registers a TEST_P(name, perf)
// that calls RunPerfTestBody(), and opens the definition of
// name::PerfTestBody(). The braces following the macro invocation become
// that function's body.
#define GPU_PERF_TEST(name, ...) \
struct name : perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
// GPU_PERF_TEST_1(name, param_type)
// Single-parameter variant of GPU_PERF_TEST: the fixture derives from
// perf::TestBaseWithParam<param_type> directly instead of wrapping the
// parameter in a tuple. The braces following the macro invocation become
// the body of name::PerfTestBody().
#define GPU_PERF_TEST_1(name, param_type) \
struct name : perf::TestBaseWithParam< param_type > \
{ \
public: \
name() {} \
protected: \
void PerfTestBody(); \
}; \
TEST_P(name, perf){ RunPerfTestBody(); } \
void name :: PerfTestBody()
// Matrix sizes used by most tests: SXGA, 1080p and 1800x1500.
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz1080p, cv::Size(1800, 1500))
// Reads an image by file name; flags default to cv::IMREAD_COLOR.
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
// Device list that backs ALL_DEVICES.
const std::vector<cv::gpu::DeviceInfo>& devices();
// Parameter generator over every entry returned by devices().
#define ALL_DEVICES testing::ValuesIn(devices())
// Fetches the k-th element of the current tuple test parameter.
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

@ -1,466 +0,0 @@
#include "perf_cpu_precomp.hpp"
#ifdef HAVE_CUDA
//////////////////////////////////////////////////////
// GoodFeaturesToTrack
IMPLEMENT_PARAM_CLASS(MinDistance, double)

// Times cv::goodFeaturesToTrack (up to 8000 corners, quality level 0.01) on
// the aloe test image for each minimum-distance setting.
GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
{
    const double minDist = GET_PARAM(1);

    cv::Mat gray = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(gray.empty());

    cv::Mat corners;

    // Untimed warm-up call.
    cv::goodFeaturesToTrack(gray, corners, 8000, 0.01, minDist);

    TEST_CYCLE()
    {
        cv::goodFeaturesToTrack(gray, corners, 8000, 0.01, minDist);
    }
}

INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(
    ALL_DEVICES,
    testing::Values(MinDistance(0.0), MinDistance(3.0))));
//////////////////////////////////////////////////////
// PyrLKOpticalFlowSparse
IMPLEMENT_PARAM_CLASS(GraySource, bool)
IMPLEMENT_PARAM_CLASS(Points, int)
IMPLEMENT_PARAM_CLASS(WinSize, int)
IMPLEMENT_PARAM_CLASS(Levels, int)
IMPLEMENT_PARAM_CLASS(Iters, int)

// Times cv::calcOpticalFlowPyrLK between two test frames. The points to
// track come from goodFeaturesToTrack on a grayscale version of frame 0.
GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize, Levels, Iters)
{
    const bool useGray = GET_PARAM(1);
    const int pointCount = GET_PARAM(2);
    const int winSide = GET_PARAM(3);
    const int levelCount = GET_PARAM(4);
    const int iterCount = GET_PARAM(5);

    const int readFlags = useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR;

    cv::Mat frame0 = readImage("gpu/opticalflow/frame0.png", readFlags);
    ASSERT_FALSE(frame0.empty());

    cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", readFlags);
    ASSERT_FALSE(frame1.empty());

    // Feature detection needs a single-channel image.
    cv::Mat gray_frame;
    if (!useGray)
    {
        cv::cvtColor(frame0, gray_frame, cv::COLOR_BGR2GRAY);
    }
    else
    {
        gray_frame = frame0;
    }

    cv::Mat pts;
    cv::goodFeaturesToTrack(gray_frame, pts, pointCount, 0.01, 0.0);

    cv::Mat nextPts;
    cv::Mat status;

    // Untimed warm-up call.
    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
                             cv::Size(winSide, winSide), levelCount - 1,
                             cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iterCount, 0.01));

    declare.time(20.0);

    TEST_CYCLE()
    {
        cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(),
                                 cv::Size(winSide, winSide), levelCount - 1,
                                 cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, iterCount, 0.01));
    }
}

INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
    ALL_DEVICES,
    testing::Values(GraySource(true), GraySource(false)),
    testing::Values(Points(1000), Points(2000), Points(4000), Points(8000)),
    testing::Values(WinSize(9), WinSize(13), WinSize(17), WinSize(21)),
    testing::Values(Levels(1), Levels(2), Levels(3)),
    testing::Values(Iters(1), Iters(10), Iters(30))));
//////////////////////////////////////////////////////
// FarnebackOpticalFlowTest
// Times cv::calcOpticalFlowFarneback between two grayscale test frames
// with a fixed set of algorithm settings.
GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
{
    cv::Mat prev = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(prev.empty());

    cv::Mat next_frame = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(next_frame.empty());

    cv::Mat flow;

    // Algorithm settings.
    const int numLevels = 5;
    const double pyrScale = 0.5;
    const int winSize = 13;
    const int numIters = 10;
    const int polyN = 5;
    const double polySigma = 1.1;
    const int flags = 0;

    // Untimed warm-up call.
    cv::calcOpticalFlowFarneback(prev, next_frame, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);

    declare.time(10);

    TEST_CYCLE()
    {
        cv::calcOpticalFlowFarneback(prev, next_frame, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
    }
}

INSTANTIATE_TEST_CASE_P(Video, FarnebackOpticalFlowTest, ALL_DEVICES);
//////////////////////////////////////////////////////
// FGDStatModel
// Specialization of cv::Ptr's deleter for the C background-model struct, so
// that a cv::Ptr<CvBGStatModel> releases its object through
// cvReleaseBGStatModel.
namespace cv
{
template<> void Ptr<CvBGStatModel>::delete_obj()
{
cvReleaseBGStatModel(&obj);
}
}
// Times cvUpdateBGStatModel on ten consecutive frames of a test video.
// The model is created from the first frame; only the update is timed.
GPU_PERF_TEST(FGDStatModel, cv::gpu::DeviceInfo, std::string)
{
    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;
    cap >> frame;
    ASSERT_FALSE(frame.empty());

    IplImage ipl_frame = frame;
    cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));

    declare.time(60);

    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
    {
        cap >> frame;
        ASSERT_FALSE(frame.empty());

        // Refresh the IplImage header so it points at the new frame data.
        ipl_frame = frame;

        startTimer();
        next();
        cvUpdateBGStatModel(&ipl_frame, model);
        stopTimer();
    }
}

INSTANTIATE_TEST_CASE_P(Video, FGDStatModel, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
//////////////////////////////////////////////////////
// MOG
IMPLEMENT_PARAM_CLASS(LearningRate, double)

// Times cv::BackgroundSubtractorMOG on ten frames of a test video. Frames
// are converted to the requested channel count outside the timed region.
GPU_PERF_TEST(MOG, cv::gpu::DeviceInfo, std::string, Channels, LearningRate)
{
    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
    const int cn = GET_PARAM(2);
    const double learningRate = GET_PARAM(3);

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;

    cv::BackgroundSubtractorMOG mog;
    cv::Mat foreground;

    cap >> frame;
    ASSERT_FALSE(frame.empty());

    // The video is read as 3-channel BGR; convert when 1 or 4 channels are requested.
    if (cn != 3)
    {
        cv::Mat converted;
        cv::cvtColor(frame, converted, cn == 1 ? cv::COLOR_BGR2GRAY : cv::COLOR_BGR2BGRA);
        cv::swap(converted, frame);
    }

    // Untimed first update.
    mog(frame, foreground, learningRate);

    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
    {
        cap >> frame;
        ASSERT_FALSE(frame.empty());

        if (cn != 3)
        {
            cv::Mat converted;
            cv::cvtColor(frame, converted, cn == 1 ? cv::COLOR_BGR2GRAY : cv::COLOR_BGR2BGRA);
            cv::swap(converted, frame);
        }

        startTimer(); next();
        mog(frame, foreground, learningRate);
        stopTimer();
    }
}

INSTANTIATE_TEST_CASE_P(Video, MOG, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
    testing::Values(Channels(1), Channels(3)/*, Channels(4)*/),
    testing::Values(LearningRate(0.0), LearningRate(0.01))));
//////////////////////////////////////////////////////
// MOG2
// Times the cv::BackgroundSubtractorMOG2 update on ten frames of a test
// video. Frames are converted to the requested channel count outside the
// timed region.
GPU_PERF_TEST(MOG2_update, cv::gpu::DeviceInfo, std::string, Channels)
{
    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
    const int cn = GET_PARAM(2);

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;

    cv::BackgroundSubtractorMOG2 mog2;
    cv::Mat foreground;

    cap >> frame;
    ASSERT_FALSE(frame.empty());

    // The video is read as 3-channel BGR; convert when 1 or 4 channels are requested.
    if (cn != 3)
    {
        cv::Mat converted;
        cv::cvtColor(frame, converted, cn == 1 ? cv::COLOR_BGR2GRAY : cv::COLOR_BGR2BGRA);
        cv::swap(converted, frame);
    }

    // Untimed first update.
    mog2(frame, foreground);

    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
    {
        cap >> frame;
        ASSERT_FALSE(frame.empty());

        if (cn != 3)
        {
            cv::Mat converted;
            cv::cvtColor(frame, converted, cn == 1 ? cv::COLOR_BGR2GRAY : cv::COLOR_BGR2BGRA);
            cv::swap(converted, frame);
        }

        startTimer(); next();
        mog2(frame, foreground);
        stopTimer();
    }
}

INSTANTIATE_TEST_CASE_P(Video, MOG2_update, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
    testing::Values(Channels(1), Channels(3)/*, Channels(4)*/)));
// Times cv::BackgroundSubtractorMOG2::getBackgroundImage after the model has
// been fed ten frames of a test video (the feeding itself is not timed).
GPU_PERF_TEST(MOG2_getBackgroundImage, cv::gpu::DeviceInfo, std::string, Channels)
{
    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
    const int cn = GET_PARAM(2);

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;

    cv::BackgroundSubtractorMOG2 mog2;
    cv::Mat foreground;

    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
    {
        cap >> frame;
        ASSERT_FALSE(frame.empty());

        // The video is read as 3-channel BGR; convert when 1 or 4 channels are requested.
        if (cn != 3)
        {
            cv::Mat converted;
            cv::cvtColor(frame, converted, cn == 1 ? cv::COLOR_BGR2GRAY : cv::COLOR_BGR2BGRA);
            cv::swap(converted, frame);
        }

        mog2(frame, foreground);
    }

    cv::Mat background;

    // Untimed warm-up call.
    mog2.getBackgroundImage(background);

    TEST_CYCLE()
    {
        mog2.getBackgroundImage(background);
    }
}

INSTANTIATE_TEST_CASE_P(Video, MOG2_getBackgroundImage, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
    testing::Values(/*Channels(1),*/ Channels(3)/*, Channels(4)*/)));
//////////////////////////////////////////////////////
// GMG
IMPLEMENT_PARAM_CLASS(MaxFeatures, int)

// Times cv::BackgroundSubtractorGMG on 150 frames of a test video. The
// capture is reopened from the start when the video runs out of frames.
// Channel conversion and frame reading are outside the timed region.
GPU_PERF_TEST(GMG, cv::gpu::DeviceInfo, std::string, Channels, MaxFeatures)
{
    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
    const int cn = GET_PARAM(2);
    const int maxFeatures = GET_PARAM(3);

    cv::VideoCapture cap(inputFile);
    ASSERT_TRUE(cap.isOpened());

    cv::Mat frame;
    cap >> frame;
    ASSERT_FALSE(frame.empty());

    // The video is read as 3-channel BGR; convert when 1 or 4 channels are requested.
    if (cn != 3)
    {
        cv::Mat temp;
        if (cn == 1)
            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
        else
            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
        cv::swap(temp, frame);
    }

    cv::Mat fgmask;

    cv::BackgroundSubtractorGMG gmg;
    gmg.set("maxFeatures", maxFeatures);
    gmg.initialize(frame.size(), 0.0, 255.0);

    // Untimed first update.
    gmg(frame, fgmask);

    for (int i = 0; i < 150; ++i)
    {
        cap >> frame;
        if (frame.empty())
        {
            // End of video: rewind by reopening and make sure a frame was
            // actually read before it is converted and processed.
            cap.open(inputFile);
            cap >> frame;
            ASSERT_FALSE(frame.empty());
        }

        if (cn != 3)
        {
            cv::Mat temp;
            if (cn == 1)
                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
            else
                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
            cv::swap(temp, frame);
        }

        startTimer(); next();
        gmg(frame, fgmask);
        stopTimer();
    }
}

INSTANTIATE_TEST_CASE_P(Video, GMG, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
    testing::Values(Channels(1), Channels(3), Channels(4)),
    testing::Values(MaxFeatures(20), MaxFeatures(40), MaxFeatures(60))));
//////////////////////////////////////////////////////
// VideoWriter
#ifdef WIN32

// Times cv::VideoWriter::write for ten frames read from a test video.
// The writer is opened on the first frame (XVID, 25 fps, frame size taken
// from that frame) outside the timed region.
GPU_PERF_TEST(VideoWriter, cv::gpu::DeviceInfo, std::string)
{
    const double FPS = 25.0;

    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
    const std::string outputFile = cv::tempfile(".avi");

    cv::VideoCapture reader(inputFile);
    ASSERT_TRUE( reader.isOpened() );

    cv::VideoWriter writer;

    cv::Mat frame;

    declare.time(30);

    for (int frameIdx = 0; frameIdx < 10; ++frameIdx)
    {
        reader >> frame;
        ASSERT_FALSE(frame.empty());

        const bool needsOpen = !writer.isOpened();
        if (needsOpen)
            writer.open(outputFile, CV_FOURCC('X', 'V', 'I', 'D'), FPS, frame.size());

        startTimer(); next();
        writer.write(frame);
        stopTimer();
    }
}

INSTANTIATE_TEST_CASE_P(Video, VideoWriter, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));

#endif // WIN32
//////////////////////////////////////////////////////
// VideoReader
// Times reading ten frames from a test video with cv::VideoCapture.
// One frame is read before timing starts.
GPU_PERF_TEST(VideoReader, cv::gpu::DeviceInfo, std::string)
{
    const std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));

    cv::VideoCapture capture(inputFile);
    ASSERT_TRUE( capture.isOpened() );

    cv::Mat frame;

    // Untimed warm-up read.
    capture >> frame;

    declare.time(20);

    TEST_CYCLE_N(10)
    {
        capture >> frame;
    }
}

INSTANTIATE_TEST_CASE_P(Video, VideoReader, testing::Combine(
    ALL_DEVICES,
    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
#endif

@ -420,16 +420,16 @@ void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& imgIdx
const float* distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
if (trainIdx == -1)
if (_trainIdx == -1)
continue;
int imgIdx = *imgIdx_ptr;
int _imgIdx = *imgIdx_ptr;
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, imgIdx, distance);
DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
matches.push_back(m);
}
@ -558,13 +558,13 @@ void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat& trainIdx, const Mat& dis
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
if (trainIdx != -1)
if (_trainIdx != -1)
{
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, 0, distance);
DMatch m(queryIdx, _trainIdx, 0, _distance);
curMatches.push_back(m);
}
@ -680,15 +680,15 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Convert(const Mat& trainIdx, const Mat& im
for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
if (trainIdx != -1)
if (_trainIdx != -1)
{
int imgIdx = *imgIdx_ptr;
int _imgIdx = *imgIdx_ptr;
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, imgIdx, distance);
DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
curMatches.push_back(m);
}
@ -868,25 +868,25 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
const float* distance_ptr = distance.ptr<float>(queryIdx);
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
if (nMatches == 0)
if (nMatched == 0)
{
if (!compactResult)
matches.push_back(vector<DMatch>());
continue;
}
matches.push_back(vector<DMatch>(nMatches));
matches.push_back(vector<DMatch>(nMatched));
vector<DMatch>& curMatches = matches.back();
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
int _trainIdx = *trainIdx_ptr;
float distance = *distance_ptr;
float _distance = *distance_ptr;
DMatch m(queryIdx, trainIdx, 0, distance);
DMatch m(queryIdx, _trainIdx, 0, _distance);
curMatches[i] = m;
}
@ -1009,9 +1009,9 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
const int* imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
const float* distance_ptr = distance.ptr<float>(queryIdx);
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
if (nMatches == 0)
if (nMatched == 0)
{
if (!compactResult)
matches.push_back(vector<DMatch>());
@ -1020,9 +1020,9 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
matches.push_back(vector<DMatch>());
vector<DMatch>& curMatches = matches.back();
curMatches.reserve(nMatches);
curMatches.reserve(nMatched);
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int _trainIdx = *trainIdx_ptr;
int _imgIdx = *imgIdx_ptr;

@ -56,14 +56,14 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat
#else
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
namespace transform_points
namespace transform_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
}
namespace project_points
namespace project_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
}
@ -154,11 +154,11 @@ namespace
class TransformHypothesesGenerator
{
public:
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
const Mat& camera_mat_, int num_points_, int subset_size_,
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
const Mat& camera_mat_, int num_points_, int subset_size_,
Mat rot_matrices_, Mat transl_vectors_)
: object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_),
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
: object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_),
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
transl_vectors(transl_vectors_) {}
void operator()(const BlockedRange& range) const
@ -211,9 +211,10 @@ namespace
void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
int num_iters, float max_dist, int min_inlier_count,
int num_iters, float max_dist, int min_inlier_count,
vector<int>* inliers)
{
(void)min_inlier_count;
CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
CV_Assert(object.cols == image.cols);
@ -236,7 +237,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
Mat transl_vectors(1, num_iters * 3, CV_32F);
// Generate set of hypotheses using small subsets of the input data
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
num_points, subset_size, rot_matrices, transl_vectors);
parallel_for(BlockedRange(0, num_iters), body);
@ -246,7 +247,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
solve_pnp_ransac::computeHypothesisScores(
num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist,
d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist,
d_hypothesis_scores.ptr<int>());
// Find the best hypothesis index

@ -143,7 +143,7 @@ public:
}
unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size maxObjectSize)
bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
{
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
@ -380,12 +380,12 @@ public:
LbpCascade(){}
virtual ~LbpCascade(){}
virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool findLargestObject,
bool visualizeInPlace, cv::Size minObjectSize, cv::Size maxObjectSize)
virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool /*findLargestObject*/,
bool /*visualizeInPlace*/, cv::Size minObjectSize, cv::Size maxObjectSize)
{
CV_Assert(scaleFactor > 1 && image.depth() == CV_8U);
const int defaultObjSearchNum = 100;
// const int defaultObjSearchNum = 100;
const float grouping_eps = 0.2f;
if( !objects.empty() && objects.depth() == CV_32S)

@ -316,7 +316,7 @@ namespace cv { namespace gpu { namespace device
}
}
changed = Emulation::sycthOr(changed);
changed = Emulation::syncthreadsOr(changed);
if (!changed)
break;
@ -474,7 +474,7 @@ namespace cv { namespace gpu { namespace device
}
}
}
} while (Emulation::sycthOr(changed));
} while (Emulation::syncthreadsOr(changed));
}
__global__ void flatten(const DevMem2D edges, DevMem2Di comps)

@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
{
__shared__ int s_queues[4][32 * PIXELS_PER_THREAD];
__shared__ int s_qsize[4];
__shared__ int s_start[4];
__shared__ int s_globStart[4];
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -73,9 +73,10 @@ namespace cv { namespace gpu { namespace device
__syncthreads();
// fill the queue
const uchar* srcRow = src.ptr(y);
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
{
if (src(y, xx))
if (srcRow[xx])
{
const unsigned int val = (y << 16) | xx;
const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
@ -89,36 +90,34 @@ namespace cv { namespace gpu { namespace device
if (threadIdx.x == 0 && threadIdx.y == 0)
{
// find how many items are stored in each list
int total_size = 0;
int totalSize = 0;
for (int i = 0; i < blockDim.y; ++i)
{
s_start[i] = total_size;
total_size += s_qsize[i];
s_globStart[i] = totalSize;
totalSize += s_qsize[i];
}
// calculate the offset in the global list
const int global_offset = atomicAdd(&g_counter, total_size);
const int globalOffset = atomicAdd(&g_counter, totalSize);
for (int i = 0; i < blockDim.y; ++i)
s_start[i] += global_offset;
s_globStart[i] += globalOffset;
}
__syncthreads();
// copy local queues to global queue
const int qsize = s_qsize[threadIdx.y];
for(int i = threadIdx.x; i < qsize; i += blockDim.x)
{
const unsigned int val = s_queues[threadIdx.y][i];
list[s_start[threadIdx.y] + i] = val;
}
int gidx = s_globStart[threadIdx.y] + threadIdx.x;
for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
list[gidx] = s_queues[threadIdx.y][i];
}
int buildPointList_gpu(DevMem2Db src, unsigned int* list)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
@ -130,10 +129,10 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
int total_count;
cudaSafeCall( cudaMemcpy(&total_count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return total_count;
return totalCount;
}
////////////////////////////////////////////////////////////////////////
@ -144,24 +143,26 @@ namespace cv { namespace gpu { namespace device
const int n = blockIdx.x;
const float ang = n * theta;
float sin_ang;
float cos_ang;
sincosf(ang, &sin_ang, &cos_ang);
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const float tabSin = sin_ang * irho;
const float tabCos = cos_ang * irho;
const int shift = (numrho - 1) / 2;
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int qvalue = list[i];
const unsigned int val = list[i];
const int x = (qvalue & 0x0000FFFF);
const int y = (qvalue >> 16) & 0x0000FFFF;
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * tabCos + y * tabSin);
r += (numrho - 1) / 2;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
::atomicAdd(accum.ptr(n + 1) + r + 1, 1);
::atomicAdd(accumRow + r + 1, 1);
}
}
@ -177,30 +178,32 @@ namespace cv { namespace gpu { namespace device
const int n = blockIdx.x;
const float ang = n * theta;
float sin_ang;
float cos_ang;
sincosf(ang, &sin_ang, &cos_ang);
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const float tabSin = sin_ang * irho;
const float tabCos = cos_ang * irho;
const int shift = (numrho - 1) / 2;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int qvalue = list[i];
const unsigned int val = list[i];
const int x = (qvalue & 0x0000FFFF);
const int y = (qvalue >> 16) & 0x0000FFFF;
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * tabCos + y * tabSin);
r += (numrho - 1) / 2;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
__syncthreads();
for (int i = threadIdx.x; i < numrho; i += blockDim.x)
accum(n + 1, i) = smem[i];
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
accumRow[i] = smem[i];
}
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
@ -225,21 +228,21 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////////////////////////////////////////////
// linesGetResult
__global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float threshold, const float theta, const float rho, const int numrho)
__global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const float threshold, const int numrho)
{
__shared__ int smem[8][32];
int r = blockIdx.x * (blockDim.x - 2) + threadIdx.x;
int n = blockIdx.y * (blockDim.y - 2) + threadIdx.y;
const int x = blockIdx.x * (blockDim.x - 2) + threadIdx.x;
const int y = blockIdx.y * (blockDim.y - 2) + threadIdx.y;
if (r >= accum.cols || n >= accum.rows)
if (x >= accum.cols || y >= accum.rows)
return;
smem[threadIdx.y][threadIdx.x] = accum(n, r);
smem[threadIdx.y][threadIdx.x] = accum(y, x);
__syncthreads();
r -= 1;
n -= 1;
const int r = x - 1;
const int n = y - 1;
if (threadIdx.x == 0 || threadIdx.x == blockDim.x - 1 || threadIdx.y == 0 || threadIdx.y == blockDim.y - 1 || r >= accum.cols - 2 || n >= accum.rows - 2)
return;
@ -264,32 +267,32 @@ namespace cv { namespace gpu { namespace device
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols, block.x - 2), divUp(accum.rows, block.y - 2));
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, threshold, theta, rho, accum.cols - 2);
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int total_count;
cudaSafeCall( cudaMemcpy(&total_count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
total_count = ::min(total_count, maxSize);
totalCount = ::min(totalCount, maxSize);
if (doSort && total_count > 0)
if (doSort && totalCount > 0)
{
thrust::device_ptr<float2> out_ptr(out);
thrust::device_ptr<int> votes_ptr(votes);
thrust::sort_by_key(votes_ptr, votes_ptr + total_count, out_ptr, thrust::greater<int>());
thrust::device_ptr<float2> outPtr(out);
thrust::device_ptr<int> votesPtr(votes);
thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
}
return total_count;
return totalCount;
}
}
}}}

@ -0,0 +1,385 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
// Utility function to extract unsigned chars from an unsigned integer.
// Splits the 32-bit word `in` into its four bytes:
//   x = bits 0..7, y = bits 8..15, z = bits 16..23, w = bits 24..31.
__device__ uchar4 int_to_uchar4(unsigned int in)
{
    uchar4 bytes;
    // The mask must be applied with bitwise '&'. A logical '&&' would collapse
    // every operand to 0/1 and discard the actual byte values.
    bytes.x = (in & 0x000000ff) >> 0;
    bytes.y = (in & 0x0000ff00) >> 8;
    bytes.z = (in & 0x00ff0000) >> 16;
    bytes.w = (in & 0xff000000) >> 24;
    return bytes;
}
// Row-wise prefix-sum kernel (first pass of the integral image).
// Each block handles the row selected by blockIdx.x; each thread loads one uint4
// (16 packed bytes) of that row, unpacks it with int_to_uchar4, and produces the
// 16 running sums for its bytes, offset by the totals of all preceding threads
// in the block. The results are written to row blockIdx.x of `integral` as uint4s.
// The body is only compiled for __CUDA_ARCH__ >= 300 (it relies on __shfl_up /
// __shfl_xor); for other targets the kernel body is empty.
__global__ void shfl_integral_horizontal(const PtrStep_<uint4> img, PtrStep_<uint4> integral)
{
#if __CUDA_ARCH__ >= 300
// per-warp totals, later turned into a prefix sum over warps
__shared__ int sums[128];
const int id = threadIdx.x;
const int lane_id = id % warpSize;
const int warp_id = id / warpSize;
// one uint4 (16 bytes) per thread
const uint4 data = img(blockIdx.x, id);
const uchar4 a = int_to_uchar4(data.x);
const uchar4 b = int_to_uchar4(data.y);
const uchar4 c = int_to_uchar4(data.z);
const uchar4 d = int_to_uchar4(data.w);
// thread-local inclusive prefix sum over the 16 unpacked bytes
int result[16];
result[0] = a.x;
result[1] = result[0] + a.y;
result[2] = result[1] + a.z;
result[3] = result[2] + a.w;
result[4] = result[3] + b.x;
result[5] = result[4] + b.y;
result[6] = result[5] + b.z;
result[7] = result[6] + b.w;
result[8] = result[7] + c.x;
result[9] = result[8] + c.y;
result[10] = result[9] + c.z;
result[11] = result[10] + c.w;
result[12] = result[11] + d.x;
result[13] = result[12] + d.y;
result[14] = result[13] + d.z;
result[15] = result[14] + d.w;
int sum = result[15];
// the prefix sum for each thread's 16 values is computed,
// now the final sums (result[15]) need to be shared
// with the other threads and added. To do this,
// the __shfl_up() instruction is used and a shuffle scan
// operation is performed to distribute the sums to the correct
// threads
#pragma unroll
for (int i = 1; i < 32; i *= 2)
{
const int n = __shfl_up(sum, i, 32);
if (lane_id >= i)
{
// NOTE: the inner loop index below shadows the outer scan offset `i`;
// the outer `i` is unaffected because the inner one lives in its own scope.
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] += n;
sum += n;
}
}
// Now the final sum for the warp must be shared
// between warps. This is done by each warp
// having a thread store to shared memory, then
// having some other warp load the values and
// compute a prefix sum, again by using __shfl_up.
// The results are uniformly added back to the warps.
// last thread in the warp holding sum of the warp
// places that in shared
if (threadIdx.x % warpSize == warpSize - 1)
sums[warp_id] = result[15];
__syncthreads();
// warp 0 scans the per-warp totals in place
if (warp_id == 0)
{
int warp_sum = sums[lane_id];
// NOTE(review): the i == 32 iteration looks like a no-op, since lane_id is
// taken modulo warpSize and should never reach 32 - confirm.
#pragma unroll
for (int i = 1; i <= 32; i *= 2)
{
const int n = __shfl_up(warp_sum, i, 32);
if (lane_id >= i)
warp_sum += n;
}
sums[lane_id] = warp_sum;
}
__syncthreads();
int blockSum = 0;
// fold in the scanned total of the preceding warps (warp 0 has nothing to add)
if (warp_id > 0)
{
blockSum = sums[warp_id - 1];
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] += blockSum;
}
// assemble result
// Each thread has 16 values to write, which are
// now integer data (to avoid overflow). Instead of
// each thread writing consecutive uint4s, the
// approach shown here experiments using
// the shuffle command to reformat the data
// inside the registers so that each thread holds
// consecutive data to be written so larger contiguous
// segments can be assembled for writing.
/*
For example data that needs to be written as
GMEM[16] <- x0 x1 x2 x3 y0 y1 y2 y3 z0 z1 z2 z3 w0 w1 w2 w3
but is stored in registers (r0..r3), in four threads (0..3) as:
threadId 0 1 2 3
r0 x0 y0 z0 w0
r1 x1 y1 z1 w1
r2 x2 y2 z2 w2
r3 x3 y3 z3 w3
after apply __shfl_xor operations to move data between registers r1..r3:
threadId 00 01 10 11
x0 y0 z0 w0
xor(01)->y1 x1 w1 z1
xor(10)->z2 w2 x2 y2
xor(11)->w3 z3 y3 x3
and now x0..x3, and z0..z3 can be written out in order by all threads.
In the current code, each register above is actually representing
four integers to be written as uint4's to GMEM.
*/
result[4] = __shfl_xor(result[4] , 1, 32);
result[5] = __shfl_xor(result[5] , 1, 32);
result[6] = __shfl_xor(result[6] , 1, 32);
result[7] = __shfl_xor(result[7] , 1, 32);
result[8] = __shfl_xor(result[8] , 2, 32);
result[9] = __shfl_xor(result[9] , 2, 32);
result[10] = __shfl_xor(result[10], 2, 32);
result[11] = __shfl_xor(result[11], 2, 32);
result[12] = __shfl_xor(result[12], 3, 32);
result[13] = __shfl_xor(result[13], 3, 32);
result[14] = __shfl_xor(result[14], 3, 32);
result[15] = __shfl_xor(result[15], 3, 32);
// Each group of four threads owns a 16-uint4 segment of the output row,
// starting at (threadIdx.x / 4) * 16; the four stores below fill the
// sub-ranges at offsets +0, +8, +4 and +12 of that segment.
uint4* integral_row = integral.ptr(blockIdx.x);
uint4 output;
/////// store at segment offset +0
if (threadIdx.x % 4 == 0)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 2)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16] = output;
/////// store at segment offset +8
if (threadIdx.x % 4 == 2)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 8] = output;
// continuing from the above example,
// this use of __shfl_xor() places the y0..y3 and w0..w3 data
// in order.
#pragma unroll
for (int i = 0; i < 16; ++i)
result[i] = __shfl_xor(result[i], 1, 32);
/////// store at segment offset +4
if (threadIdx.x % 4 == 0)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 2)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[threadIdx.x % 4 + (threadIdx.x / 4) * 16 + 4] = output;
/////// store at segment offset +12
if (threadIdx.x % 4 == 2)
output = make_uint4(result[0], result[1], result[2], result[3]);
if (threadIdx.x % 4 == 3)
output = make_uint4(result[4], result[5], result[6], result[7]);
if (threadIdx.x % 4 == 0)
output = make_uint4(result[8], result[9], result[10], result[11]);
if (threadIdx.x % 4 == 1)
output = make_uint4(result[12], result[13], result[14], result[15]);
integral_row[(threadIdx.x + 2) % 4 + (threadIdx.x / 4) * 16 + 12] = output;
#endif
}
// This kernel computes columnwise prefix sums. When the data input is
// the row sums from above, this completes the integral image.
// The approach here is to have each block compute a local set of sums.
// First, the data covered by the block is loaded into shared memory,
// then instead of performing a sum in shared memory using __syncthreads
// between stages, the data is reformatted so that the necessary sums
// occur inside warps and the shuffle scan operation is used.
// The final set of sums from the block is then propagated, with the block
// computing "down" the image and adding the running sum to the local
// block sums.
// The scan is done in place: `integral` is both input and output.
// The body is only compiled for __CUDA_ARCH__ >= 300; otherwise the kernel is empty.
__global__ void shfl_integral_vertical(DevMem2D_<unsigned int> integral)
{
#if __CUDA_ARCH__ >= 300
// indexed as sums[column within block][row within strip]
// NOTE(review): the second dimension is 9 - presumably 8 rows plus one padding
// element to avoid shared-memory bank conflicts; confirm.
__shared__ unsigned int sums[32][9];
const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
const int lane_id = tidx % 8;
// NOTE(review): threads past the last column return before the __syncthreads()
// and __shfl_up() calls below, while other threads of the same block keep going;
// verify this is safe when integral.cols is not a multiple of blockDim.x.
if (tidx >= integral.cols)
return;
sums[threadIdx.x][threadIdx.y] = 0;
__syncthreads();
// running total of all strips processed so far for this thread's column
unsigned int stepSum = 0;
// walk down the image one strip of blockDim.y rows at a time
// NOTE(review): when integral.rows is not a multiple of blockDim.y, threads leave
// this loop on different iterations although the body contains barriers - confirm.
for (int y = threadIdx.y; y < integral.rows; y += blockDim.y)
{
unsigned int* p = integral.ptr(y) + tidx;
unsigned int sum = *p;
sums[threadIdx.x][threadIdx.y] = sum;
__syncthreads();
// place into SMEM
// shfl scan reduce the SMEM, reformatting so the column
// sums are computed in a warp
// then read out properly
// remap: this thread now handles row j of column k, so that the values of one
// column sit in 8 consecutive threadIdx.x positions
const int j = threadIdx.x % 8;
const int k = threadIdx.x / 8 + threadIdx.y * 4;
int partial_sum = sums[k][j];
// inclusive scan across the 8 row values; lane_id < 8, so the i == 8
// iteration never adds anything
for (int i = 1; i <= 8; i *= 2)
{
int n = __shfl_up(partial_sum, i, 32);
if (lane_id >= i)
partial_sum += n;
}
sums[k][j] = partial_sum;
__syncthreads();
// add the scanned total of the rows above within this strip
if (threadIdx.y > 0)
sum += sums[threadIdx.x][threadIdx.y - 1];
// add the total of all previous strips, then extend it by this strip's total
sum += stepSum;
stepSum += sums[threadIdx.x][blockDim.y - 1];
__syncthreads();
*p = sum;
}
#endif
}
// Host-side launcher for the shuffle-based integral image.
// Runs the horizontal (row prefix-sum) kernel followed by the vertical
// (column prefix-sum) kernel on `stream`; when the default stream (0) is used,
// the call blocks until the device has finished.
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream)
{
    // Pass 1: horizontal scan. One block per image row, and one thread per
    // 16 input bytes (each thread consumes a single uint4).
    {
        const int threadsPerRow = img.cols / 16;
        const int rowBlocks = img.rows;

        cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );

        shfl_integral_horizontal<<<rowBlocks, threadsPerRow, 0, stream>>>((DevMem2D_<uint4>) img, (DevMem2D_<uint4>) integral);
        cudaSafeCall( cudaGetLastError() );
    }

    // Pass 2: vertical scan. 32x8 thread blocks tiled across the columns only;
    // each block walks down the whole image.
    {
        const dim3 threads(32, 8);
        const dim3 blocks(divUp(integral.cols, threads.x), 1);

        shfl_integral_vertical<<<blocks, threads, 0, stream>>>(integral);
        cudaSafeCall( cudaGetLastError() );
    }

    if (stream == 0)
    {
        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
}
}}}

@ -279,7 +279,7 @@ namespace cv { namespace gpu { namespace device
rect.z = __float2int_rn(windowW * scale);
rect.w = __float2int_rn(windowH * scale);
int res = Emulation::smem::atomicInc(classified, (unsigned int)objects.cols);
int res = atomicInc(classified, (unsigned int)objects.cols);
objects(0, res) = rect;
}
}

@ -315,7 +315,7 @@ void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vect
double scale = 1.;
int levels = 0;
for (levels = 0; levels < conf_out.size(); levels++)
for (levels = 0; levels < (int)conf_out.size(); levels++)
{
scale = conf_out[levels].scale;
level_scale.push_back(scale);
@ -332,8 +332,8 @@ void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vect
for (size_t i = 0; i < level_scale.size(); i++)
{
double scale = level_scale[i];
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
double _scale = level_scale[i];
Size sz(cvRound(img.cols / _scale), cvRound(img.rows / _scale));
GpuMat smaller_img;
if (sz == img.size())

@ -57,11 +57,27 @@ namespace cv { namespace gpu { namespace device
namespace hough
{
int buildPointList_gpu(DevMem2Db src, unsigned int* list);
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort);
}
}}}
//////////////////////////////////////////////////////////
// HoughLines
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
{
GpuMat accum, buf;
HoughLines(src, lines, accum, buf, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
{
HoughLinesTransform(src, accum, buf, rho, theta);
HoughLinesGet(accum, lines, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta)
{
using namespace cv::gpu::device::hough;
@ -80,23 +96,23 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf,
CV_Assert(numangle > 0 && numrho > 0);
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum);
accum.setTo(cv::Scalar::all(0));
accum.setTo(Scalar::all(0));
cv::gpu::DeviceInfo devInfo;
DeviceInfo devInfo;
if (count > 0)
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(cv::gpu::FEATURE_SET_COMPUTE_20));
linesAccum_gpu(buf.ptr<unsigned int>(), count, accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
}
void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
{
using namespace cv::gpu::device;
using namespace cv::gpu::device::hough;
CV_Assert(accum.type() == CV_32SC1);
ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
int count = hough::linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
int count = linesGetResult_gpu(accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
if (count > 0)
lines.cols = count;
@ -104,18 +120,6 @@ void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float
lines.release();
}
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
{
cv::gpu::GpuMat accum, buf;
HoughLines(src, lines, accum, buf, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
{
HoughLinesTransform(src, accum, buf, rho, theta);
HoughLinesGet(accum, lines, rho, theta, threshold, doSort, maxLines);
}
void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, OutputArray h_votes_)
{
if (d_lines.empty())
@ -129,14 +133,14 @@ void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, Ou
CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2);
h_lines_.create(1, d_lines.cols, CV_32FC2);
cv::Mat h_lines = h_lines_.getMat();
Mat h_lines = h_lines_.getMat();
d_lines.row(0).download(h_lines);
if (h_votes_.needed())
{
h_votes_.create(1, d_lines.cols, CV_32SC1);
cv::Mat h_votes = h_votes_.getMat();
cv::gpu::GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
Mat h_votes = h_votes_.getMat();
GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
d_votes.download(h_votes);
}
}

@ -223,7 +223,7 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q,
using namespace cv::gpu::device::imgproc;
typedef void (*func_t)(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
static const func_t funcs[2][4] =
static const func_t funcs[2][4] =
{
{reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
{reprojectImageTo3D_gpu<uchar, float4>, 0, 0, reprojectImageTo3D_gpu<short, float4>}
@ -261,6 +261,12 @@ namespace
}
}
#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__ > 4
typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
#else
typedef Npp32s Npp32s_a;
#endif
void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
@ -308,7 +314,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
case CV_32FC1:
{
Npp32f val = saturate_cast<Npp32f>(value[0]);
Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));
Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
@ -527,32 +533,86 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)
integralBuffered(src, sum, buffer, s);
}
// Declaration only: shfl_integral_gpu has no body here, so its definition must
// live in another translation unit (presumably a .cu file -- TODO confirm).
// Takes a byte image, an unsigned-int output matrix and the CUDA stream to use.
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream);
}
}}}
void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)
{
CV_Assert(src.type() == CV_8UC1);
if (sum.cols != src.cols + 1 && sum.rows != src.rows + 1)
sum.create(src.rows + 1, src.cols + 1, CV_32S);
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
cudaStream_t stream = StreamAccessor::getStream(s);
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
DeviceInfo info;
Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
if (info.supports(WARP_SHUFFLE_FUNCTIONS))
{
GpuMat src16;
cudaStream_t stream = StreamAccessor::getStream(s);
if (src.cols % 16 == 0)
src16 = src;
else
{
ensureSizeIsEnough(src.rows, ((src.cols + 15) / 16) * 16, src.type(), buffer);
NppStStreamHandler h(stream);
GpuMat inner = buffer(Rect(0, 0, src.cols, src.rows));
ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
if (s)
{
s.enqueueMemSet(buffer, Scalar::all(0));
s.enqueueCopy(src, inner);
}
else
{
buffer.setTo(Scalar::all(0));
src.copyTo(inner);
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
src16 = buffer;
}
sum.create(src16.rows + 1, src16.cols + 1, CV_32SC1);
if (s)
s.enqueueMemSet(sum, Scalar::all(0));
else
sum.setTo(Scalar::all(0));
GpuMat inner = sum(Rect(1, 1, src16.cols, src16.rows));
cv::gpu::device::imgproc::shfl_integral_gpu(src16, inner, stream);
if (src16.cols != src.cols)
sum = sum(Rect(0, 0, src.cols + 1, src.rows + 1));
}
else
{
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
NppStStreamHandler h(stream);
ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
//////////////////////////////////////////////////////////////////////////////
@ -1334,7 +1394,7 @@ Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size
int width = (result_size.width + 2) / 3;
int height = (result_size.height + 2) / 3;
width = std::min(width, result_size.width);
height = std::min(height, result_size.height);
height = std::min(height, result_size.height);
return Size(width, height);
}
@ -1374,7 +1434,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
cufftHandle planR2C, planC2R;
cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );
cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );

@ -52,9 +52,9 @@ void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&)
#else
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
namespace match_template
namespace match_template
{
void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
@ -71,47 +71,47 @@ namespace cv { namespace gpu { namespace device
void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC2(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_g,
DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC3(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
const DevMem2D_<unsigned int> image_sum_b,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC4(
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
int w, int h,
const DevMem2D_<unsigned int> image_sum_r,
const DevMem2D_<unsigned int> image_sum_g,
const DevMem2D_<unsigned int> image_sum_b,
const DevMem2D_<unsigned int> image_sum_a,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
unsigned int templ_sum_a,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
unsigned int templ_sum_a,
DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8U(
int w, int h, const DevMem2D_<unsigned int> image_sum,
int w, int h, const DevMem2D_<unsigned int> image_sum,
const DevMem2D_<unsigned long long> image_sqsum,
unsigned int templ_sum, unsigned long long templ_sqsum,
DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
int w, int h,
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
int w, int h,
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace device
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
DevMem2Df result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
int w, int h,
int w, int h,
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
@ -131,7 +131,7 @@ namespace cv { namespace gpu { namespace device
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
DevMem2Df result, cudaStream_t stream);
void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream);
void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream);
@ -140,17 +140,17 @@ namespace cv { namespace gpu { namespace device
using namespace ::cv::gpu::device::match_template;
namespace
namespace
{
// Evaluates optimal template's area threshold. If
// template's area is less than the threshold, we use naive match
// Evaluates optimal template's area threshold. If
// template's area is less than the threshold, we use naive match
// template version, otherwise FFT-based (if available)
int getTemplateThreshold(int method, int depth)
{
switch (method)
{
case CV_TM_CCORR:
case CV_TM_CCORR:
if (depth == CV_32F) return 250;
if (depth == CV_8U) return 300;
break;
@ -162,10 +162,10 @@ namespace
return 0;
}
void matchTemplate_CCORR_32F(
const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
{
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (templ.size().area() < getTemplateThreshold(CV_TM_CCORR, CV_32F))
{
@ -223,10 +223,11 @@ namespace
normalize_8U(templ.cols, templ.rows, buf.image_sqsums[0], templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
}
// SQDIFF matching for 32F input: allocates `result` as a CV_32F matrix with one
// element per template placement inside `image`, then calls
// matchTemplateNaive_SQDIFF_32F on the CUDA stream wrapped by `stream`.
void matchTemplate_SQDIFF_32F(
const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
{
    // `buf` is not used by this variant; the cast silences the unused-parameter warning.
    (void)buf;

    const int resultRows = image.rows - templ.rows + 1;
    const int resultCols = image.cols - templ.cols + 1;
    result.create(resultRows, resultCols, CV_32F);

    matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
}
@ -362,7 +363,7 @@ namespace
{
case 2:
matchTemplatePrepared_CCOFF_NORMED_8UC2(
templ.cols, templ.rows,
templ.cols, templ.rows,
buf.image_sums[0], buf.image_sqsums[0],
buf.image_sums[1], buf.image_sqsums[1],
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
@ -371,7 +372,7 @@ namespace
break;
case 3:
matchTemplatePrepared_CCOFF_NORMED_8UC3(
templ.cols, templ.rows,
templ.cols, templ.rows,
buf.image_sums[0], buf.image_sqsums[0],
buf.image_sums[1], buf.image_sqsums[1],
buf.image_sums[2], buf.image_sqsums[2],
@ -382,7 +383,7 @@ namespace
break;
case 4:
matchTemplatePrepared_CCOFF_NORMED_8UC4(
templ.cols, templ.rows,
templ.cols, templ.rows,
buf.image_sums[0], buf.image_sqsums[0],
buf.image_sums[1], buf.image_sqsums[1],
buf.image_sums[2], buf.image_sqsums[2],
@ -391,7 +392,7 @@ namespace
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
(unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
(unsigned int)templ_sum[3], (unsigned long long)templ_sqsum[3],
result, StreamAccessor::getStream(stream));
result, StreamAccessor::getStream(stream));
break;
default:
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");

@ -67,7 +67,11 @@
// Guaranteed size cross-platform classifier structures
//
//==============================================================================
// Ncv32f_a is the Ncv32f type used when storage of a different type is accessed
// through a pointer cast. On GCC-compatible compilers the typedef carries
// __may_alias__, so accesses through it are not subject to strict-aliasing
// assumptions; other compilers get a plain alias.
// The version gate folds major and minor into one number: testing
// __GNUC_MINOR__ > 4 on its own wrongly rejected releases such as 5.0 or 6.3.
// 303 == GCC 3.3, the release that introduced the attribute.
#if defined __GNUC__ && (__GNUC__ * 100 + __GNUC_MINOR__ >= 303)
typedef Ncv32f __attribute__((__may_alias__)) Ncv32f_a;
#else
typedef Ncv32f Ncv32f_a;
#endif
struct HaarFeature64
{
@ -87,7 +91,7 @@ struct HaarFeature64
// Stores `weight` in the storage of _ui2.y by writing through an Ncv32f-typed
// pointer cast of that member's address. Always returns NCV_SUCCESS.
__host__ NCVStatus setWeight(Ncv32f weight)
{
((Ncv32f*)&(this->_ui2.y))[0] = weight;
((Ncv32f_a*)&(this->_ui2.y))[0] = weight;
return NCV_SUCCESS;
}
@ -102,7 +106,7 @@ struct HaarFeature64
// Returns the contents of _ui2.y read through an Ncv32f-typed pointer cast.
__device__ __host__ Ncv32f getWeight(void)
{
return *(Ncv32f*)(&this->_ui2.y);
return *(Ncv32f_a*)(&this->_ui2.y);
}
};
@ -168,14 +172,13 @@ public:
}
};
struct HaarClassifierNodeDescriptor32
{
uint1 _ui1;
// Stores `leafValue` in _ui1 by writing through an Ncv32f-typed pointer cast of
// the member's address. Always returns NCV_SUCCESS.
__host__ NCVStatus create(Ncv32f leafValue)
{
*(Ncv32f *)&this->_ui1 = leafValue;
*(Ncv32f_a *)&this->_ui1 = leafValue;
return NCV_SUCCESS;
}
@ -187,7 +190,7 @@ struct HaarClassifierNodeDescriptor32
// Host-only accessor: returns _ui1.x read through an Ncv32f-typed pointer cast.
__host__ Ncv32f getLeafValueHost(void)
{
return *(Ncv32f *)&this->_ui1.x;
return *(Ncv32f_a *)&this->_ui1.x;
}
#ifdef __CUDACC__
@ -203,6 +206,11 @@ struct HaarClassifierNodeDescriptor32
}
};
// Ncv32u_a is the Ncv32u type used when an object of a different type is read
// through a pointer cast. On GCC-compatible compilers the typedef carries
// __may_alias__, so accesses through it are not subject to strict-aliasing
// assumptions; other compilers get a plain alias.
// The version gate folds major and minor into one number: testing
// __GNUC_MINOR__ > 4 on its own wrongly rejected releases such as 5.0 or 6.3.
// 303 == GCC 3.3, the release that introduced the attribute.
#if defined __GNUC__ && (__GNUC__ * 100 + __GNUC_MINOR__ >= 303)
typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
#else
typedef Ncv32u Ncv32u_a;
#endif
struct HaarClassifierNode128
{
@ -216,19 +224,19 @@ struct HaarClassifierNode128
// Stores the threshold in _ui4.y: `t` is read as an Ncv32u through a pointer
// cast of its address and the result is assigned. Always returns NCV_SUCCESS.
__host__ NCVStatus setThreshold(Ncv32f t)
{
this->_ui4.y = *(Ncv32u *)&t;
this->_ui4.y = *(Ncv32u_a *)&t;
return NCV_SUCCESS;
}
// Stores the left node descriptor in _ui4.z: `nl` is read as an Ncv32u through
// a pointer cast of its address. Always returns NCV_SUCCESS.
__host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
{
this->_ui4.z = *(Ncv32u *)&nl;
this->_ui4.z = *(Ncv32u_a *)&nl;
return NCV_SUCCESS;
}
// Stores the right node descriptor in _ui4.w: `nr` is read as an Ncv32u through
// a pointer cast of its address. Always returns NCV_SUCCESS.
__host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
{
this->_ui4.w = *(Ncv32u *)&nr;
this->_ui4.w = *(Ncv32u_a *)&nr;
return NCV_SUCCESS;
}
@ -239,7 +247,7 @@ struct HaarClassifierNode128
// Returns the contents of _ui4.y read through an Ncv32f-typed pointer cast.
__host__ __device__ Ncv32f getThreshold(void)
{
return *(Ncv32f*)&this->_ui4.y;
return *(Ncv32f_a*)&this->_ui4.y;
}
__host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
@ -264,7 +272,7 @@ struct HaarStage64
// Stores the stage threshold in _ui2.x: `t` is read as an Ncv32u through a
// pointer cast of its address and the result is assigned. Always returns
// NCV_SUCCESS.
__host__ NCVStatus setStageThreshold(Ncv32f t)
{
this->_ui2.x = *(Ncv32u *)&t;
this->_ui2.x = *(Ncv32u_a *)&t;
return NCV_SUCCESS;
}
@ -290,7 +298,7 @@ struct HaarStage64
// Returns the contents of _ui2.x read through an Ncv32f-typed pointer cast.
__host__ __device__ Ncv32f getStageThreshold(void)
{
return *(Ncv32f*)&this->_ui2.x;
return *(Ncv32f_a*)&this->_ui2.x;
}
__host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)

@ -1423,7 +1423,7 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
(d_hierSums.ptr() + partSumOffsets[i],
partSumNums[i], NULL,
d_hierSums.ptr() + partSumOffsets[i+1],
NULL);
0);
}
else
{
@ -1433,7 +1433,7 @@ NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,
(d_hierSums.ptr() + partSumOffsets[i],
partSumNums[i], NULL,
NULL,
NULL);
0);
}
ncvAssertCUDALastErrorReturn(NPPST_CUDA_KERNEL_EXECUTION_ERROR);
@ -1557,16 +1557,21 @@ NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
}
// Ncv32u_a is the Ncv32u type used when an object of a different type is read
// through a pointer cast. On GCC-compatible compilers the typedef carries
// __may_alias__, so accesses through it are not subject to strict-aliasing
// assumptions; other compilers get a plain alias.
// The version gate folds major and minor into one number: testing
// __GNUC_MINOR__ > 4 on its own wrongly rejected releases such as 5.0 or 6.3.
// 303 == GCC 3.3, the release that introduced the attribute.
#if defined __GNUC__ && (__GNUC__ * 100 + __GNUC_MINOR__ >= 303)
typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
#else
typedef Ncv32u Ncv32u_a;
#endif
// Ncv32f entry point for compaction: forwards to nppsStCompact_32u with the
// source and destination pointers cast to Ncv32u* and `elemRemove` read as an
// Ncv32u through a pointer cast of its address. Returns that call's status.
NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
Ncv32f *d_dst, Ncv32u *p_dstLen,
Ncv32f elemRemove, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp)
{
return nppsStCompact_32u((Ncv32u *)d_src, srcLen, (Ncv32u *)d_dst, p_dstLen,
*(Ncv32u *)&elemRemove, pBuffer, bufSize, devProp);
*(Ncv32u_a *)&elemRemove, pBuffer, bufSize, devProp);
}
NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove)
{
@ -1602,17 +1607,16 @@ NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
// Ncv32s host entry point: forwards to nppsStCompact_32u_host with both buffers
// cast to Ncv32u* and `elemRemove` read as an Ncv32u through a pointer cast of
// its address. Returns that call's status.
NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove)
{
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u_a *)&elemRemove);
}
// Ncv32f host entry point: forwards to nppsStCompact_32u_host with both buffers
// cast to Ncv32u* and `elemRemove` read as an Ncv32u through a pointer cast of
// its address. Returns that call's status.
NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove)
{
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);
return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u_a *)&elemRemove);
}
//==============================================================================
//
// Filter.cu

@ -51,11 +51,11 @@ namespace cv { namespace gpu { namespace device
struct Emulation
{
static __device__ __forceinline__ int sycthOr(int pred)
static __device__ __forceinline__ int syncthreadsOr(int pred)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// just a compilation stub
return false;
return 0;
#else
return __syncthreads_or(pred);
#endif

@ -119,7 +119,6 @@ namespace
int depth = src.depth();
int num_channels = src.channels();
Size size = src.size();
if (depth == CV_64F)
{

@ -49,36 +49,36 @@ void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& vi
{
release();
cudaVideoCodec codec = static_cast<cudaVideoCodec>(videoFormat.codec);
cudaVideoChromaFormat chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
cudaVideoCodec _codec = static_cast<cudaVideoCodec>(videoFormat.codec);
cudaVideoChromaFormat _chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
cudaVideoCreateFlags videoCreateFlags = (codec == cudaVideoCodec_JPEG || codec == cudaVideoCodec_MPEG2) ?
cudaVideoCreateFlags videoCreateFlags = (_codec == cudaVideoCodec_JPEG || _codec == cudaVideoCodec_MPEG2) ?
cudaVideoCreate_PreferCUDA :
cudaVideoCreate_PreferCUVID;
// Validate video format. These are the currently supported formats via NVCUVID
CV_Assert(cudaVideoCodec_MPEG1 == codec ||
cudaVideoCodec_MPEG2 == codec ||
cudaVideoCodec_MPEG4 == codec ||
cudaVideoCodec_VC1 == codec ||
cudaVideoCodec_H264 == codec ||
cudaVideoCodec_JPEG == codec ||
cudaVideoCodec_YUV420== codec ||
cudaVideoCodec_YV12 == codec ||
cudaVideoCodec_NV12 == codec ||
cudaVideoCodec_YUYV == codec ||
cudaVideoCodec_UYVY == codec );
CV_Assert(cudaVideoChromaFormat_Monochrome == chromaFormat ||
cudaVideoChromaFormat_420 == chromaFormat ||
cudaVideoChromaFormat_422 == chromaFormat ||
cudaVideoChromaFormat_444 == chromaFormat);
CV_Assert(cudaVideoCodec_MPEG1 == _codec ||
cudaVideoCodec_MPEG2 == _codec ||
cudaVideoCodec_MPEG4 == _codec ||
cudaVideoCodec_VC1 == _codec ||
cudaVideoCodec_H264 == _codec ||
cudaVideoCodec_JPEG == _codec ||
cudaVideoCodec_YUV420== _codec ||
cudaVideoCodec_YV12 == _codec ||
cudaVideoCodec_NV12 == _codec ||
cudaVideoCodec_YUYV == _codec ||
cudaVideoCodec_UYVY == _codec );
CV_Assert(cudaVideoChromaFormat_Monochrome == _chromaFormat ||
cudaVideoChromaFormat_420 == _chromaFormat ||
cudaVideoChromaFormat_422 == _chromaFormat ||
cudaVideoChromaFormat_444 == _chromaFormat);
// Fill the decoder-create-info struct from the given video-format struct.
std::memset(&createInfo_, 0, sizeof(CUVIDDECODECREATEINFO));
// Create video decoder
createInfo_.CodecType = codec;
createInfo_.CodecType = _codec;
createInfo_.ulWidth = videoFormat.width;
createInfo_.ulHeight = videoFormat.height;
createInfo_.ulNumDecodeSurfaces = FrameQueue::MaximumSize;
@ -87,7 +87,7 @@ void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& vi
while (createInfo_.ulNumDecodeSurfaces * videoFormat.width * videoFormat.height > 16 * 1024 * 1024)
createInfo_.ulNumDecodeSurfaces--;
createInfo_.ChromaFormat = chromaFormat;
createInfo_.ChromaFormat = _chromaFormat;
createInfo_.OutputFormat = cudaVideoSurfaceFormat_NV12;
createInfo_.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
@ -49,87 +49,128 @@ using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
void print_info()
void printOsInfo()
{
printf("\n");
#if defined _WIN32
# if defined _WIN64
puts("OS: Windows 64");
cout << "OS: Windows x64 \n" << endl;
# else
puts("OS: Windows 32");
cout << "OS: Windows x32 \n" << endl;
# endif
#elif defined linux
# if defined _LP64
puts("OS: Linux 64");
cout << "OS: Linux x64 \n" << endl;
# else
puts("OS: Linux 32");
cout << "OS: Linux x32 \n" << endl;
# endif
#elif defined __APPLE__
# if defined _LP64
puts("OS: Apple 64");
cout << "OS: Apple x64 \n" << endl;
# else
puts("OS: Apple 32");
cout << "OS: Apple x32 \n" << endl;
# endif
#endif
}
int deviceCount = getCudaEnabledDeviceCount();
void printCudaInfo()
{
#ifndef HAVE_CUDA
cout << "OpenCV was built without CUDA support \n" << endl;
#else
int driver;
cudaDriverGetVersion(&driver);
printf("CUDA Driver version: %d\n", driver);
printf("CUDA Runtime version: %d\n", CUDART_VERSION);
printf("CUDA device count: %d\n\n", deviceCount);
cout << "CUDA Driver version: " << driver << '\n';
cout << "CUDA Runtime version: " << CUDART_VERSION << '\n';
cout << endl;
cout << "GPU module was compiled for the following GPU archs:" << endl;
cout << " BIN: " << CUDA_ARCH_BIN << '\n';
cout << " PTX: " << CUDA_ARCH_PTX << '\n';
cout << endl;
int deviceCount = getCudaEnabledDeviceCount();
cout << "CUDA device count: " << deviceCount << '\n';
cout << endl;
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
printf("Device %d:\n", i);
printf(" Name: %s\n", info.name().c_str());
printf(" Compute capability version: %d.%d\n", info.majorVersion(), info.minorVersion());
printf(" Total memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0));
printf(" Free memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0));
if (info.isCompatible())
puts(" This device is compatible with current GPU module build\n");
else
puts(" This device is NOT compatible with current GPU module build\n");
}
cout << "Device [" << i << "] \n";
cout << "\t Name: " << info.name() << '\n';
cout << "\t Compute capability: " << info.majorVersion() << '.' << info.minorVersion()<< '\n';
cout << "\t Multi Processor Count: " << info.multiProcessorCount() << '\n';
cout << "\t Total memory: " << static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0) << " Mb \n";
cout << "\t Free memory: " << static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0) << " Mb \n";
if (!info.isCompatible())
cout << "\t !!! This device is NOT compatible with current GPU module build \n";
puts("GPU module was compiled for the following GPU archs:");
printf(" BIN: %s\n", CUDA_ARCH_BIN);
printf(" PTX: %s\n\n", CUDA_ARCH_PTX);
cout << endl;
}
#endif
}
enum OutputLevel
int main(int argc, char** argv)
{
OutputLevelNone,
OutputLevelCompact,
OutputLevelFull
};
try
{
CommandLineParser cmd(argc, (const char**)argv,
"{ print_info_only | print_info_only | false | Print information about system and exit }"
"{ device | device | -1 | Device on which tests will be executed (-1 means all devices) }"
"{ nvtest_output_level | nvtest_output_level | compact | NVidia test verbosity level }"
);
extern OutputLevel nvidiaTestOutputLevel;
printOsInfo();
printCudaInfo();
int main(int argc, char** argv)
{
TS::ptr()->init("gpu");
InitGoogleTest(&argc, argv);
if (cmd.get<bool>("print_info_only"))
return 0;
const char* keys ="{ nvtest_output_level | nvtest_output_level | compact | NVidia test verbosity level }";
int device = cmd.get<int>("device");
if (device < 0)
{
DeviceManager::instance().loadAll();
CommandLineParser parser(argc, (const char**)argv, keys);
cout << "Run tests on all supported devices \n" << endl;
}
else
{
DeviceManager::instance().load(device);
DeviceInfo info(device);
cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl;
}
string outputLevel = parser.get<string>("nvtest_output_level", "none");
string outputLevel = cmd.get<string>("nvtest_output_level");
if (outputLevel == "none")
nvidiaTestOutputLevel = OutputLevelNone;
else if (outputLevel == "compact")
nvidiaTestOutputLevel = OutputLevelCompact;
else if (outputLevel == "full")
nvidiaTestOutputLevel = OutputLevelFull;
if (outputLevel == "none")
nvidiaTestOutputLevel = OutputLevelNone;
else if (outputLevel == "compact")
nvidiaTestOutputLevel = OutputLevelCompact;
else if (outputLevel == "full")
nvidiaTestOutputLevel = OutputLevelFull;
print_info();
TS::ptr()->init("gpu");
InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
return RUN_ALL_TESTS();
}
catch (const exception& e)
{
cerr << e.what() << endl;
return -1;
}
catch (...)
{
cerr << "Unknown error" << endl;
return -1;
}
return 0;
}
#else // HAVE_CUDA

@ -1,7 +1,7 @@
#ifndef __main_test_nvidia_h__
#define __main_test_nvidia_h__
#include<string>
#include <string>
enum OutputLevel
{
@ -10,6 +10,8 @@ enum OutputLevel
OutputLevelFull
};
extern OutputLevel nvidiaTestOutputLevel;
bool nvidia_NPPST_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel);
bool nvidia_NPPST_Squared_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel);
bool nvidia_NPPST_RectStdDev(const std::string& test_data_path, OutputLevel outputLevel);

@ -245,8 +245,8 @@ bool TestHaarCascadeApplication::process()
int devId;
ncvAssertCUDAReturn(cudaGetDevice(&devId), false);
cudaDeviceProp devProp;
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), false);
cudaDeviceProp _devProp;
ncvAssertCUDAReturn(cudaGetDeviceProperties(&_devProp, devId), false);
ncvStat = ncvApplyHaarClassifierCascade_device(
d_integralImage, d_rectStdDev, d_pixelMask,
@ -254,7 +254,7 @@ bool TestHaarCascadeApplication::process()
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
searchRoiU, 1, 1.0f,
*this->allocatorGPU.get(), *this->allocatorCPU.get(),
devProp, 0);
_devProp, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);

@ -1,4 +1,6 @@
#pragma warning (disable : 4408 4201 4100)
#if defined _MSC_VER && _MSC_VER >= 1200
# pragma warning (disable : 4408 4201 4100)
#endif
#include <cstdio>

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -329,7 +331,7 @@ TEST_P(ReprojectImageTo3D, Accuracy)
cv::gpu::GpuMat dst;
cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
cv::Mat dst_gold;
cv::reprojectImageTo3D(disp, dst_gold, Q, false);
@ -343,3 +345,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ReprojectImageTo3D, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -98,3 +100,5 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -3396,3 +3398,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -984,3 +986,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
testing::Values(UseMask(false), UseMask(true))));
} // namespace
#endif // HAVE_CUDA

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -552,3 +554,5 @@ INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

@ -39,11 +39,11 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#include <iostream>
using namespace std;
#ifdef HAVE_CUDA
using namespace std;
using namespace cv;
struct CompactPoints : testing::TestWithParam<gpu::DeviceInfo>
@ -85,3 +85,5 @@ TEST_P(CompactPoints, CanCompactizeSmallInput)
}
INSTANTIATE_TEST_CASE_P(GPU_GlobalMotion, CompactPoints, ALL_DEVICES);
#endif // HAVE_CUDA

@ -40,7 +40,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -323,3 +325,5 @@ INSTANTIATE_TEST_CASE_P(GPU_GpuMat, ConvertTo, testing::Combine(
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -1127,62 +1129,68 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerMinEigen, testing::Combine(
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HoughLines
PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, std::string)
PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, cv::Size, UseRoi)
{
};
void generateLines(cv::Mat& img)
{
img.setTo(cv::Scalar::all(0));
void drawLines(cv::Mat& dst, const std::vector<cv::Vec2f>& lines)
{
for (size_t i = 0; i < lines.size(); ++i)
cv::line(img, cv::Point(20, 0), cv::Point(20, img.rows), cv::Scalar::all(255));
cv::line(img, cv::Point(0, 50), cv::Point(img.cols, 50), cv::Scalar::all(255));
cv::line(img, cv::Point(0, 0), cv::Point(img.cols, img.rows), cv::Scalar::all(255));
cv::line(img, cv::Point(img.cols, 0), cv::Point(0, img.rows), cv::Scalar::all(255));
}
void drawLines(cv::Mat& dst, const std::vector<cv::Vec2f>& lines)
{
float rho = lines[i][0], theta = lines[i][1];
cv::Point pt1, pt2;
double a = std::cos(theta), b = std::sin(theta);
double x0 = a*rho, y0 = b*rho;
pt1.x = cvRound(x0 + 1000*(-b));
pt1.y = cvRound(y0 + 1000*(a));
pt2.x = cvRound(x0 - 1000*(-b));
pt2.y = cvRound(y0 - 1000*(a));
cv::line(dst, pt1, pt2, cv::Scalar::all(255));
dst.setTo(cv::Scalar::all(0));
for (size_t i = 0; i < lines.size(); ++i)
{
float rho = lines[i][0], theta = lines[i][1];
cv::Point pt1, pt2;
double a = std::cos(theta), b = std::sin(theta);
double x0 = a*rho, y0 = b*rho;
pt1.x = cvRound(x0 + 1000*(-b));
pt1.y = cvRound(y0 + 1000*(a));
pt2.x = cvRound(x0 - 1000*(-b));
pt2.y = cvRound(y0 - 1000*(a));
cv::line(dst, pt1, pt2, cv::Scalar::all(255));
}
}
}
};
TEST_P(HoughLines, Accuracy)
{
const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const std::string fileName = GET_PARAM(1);
const cv::Size size = GET_PARAM(1);
const bool useRoi = GET_PARAM(2);
const float rho = 1.0f;
const float theta = static_cast<float>(CV_PI / 180);
const int threshold = 50;
cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const float theta = 1.5f * CV_PI / 180.0f;
const int threshold = 100;
cv::Mat edges;
cv::Canny(img, edges, 50, 200);
cv::Mat src(size, CV_8UC1);
generateLines(src);
cv::gpu::GpuMat d_lines;
cv::gpu::HoughLines(loadMat(edges), d_lines, rho, theta, threshold);
cv::gpu::HoughLines(loadMat(src, useRoi), d_lines, rho, theta, threshold);
std::vector<cv::Vec2f> lines;
cv::gpu::HoughLinesDownload(d_lines, lines);
cv::Mat dst(img.size(), CV_8UC1, cv::Scalar::all(0));
drawLines(dst, lines);
std::vector<cv::Vec2f> lines_gold;
cv::HoughLines(edges, lines_gold, rho, theta, threshold);
cv::Mat dst_gold(img.size(), CV_8UC1, cv::Scalar::all(0));
drawLines(dst_gold, lines_gold);
cv::Mat dst(size, CV_8UC1);
drawLines(dst, lines);
ASSERT_MAT_NEAR(dst_gold, dst, 0.0);
ASSERT_MAT_NEAR(src, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HoughLines, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("../cv/shared/pic1.png"),
std::string("../cv/shared/pic3.png"),
std::string("../cv/shared/pic5.png"),
std::string("../cv/shared/pic6.png"))));
DIFFERENT_SIZES,
WHOLE_SUBMAT));
} // namespace
#endif // HAVE_CUDA

@ -39,9 +39,7 @@
// the use of this software, even if advised of the possibility of such damage.
//M*/
#include "precomp.hpp"
#include <string>
#include <iostream>
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
@ -141,10 +139,9 @@ namespace {
if ( (_labels.at<int>(j,i) == gpu.at<int>(j,i + 1)) && (diff.at<int>(j, i) != diff.at<int>(j,i + 1)))
{
outliers++;
// std::cout << j << " " << i << " " << _labels.at<int>(j,i) << " " << gpu.at<int>(j,i + 1) << " " << diff.at<int>(j, i) << " " << diff.at<int>(j,i + 1) << std::endl;
}
}
ASSERT_FALSE(outliers);
ASSERT_TRUE(outliers < gpu.cols + gpu.rows);
}
cv::Mat image;
@ -164,7 +161,7 @@ struct Labeling : testing::TestWithParam<cv::gpu::DeviceInfo>
cv::Mat loat_image()
{
return cv::imread(std::string( cvtest::TS::ptr()->get_data_path() ) + "labeling/IMG_0727.JPG");
return cv::imread(std::string( cvtest::TS::ptr()->get_data_path() ) + "labeling/label.png");
}
};
@ -191,12 +188,8 @@ TEST_P(Labeling, ConnectedComponents)
ASSERT_NO_THROW(cv::gpu::labelComponents(mask, components));
host.checkCorrectness(cv::Mat(components));
cv::imshow("test", image);
cv::waitKey(0);
cv::imshow("test", host._labels);
cv::waitKey(0);
}
INSTANTIATE_TEST_CASE_P(ConnectedComponents, Labeling, ALL_DEVICES);
#endif
#endif // HAVE_CUDA

@ -39,21 +39,15 @@
//
//M*/
#include <main_test_nvidia.h>
#include "precomp.hpp"
#include "test_precomp.hpp"
OutputLevel nvidiaTestOutputLevel = OutputLevelCompact;
#ifdef HAVE_CUDA
using namespace cvtest;
using namespace testing;
//enum OutputLevel
//{
// OutputLevelNone,
// OutputLevelCompact,
// OutputLevelFull
//};
struct NVidiaTest : TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
@ -73,8 +67,6 @@ struct NVidiaTest : TestWithParam<cv::gpu::DeviceInfo>
struct NPPST : NVidiaTest {};
struct NCV : NVidiaTest {};
OutputLevel nvidiaTestOutputLevel = OutputLevelCompact;
//TEST_P(NPPST, Integral)
//{
// bool res = nvidia_NPPST_Integral_Image(path, nvidiaTestOutputLevel);

@ -39,8 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include <string>
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
namespace {
@ -302,13 +303,13 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::gpu::DeviceInfo, int)
TEST_P(LBP_Read_classifier, Accuracy)
{
cv::gpu::CascadeClassifier_GPU classifier;
cv::gpu::CascadeClassifier_GPU classifier;
std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
ASSERT_TRUE(classifier.load(classifierXmlPath));
}
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier,
testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier,
testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
PARAM_TEST_CASE(LBP_classify, cv::gpu::DeviceInfo, int)
@ -344,31 +345,34 @@ TEST_P(LBP_classify, Accuracy)
for (; it != rects.end(); ++it)
cv::rectangle(markedImage, *it, CV_RGB(0, 0, 255));
cv::gpu::CascadeClassifier_GPU gpuClassifier;
cv::gpu::CascadeClassifier_GPU gpuClassifier;
ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
cv::gpu::GpuMat gpu_rects;
cv::gpu::GpuMat tested(grey);
int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
#if defined (LOG_CASCADE_STATISTIC)
cv::Mat downloaded(gpu_rects);
const cv::Rect* faces = downloaded.ptr<cv::Rect>();
const cv::Rect* faces = downloaded.ptr<cv::Rect>();
for (int i = 0; i < count; i++)
{
cv::Rect r = faces[i];
#if defined (LOG_CASCADE_STATISTIC)
std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
#endif
}
#endif
#if defined (LOG_CASCADE_STATISTIC)
cv::imshow("Res", markedImage); cv::waitKey();
cv::imshow("Res", markedImage); cv::waitKey();
#endif
(void)count;
}
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_classify,
testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
} // namespace
#endif // HAVE_CUDA

@ -39,4 +39,4 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"

@ -57,8 +57,10 @@
#include <limits>
#include <algorithm>
#include <iterator>
#include <stdexcept>
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/calib3d/calib3d.hpp"
@ -72,6 +74,7 @@
#include "utility.hpp"
#include "interpolation.hpp"
#include "main_test_nvidia.h"
#ifdef HAVE_CUDA
#include <cuda.h>

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,8 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include <iostream>
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,7 +39,9 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
//#define DUMP
@ -865,3 +867,5 @@ TEST_P(VideoReader, Regression)
INSTANTIATE_TEST_CASE_P(GPU_Video, VideoReader, testing::Combine(
ALL_DEVICES,
testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
#endif // HAVE_CUDA

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
#ifdef HAVE_CUDA

@ -39,13 +39,14 @@
//
//M*/
#include "precomp.hpp"
#include "test_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
using namespace testing::internal;
//////////////////////////////////////////////////////////////////////
// random generators
@ -108,12 +109,12 @@ GpuMat loadMat(const Mat& m, bool useRoi)
//////////////////////////////////////////////////////////////////////
// Image load
Mat readImage(const string& fileName, int flags)
Mat readImage(const std::string& fileName, int flags)
{
return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
return imread(TS::ptr()->get_data_path() + fileName, flags);
}
Mat readImageType(const string& fname, int type)
Mat readImageType(const std::string& fname, int type)
{
Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
if (CV_MAT_CN(type) == 4)
@ -134,50 +135,51 @@ bool supportFeature(const DeviceInfo& info, FeatureSet feature)
return TargetArchs::builtWith(feature) && info.supports(feature);
}
const vector<DeviceInfo>& devices()
DeviceManager& DeviceManager::instance()
{
static vector<DeviceInfo> devs;
static bool first = true;
static DeviceManager obj;
return obj;
}
if (first)
{
int deviceCount = getCudaEnabledDeviceCount();
void DeviceManager::load(int i)
{
devices_.clear();
devices_.reserve(1);
devs.reserve(deviceCount);
ostringstream msg;
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
devs.push_back(info);
}
if (i < 0 || i >= getCudaEnabledDeviceCount())
{
msg << "Incorrect device number - " << i;
throw runtime_error(msg.str());
}
DeviceInfo info(i);
first = false;
if (!info.isCompatible())
{
msg << "Device " << i << " [" << info.name() << "] is NOT compatible with current GPU module build";
throw runtime_error(msg.str());
}
return devs;
devices_.push_back(info);
}
vector<DeviceInfo> devices(FeatureSet feature)
void DeviceManager::loadAll()
{
const vector<DeviceInfo>& d = devices();
int deviceCount = getCudaEnabledDeviceCount();
vector<DeviceInfo> devs_filtered;
devices_.clear();
devices_.reserve(deviceCount);
if (TargetArchs::builtWith(feature))
for (int i = 0; i < deviceCount; ++i)
{
devs_filtered.reserve(d.size());
for (size_t i = 0, size = d.size(); i < size; ++i)
DeviceInfo info(i);
if (info.isCompatible())
{
const DeviceInfo& info = d[i];
if (info.supports(feature))
devs_filtered.push_back(info);
devices_.push_back(info);
}
}
return devs_filtered;
}
//////////////////////////////////////////////////////////////////////
@ -250,7 +252,7 @@ void minMaxLocGold(const Mat& src, double* minVal_, double* maxVal_, Point* minL
namespace
{
template <typename T, typename OutT> string printMatValImpl(const Mat& m, Point p)
template <typename T, typename OutT> std::string printMatValImpl(const Mat& m, Point p)
{
const int cn = m.channels();
@ -269,9 +271,9 @@ namespace
return ostr.str();
}
string printMatVal(const Mat& m, Point p)
std::string printMatVal(const Mat& m, Point p)
{
typedef string (*func_t)(const Mat& m, Point p);
typedef std::string (*func_t)(const Mat& m, Point p);
static const func_t funcs[] =
{

@ -80,14 +80,21 @@ cv::Mat readImageType(const std::string& fname, int type);
//! return true if device supports specified feature and gpu module was built with support the feature.
bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
//! return all devices compatible with current gpu module build.
const std::vector<cv::gpu::DeviceInfo>& devices();
class DeviceManager
{
public:
static DeviceManager& instance();
void load(int i);
void loadAll();
//! return all devices compatible with current gpu module build which support specified feature.
std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; }
private:
std::vector<cv::gpu::DeviceInfo> devices_;
};
#define ALL_DEVICES testing::ValuesIn(devices())
#define DEVICES(feature) testing::ValuesIn(devices(feature))
#define ALL_DEVICES testing::ValuesIn(DeviceManager::instance().values())
//////////////////////////////////////////////////////////////////////
// Additional assertion

@ -193,6 +193,12 @@ elseif(APPLE)
endif()
endif()
# iOS builds only: define HAVE_IOS, add the Objective-C++ camera sources to
# highgui_srcs and append the Apple frameworks below to HIGHGUI_LIBRARIES.
if(IOS)
# Compile definition HAVE_IOS=1 for the sources added to this directory.
add_definitions(-DHAVE_IOS=1)
# The three cap_ios_*.mm files implement CvAbstractCamera, CvPhotoCamera and CvVideoCamera.
list(APPEND highgui_srcs src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)
# Each entry is passed through as a "-framework <name>" linker option.
list(APPEND HIGHGUI_LIBRARIES "-framework Accelerate" "-framework AVFoundation" "-framework CoreGraphics" "-framework CoreImage" "-framework CoreMedia" "-framework CoreVideo" "-framework QuartzCore" "-framework AssetsLibrary")
endif()
if(WIN32)
link_directories("${OpenCV_SOURCE_DIR}/3rdparty/lib") # for ffmpeg wrapper only
include_directories(AFTER SYSTEM "${OpenCV_SOURCE_DIR}/3rdparty/include") # for directshow in VS2005 and multi-monitor support on MinGW

@ -0,0 +1,163 @@
/*
* cap_ios.h
* For iOS video I/O
* by Eduard Feicho on 29/07/12
* Copyright 2012. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#import <UIKit/UIKit.h>
#import <Accelerate/Accelerate.h>
#import <AVFoundation/AVFoundation.h>
#import <ImageIO/ImageIO.h>
#include "opencv2/core/core.hpp"
/////////////////////////////////////// CvAbstractCamera /////////////////////////////////////
@class CvAbstractCamera;

/**
 * Common base class of CvVideoCamera and CvPhotoCamera.
 *
 * It owns the AVCaptureSession, selects the front or back camera, optionally
 * installs an AVCaptureVideoPreviewLayer into a parent view and tracks device
 * orientation changes. Subclasses must override -createCaptureOutput (and
 * -createCustomVideoPreview when useAVCaptureVideoPreviewLayer is NO); the
 * base implementations raise NSInternalInconsistencyException.
 */
@interface CvAbstractCamera : NSObject
{
    AVCaptureSession* captureSession;
    AVCaptureConnection* videoCaptureConnection;
    AVCaptureVideoPreviewLayer *captureVideoPreviewLayer;

    // last portrait/landscape orientation reported by UIDevice (face up/down are ignored)
    UIDeviceOrientation currentDeviceOrientation;

    // result of the UIImagePickerController camera availability check done in the initializers
    BOOL cameraAvailable;
    // YES once the session, input, output and preview have been created
    BOOL captureSessionLoaded;
    // YES between -start and -stop / -pause
    BOOL running;
    BOOL useAVCaptureVideoPreviewLayer;

    AVCaptureDevicePosition defaultAVCaptureDevicePosition;
    AVCaptureVideoOrientation defaultAVCaptureVideoOrientation;
    NSString *const defaultAVCaptureSessionPreset;

    int defaultFPS;

    UIView* parentView;

    // updated by -start from defaultAVCaptureSessionPreset
    int imageWidth;
    int imageHeight;
}

@property (nonatomic, retain) AVCaptureSession* captureSession;
@property (nonatomic, retain) AVCaptureConnection* videoCaptureConnection;

@property (nonatomic, readonly) BOOL running;
@property (nonatomic, readonly) BOOL captureSessionLoaded;

// Initialised to 15 by both initializers.
@property (nonatomic, assign) int defaultFPS;
// Camera to use; initialised to AVCaptureDevicePositionFront, toggled by -switchCameras.
@property (nonatomic, assign) AVCaptureDevicePosition defaultAVCaptureDevicePosition;
// Initialised to AVCaptureVideoOrientationLandscapeLeft.
@property (nonatomic, assign) AVCaptureVideoOrientation defaultAVCaptureVideoOrientation;
// When YES, -start installs an AVCaptureVideoPreviewLayer; otherwise the
// subclass hook -createCustomVideoPreview is called instead.
@property (nonatomic, assign) BOOL useAVCaptureVideoPreviewLayer;
// Preset requested from the session; AVCaptureSessionPresetLow is the fallback.
@property (nonatomic, strong) NSString *const defaultAVCaptureSessionPreset;

@property (nonatomic, assign) int imageWidth;
@property (nonatomic, assign) int imageHeight;

// View whose layer receives the preview layer; may be nil.
@property (nonatomic, retain) UIView* parentView;

// Starts capturing. When called off the main thread the call is re-dispatched
// to the main thread and returns immediately.
- (void)start;
// Stops the running session but keeps it configured.
- (void)pause;
// Stops the session and discards session, preview layer and connection.
- (void)stop;
// Toggles between front and back camera, restarting if the camera was running.
- (void)switchCameras;

- (id)initWithParentView:(UIView*)parent;

// Subclass hook: create the capture output and attach it to captureSession.
- (void)createCaptureOutput;
- (void)createVideoPreviewLayer;
// Subclass hook invoked after every device orientation notification.
- (void)updateOrientation;

@end
///////////////////////////////// CvVideoCamera ///////////////////////////////////////////
@class CvVideoCamera;
// Delegate protocol of CvVideoCamera. The method is only visible to
// Objective-C++ translation units because its parameter is a C++ reference.
@protocol CvVideoCameraDelegate <NSObject>
#ifdef __cplusplus
// delegate method for processing image frames
- (void)processImage:(cv::Mat&)image;
#endif
@end
// Camera subclass that adopts AVCaptureVideoDataOutputSampleBufferDelegate and
// can optionally record through an AVAssetWriter.
@interface CvVideoCamera : CvAbstractCamera<AVCaptureVideoDataOutputSampleBufferDelegate>
{
AVCaptureVideoDataOutput *videoDataOutput;
// GCD queue released with dispatch_release() in -stop.
dispatch_queue_t videoDataOutputQueue;
// Layer removed from its superlayer and cleared in -stop.
CALayer *customPreviewLayer;
// NOTE(review): presumably selects single-channel frames; the code that reads it is not in view.
BOOL grayscaleMode;
// When YES, -start deletes a stale recording file and -stop finishes the asset writer.
BOOL recordVideo;
AVAssetWriterInput* recordAssetWriterInput;
AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
AVAssetWriter* recordAssetWriter;
// NOTE(review): presumably the timestamp of the most recent sample buffer; confirm in the capture callback.
CMTime lastSampleTime;
}
// Not retained (assign): the owner must clear it before the delegate is deallocated.
@property (nonatomic, assign) id<CvVideoCameraDelegate> delegate;
@property (nonatomic, assign) BOOL grayscaleMode;
@property (nonatomic, assign) BOOL recordVideo;
@property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput;
@property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor;
@property (nonatomic, retain) AVAssetWriter* recordAssetWriter;
// Rotates and resizes the custom preview layer for the given interface orientation.
- (void)adjustLayoutToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation;
// Same layout pass, driven by the last known device orientation.
- (void)layoutPreviewLayer;
- (void)saveVideo;
- (NSURL *)videoFileURL;
@end
///////////////////////////////// CvPhotoCamera ///////////////////////////////////////////
@class CvPhotoCamera;
// Delegate protocol of CvPhotoCamera.
@protocol CvPhotoCameraDelegate <NSObject>
// Sent on the main queue by -takePicture once the captured JPEG has been decoded into a UIImage.
- (void)photoCamera:(CvPhotoCamera*)photoCamera capturedImage:(UIImage *)image;
// NOTE(review): no caller of this method is visible in the implementation; confirm when it is sent.
- (void)photoCameraCancel:(CvPhotoCamera*)photoCamera;
@end
// Camera subclass that captures single still images through AVCaptureStillImageOutput.
@interface CvPhotoCamera : CvAbstractCamera
{
// JPEG still image output created in -createCaptureOutput and cleared in -stop.
AVCaptureStillImageOutput *stillImageOutput;
}
// Not retained (assign): the owner must clear it before the delegate is deallocated.
@property (nonatomic, assign) id<CvPhotoCameraDelegate> delegate;
// Asynchronously captures one picture; ignored while a previous capture is still pending.
- (void)takePicture;
@end

@ -0,0 +1,408 @@
/*
* cap_ios_abstract_camera.mm
* For iOS video I/O
* by Eduard Feicho on 29/07/12
* Copyright 2012. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#import "opencv2/highgui/cap_ios.h"
#include "precomp.hpp"
#pragma mark - Private Interface
// Class extension: members that are only used inside this implementation file.
@interface CvAbstractCamera ()
// Preview layer created by -createVideoPreviewLayer and cleared by -stop.
@property (nonatomic, retain) AVCaptureVideoPreviewLayer* captureVideoPreviewLayer;
// Handler registered for UIDeviceOrientationDidChangeNotification in the initializers.
- (void)deviceOrientationDidChange:(NSNotification*)notification;
// Builds the capture pipeline on first use and starts the session.
- (void)startCaptureSession;
// Replaces the session input with the camera at the given position.
- (void)setDesiredCameraPosition:(AVCaptureDevicePosition)desiredPosition;
// Derives imageWidth/imageHeight from defaultAVCaptureSessionPreset.
- (void)updateSize;
@end
#pragma mark - Implementation
@implementation CvAbstractCamera
#pragma mark Public
@synthesize imageWidth;
@synthesize imageHeight;
@synthesize defaultFPS;
@synthesize defaultAVCaptureDevicePosition;
@synthesize defaultAVCaptureVideoOrientation;
@synthesize defaultAVCaptureSessionPreset;
@synthesize captureSession;
@synthesize captureVideoPreviewLayer;
@synthesize videoCaptureConnection;
@synthesize running;
@synthesize captureSessionLoaded;
@synthesize useAVCaptureVideoPreviewLayer;
@synthesize parentView;
#pragma mark - Constructors
/**
 * Creates a camera without a parent view.
 * Defaults: front camera, landscape-left, 15 fps, 352x288 preset,
 * no AVCaptureVideoPreviewLayer.
 */
- (id)init;
{
    self = [super init];
    if (!self) {
        return self;
    }

    // listen for orientation changes and ask the device to start posting them
    NSNotificationCenter* center = [NSNotificationCenter defaultCenter];
    [center addObserver:self
               selector:@selector(deviceOrientationDidChange:)
                   name:UIDeviceOrientationDidChangeNotification
                 object:nil];
    UIDevice* thisDevice = [UIDevice currentDevice];
    [thisDevice beginGeneratingDeviceOrientationNotifications];
    currentDeviceOrientation = [thisDevice orientation];

    // remember whether this device has a camera at all
    cameraAvailable = [UIImagePickerController isSourceTypeAvailable:UIImagePickerControllerSourceTypeCamera];
    NSString* availability = (cameraAvailable == YES) ? @"YES" : @"NO";
    NSLog(@"camera available: %@", availability);

    running = NO;

    // default camera configuration
    self.defaultAVCaptureDevicePosition = AVCaptureDevicePositionFront;
    self.defaultAVCaptureVideoOrientation = AVCaptureVideoOrientationLandscapeLeft;
    self.defaultFPS = 15;
    self.defaultAVCaptureSessionPreset = AVCaptureSessionPreset352x288;

    self.parentView = nil;
    self.useAVCaptureVideoPreviewLayer = NO;

    return self;
}
/**
 * Creates a camera that renders its preview into `parent`.
 * Defaults: front camera, landscape-left, 15 fps, 640x480 preset,
 * AVCaptureVideoPreviewLayer enabled.
 */
- (id)initWithParentView:(UIView*)parent;
{
    self = [super init];
    if (!self) {
        return self;
    }

    // listen for orientation changes and ask the device to start posting them
    NSNotificationCenter* center = [NSNotificationCenter defaultCenter];
    [center addObserver:self
               selector:@selector(deviceOrientationDidChange:)
                   name:UIDeviceOrientationDidChangeNotification
                 object:nil];
    UIDevice* thisDevice = [UIDevice currentDevice];
    [thisDevice beginGeneratingDeviceOrientationNotifications];
    currentDeviceOrientation = [thisDevice orientation];

    // remember whether this device has a camera at all
    cameraAvailable = [UIImagePickerController isSourceTypeAvailable:UIImagePickerControllerSourceTypeCamera];
    NSString* availability = (cameraAvailable == YES) ? @"YES" : @"NO";
    NSLog(@"camera available: %@", availability);

    running = NO;

    // default camera configuration
    self.defaultAVCaptureDevicePosition = AVCaptureDevicePositionFront;
    self.defaultAVCaptureVideoOrientation = AVCaptureVideoOrientationLandscapeLeft;
    self.defaultFPS = 15;
    self.defaultAVCaptureSessionPreset = AVCaptureSessionPreset640x480;

    self.parentView = parent;
    self.useAVCaptureVideoPreviewLayer = YES;

    return self;
}
/**
 * Undoes the registrations made by the initializers: unsubscribes from the
 * notification center and balances beginGeneratingDeviceOrientationNotifications.
 * NOTE(review): there is no [super dealloc], so this presumably builds under ARC - confirm.
 */
- (void)dealloc;
{
    NSNotificationCenter* center = [NSNotificationCenter defaultCenter];
    [center removeObserver:self];

    UIDevice* thisDevice = [UIDevice currentDevice];
    [thisDevice endGeneratingDeviceOrientationNotifications];
}
#pragma mark - Public interface
/**
 * Starts the camera. Off the main thread the call is re-posted to the main
 * thread (without waiting) and returns. Calling it while already running is
 * a no-op.
 */
- (void)start;
{
    const BOOL onMainThread = [NSThread isMainThread];
    if (!onMainThread) {
        NSLog(@"[Camera] Warning: Call start only from main thread");
        [self performSelectorOnMainThread:@selector(start) withObject:nil waitUntilDone:NO];
        return;
    }

    if (running == YES) {
        return;
    }
    running = YES;

    // TODO update image size data before actually starting (needed for recording)
    [self updateSize];

    if (!cameraAvailable) {
        return;
    }
    [self startCaptureSession];
}
/**
 * Marks the camera as not running and stops the session, leaving the
 * session and its inputs/outputs in place.
 */
- (void)pause;
{
    running = NO;
    [[self captureSession] stopRunning];
}
/**
 * Stops the session and discards it together with the preview layer and the
 * video connection, so the next -start rebuilds the whole pipeline.
 */
- (void)stop;
{
    running = NO;

    [[self captureSession] stopRunning];

    // drop everything that belongs to the old session
    [self setCaptureSession:nil];
    [self setCaptureVideoPreviewLayer:nil];
    [self setVideoCaptureConnection:nil];

    captureSessionLoaded = NO;
}
/**
 * Toggles defaultAVCaptureDevicePosition between front and back.
 * A running camera is stopped first and started again afterwards.
 */
- (void)switchCameras;
{
    const BOOL restart = self.running;
    if (restart) {
        [self stop];
    }

    const BOOL usingFront = (self.defaultAVCaptureDevicePosition == AVCaptureDevicePositionFront);
    self.defaultAVCaptureDevicePosition = usingFront ? AVCaptureDevicePositionBack
                                                     : AVCaptureDevicePositionFront;

    if (restart) {
        [self start];
    }
}
#pragma mark - Device Orientation Changes
/**
 * Notification handler for UIDeviceOrientationDidChangeNotification.
 * Remembers the new orientation when it is one of the four portrait /
 * landscape values (face up, face down and unknown keep the previous value)
 * and then calls the -updateOrientation subclass hook.
 */
- (void)deviceOrientationDidChange:(NSNotification*)notification
{
    UIDeviceOrientation orientation = [UIDevice currentDevice].orientation;

    switch (orientation)
    {
        case UIDeviceOrientationPortrait:
        case UIDeviceOrientationPortraitUpsideDown:
        case UIDeviceOrientationLandscapeLeft:
        case UIDeviceOrientationLandscapeRight:
            currentDeviceOrientation = orientation;
            break;

        case UIDeviceOrientationFaceUp:
        case UIDeviceOrientationFaceDown:
        default:
            break;
    }

    // UIDeviceOrientation is NSInteger-backed; cast so the argument matches %d on 64-bit targets
    NSLog(@"deviceOrientationDidChange: %d", (int)orientation);

    [self updateOrientation];
}
#pragma mark - Private Interface
/**
 * Creates a fresh AVCaptureSession and applies defaultAVCaptureSessionPreset,
 * falling back to AVCaptureSessionPresetLow when the session rejects it.
 */
- (void)createCaptureSession;
{
    self.captureSession = [[AVCaptureSession alloc] init];

    AVCaptureSession* session = self.captureSession;
    NSString* wantedPreset = self.defaultAVCaptureSessionPreset;

    if ([session canSetSessionPreset:wantedPreset]) {
        [session setSessionPreset:wantedPreset];
        return;
    }
    if ([session canSetSessionPreset:AVCaptureSessionPresetLow]) {
        [session setSessionPreset:AVCaptureSessionPresetLow];
        return;
    }
    NSLog(@"[Camera] Error: could not set session preset");
}
/**
 * Attaches the camera at defaultAVCaptureDevicePosition to the session and
 * logs the state of the system default video device.
 * NOTE(review): the two log lines describe the default device, which is not
 * necessarily the one selected by -setDesiredCameraPosition: - confirm intent.
 */
- (void)createCaptureDevice;
{
    AVCaptureDevice *defaultDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];

    [self setDesiredCameraPosition:self.defaultAVCaptureDevicePosition];

    NSString* connectedText = defaultDevice.connected ? @"YES" : @"NO";
    NSLog(@"[Camera] device connected? %@", connectedText);

    NSString* positionText = (defaultDevice.position == AVCaptureDevicePositionBack) ? @"back" : @"front";
    NSLog(@"[Camera] device position %@", positionText);
}
/**
 * Creates the AVCaptureVideoPreviewLayer for the current session, applies the
 * default video orientation when supported and, if a parent view is set,
 * sizes the layer to the view and adds it as a sublayer.
 */
- (void)createVideoPreviewLayer;
{
    self.captureVideoPreviewLayer = [[AVCaptureVideoPreviewLayer alloc] initWithSession:self.captureSession];
    AVCaptureVideoPreviewLayer* preview = self.captureVideoPreviewLayer;

    if ([preview isOrientationSupported]) {
        [preview setOrientation:self.defaultAVCaptureVideoOrientation];
    }

    if (parentView != nil) {
        preview.frame = self.parentView.bounds;
        preview.videoGravity = AVLayerVideoGravityResizeAspectFill;
        [self.parentView.layer addSublayer:preview];
    }

    NSLog(@"[Camera] created AVCaptureVideoPreviewLayer");
}
/**
 * Makes the first video device at `desiredPosition` the only input of the
 * capture session and enables continuous autofocus on it when supported.
 *
 * If no device matches, or the device input cannot be created, the session
 * is left untouched.
 */
- (void)setDesiredCameraPosition:(AVCaptureDevicePosition)desiredPosition;
{
    for (AVCaptureDevice *device in [AVCaptureDevice devicesWithMediaType:AVMediaTypeVideo]) {
        if ([device position] != desiredPosition) {
            continue;
        }

        NSError* inputError = nil;
        AVCaptureDeviceInput *input = [AVCaptureDeviceInput deviceInputWithDevice:device error:&inputError];
        if (!input) {
            // -addInput: raises on nil, so keep the current configuration instead
            NSLog(@"error creating input %@", [inputError localizedDescription]);
            break;
        }

        // support for autofocus
        if ([device isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
            NSError *lockError = nil;
            if ([device lockForConfiguration:&lockError]) {
                device.focusMode = AVCaptureFocusModeContinuousAutoFocus;
                [device unlockForConfiguration];
            } else {
                NSLog(@"unable to lock device for autofocos configuration %@", [lockError localizedDescription]);
            }
        }

        [self.captureSession beginConfiguration];

        // remove the previous inputs first, then add the new one exactly once
        NSArray* oldInputs = self.captureSession.inputs;
        for (AVCaptureInput *oldInput in oldInputs) {
            [self.captureSession removeInput:oldInput];
        }

        if ([self.captureSession canAddInput:input]) {
            [self.captureSession addInput:input];
        } else {
            NSLog(@"[Camera] Error: capture session rejected the camera input");
        }

        [self.captureSession commitConfiguration];

        break;
    }
}
/**
 * Builds the capture pipeline on first use (session, camera input, output,
 * preview) and then starts the session. Does nothing without a camera.
 */
- (void)startCaptureSession
{
    if (!cameraAvailable) {
        return;
    }

    const BOOL needsSetup = (self.captureSessionLoaded == NO);
    if (needsSetup) {
        [self createCaptureSession];
        [self createCaptureDevice];
        [self createCaptureOutput];

        // either the stock AVFoundation preview layer or the subclass-provided one
        if (self.useAVCaptureVideoPreviewLayer) {
            [self createVideoPreviewLayer];
        } else {
            [self createCustomVideoPreview];
        }

        captureSessionLoaded = YES;
    }

    [self.captureSession startRunning];
}
/**
 * Abstract hook: subclasses create their capture output here.
 * The base implementation always raises NSInternalInconsistencyException.
 */
- (void)createCaptureOutput;
{
    NSString* selectorName = NSStringFromSelector(_cmd);
    NSString* reason = [NSString stringWithFormat:@"You must override %@ in a subclass", selectorName];
    [[NSException exceptionWithName:NSInternalInconsistencyException
                             reason:reason
                           userInfo:nil] raise];
}
/**
 * Abstract hook: subclasses build their own preview here when
 * useAVCaptureVideoPreviewLayer is NO.
 * The base implementation always raises NSInternalInconsistencyException.
 */
- (void)createCustomVideoPreview;
{
    NSString* selectorName = NSStringFromSelector(_cmd);
    NSString* reason = [NSString stringWithFormat:@"You must override %@ in a subclass", selectorName];
    [[NSException exceptionWithName:NSInternalInconsistencyException
                             reason:reason
                           userInfo:nil] raise];
}
// Hook called at the end of -deviceOrientationDidChange:. The base class has
// nothing to update, so the body is intentionally empty.
- (void)updateOrientation;
{
// nothing to do here
}
/**
 * Sets imageWidth / imageHeight from defaultAVCaptureSessionPreset.
 * Only the 352x288 and 1280x720 presets map to their own size; every other
 * preset, including 640x480 and unknown ones, yields 640x480.
 */
- (void)updateSize;
{
    NSString* preset = self.defaultAVCaptureSessionPreset;

    // TODO: find the correct resolution for the Photo, High, Medium and Low
    // presets; until then they share the 640x480 default below.
    int width = 640;
    int height = 480;

    if ([preset isEqualToString:AVCaptureSessionPreset352x288]) {
        width = 352;
        height = 288;
    } else if ([preset isEqualToString:AVCaptureSessionPreset1280x720]) {
        width = 1280;
        height = 720;
    }

    self.imageWidth = width;
    self.imageHeight = height;
}
@end

@ -0,0 +1,165 @@
/*
* cap_ios_photo_camera.mm
* For iOS video I/O
* by Eduard Feicho on 29/07/12
* Copyright 2012. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#import "opencv2/highgui/cap_ios.h"
#include "precomp.hpp"
#pragma mark - Private Interface
// Class extension: exposes the still image output ivar as a retained
// property for use inside this implementation file.
@interface CvPhotoCamera ()
@property (nonatomic, retain) AVCaptureStillImageOutput* stillImageOutput;
@end
#pragma mark - Implementation
@implementation CvPhotoCamera
#pragma mark Public
@synthesize stillImageOutput;
@synthesize delegate;
#pragma mark - Public interface
/**
 * Asynchronously captures one still image and hands it to the delegate on
 * the main queue via -photoCamera:capturedImage:.
 *
 * cameraAvailable doubles as a "capture in flight" flag: it is cleared here
 * and set again when the capture finishes. It is also restored when the
 * capture fails, so one failed capture no longer disables the camera for
 * good. Calls made before the output exists (before -start) are rejected
 * without touching the flag.
 */
- (void)takePicture
{
    if (cameraAvailable == NO) {
        return;
    }
    if (self.stillImageOutput == nil || self.videoCaptureConnection == nil) {
        // the completion handler would never run, leaving cameraAvailable stuck at NO
        NSLog(@"[Camera] Error: still image output is not ready");
        return;
    }
    cameraAvailable = NO;

    [self.stillImageOutput captureStillImageAsynchronouslyFromConnection:self.videoCaptureConnection
                                                       completionHandler:
     ^(CMSampleBufferRef imageSampleBuffer, NSError *error)
     {
         if (error != nil || imageSampleBuffer == NULL) {
             dispatch_async(dispatch_get_main_queue(), ^{
                 NSLog(@"[Camera] Error: could not capture still image %@", [error localizedDescription]);
                 // allow the user to try again
                 cameraAvailable = YES;
             });
             return;
         }

         // TODO check
         // NSNumber* imageOrientation = [UIImage cgImageOrientationForUIDeviceOrientation:currentDeviceOrientation];
         // CMSetAttachment(imageSampleBuffer, kCGImagePropertyOrientation, imageOrientation, 1);

         NSData *jpegData = [AVCaptureStillImageOutput jpegStillImageNSDataRepresentation:imageSampleBuffer];

         dispatch_async(dispatch_get_main_queue(), ^{
             [self.captureSession stopRunning];

             // Make sure we create objects on the main thread in the main context
             UIImage* newImage = [UIImage imageWithData:jpegData];

             //UIImageOrientation orientation = [newImage imageOrientation];

             // TODO: only apply rotation, don't scale, since we can set this directly in the camera
             /*
              switch (orientation) {
              case UIImageOrientationUp:
              case UIImageOrientationDown:
              newImage = [newImage imageWithAppliedRotationAndMaxSize:CGSizeMake(640.0, 480.0)];
              break;
              case UIImageOrientationLeft:
              case UIImageOrientationRight:
              newImage = [newImage imageWithMaxSize:CGSizeMake(640.0, 480.0)];
              default:
              break;
              }
              */

             // We have captured the image, we can allow the user to take another picture
             cameraAvailable = YES;

             NSLog(@"CvPhotoCamera captured image");
             if (self.delegate) {
                 [self.delegate photoCamera:self capturedImage:newImage];
             }

             [self.captureSession startRunning];
         });
     }];
}
/**
 * Stops the camera through the base class and drops the still image output
 * so that the next start creates a new one.
 */
- (void)stop;
{
    [super stop];
    [self setStillImageOutput:nil];
}
#pragma mark - Private Interface
/**
 * Creates a JPEG still image output, adds it to the capture session and
 * stores the first connection that carries a video input port in
 * videoCaptureConnection.
 */
- (void)createStillImageOutput;
{
    // still image output configured for the JPEG codec
    self.stillImageOutput = [[AVCaptureStillImageOutput alloc] init];
    AVCaptureStillImageOutput* output = self.stillImageOutput;

    NSDictionary* jpegSettings = [NSDictionary dictionaryWithObject:AVVideoCodecJPEG forKey:AVVideoCodecKey];
    [output setOutputSettings:jpegSettings];

    [self.captureSession addOutput:output];

    // look for the connection that delivers video
    for (AVCaptureConnection* candidate in output.connections) {
        for (AVCaptureInputPort* inputPort in [candidate inputPorts]) {
            if (![inputPort.mediaType isEqual:AVMediaTypeVideo]) {
                continue;
            }
            self.videoCaptureConnection = candidate;
            break;
        }
        if (self.videoCaptureConnection) {
            break;
        }
    }

    NSLog(@"[Camera] still image output created");
}
// Override of the CvAbstractCamera hook: the photo camera's only capture
// output is the still image output.
- (void)createCaptureOutput;
{
[self createStillImageOutput];
}
// Override of the CvAbstractCamera hook. Intentionally empty so that the
// base implementation, which raises an exception, is not reached.
- (void)createCustomVideoPreview;
{
//do nothing, always use AVCaptureVideoPreviewLayer
}
@end

@ -0,0 +1,585 @@
/*
* cap_ios_video_camera.mm
* For iOS video I/O
* by Eduard Feicho on 29/07/12
* Copyright 2012. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#import "opencv2/highgui/cap_ios.h"
#include "precomp.hpp"
#import <AssetsLibrary/AssetsLibrary.h>
// Converts an angle from degrees to radians.
static CGFloat DegreesToRadians(CGFloat degrees)
{
    return degrees * M_PI / 180;
}
#pragma mark - Private Interface
// Class extension: helpers and retained properties that are only used inside
// this implementation file.
@interface CvVideoCamera ()
- (void)createVideoDataOutput;
- (void)createVideoFileOutput;
// Layer removed from its superlayer and cleared in -stop.
@property (nonatomic, retain) CALayer *customPreviewLayer;
// Video data output; cleared in -stop.
@property (nonatomic, retain) AVCaptureVideoDataOutput *videoDataOutput;
@end
#pragma mark - Implementation
@implementation CvVideoCamera
@synthesize delegate;
@synthesize grayscaleMode;
@synthesize customPreviewLayer;
@synthesize videoDataOutput;
@synthesize recordVideo;
//@synthesize videoFileOutput;
@synthesize recordAssetWriterInput;
@synthesize recordPixelBufferAdaptor;
@synthesize recordAssetWriter;
#pragma mark - Constructors
/**
 * Initializes through the base class and then switches to the custom
 * preview (no AVCaptureVideoPreviewLayer) with recording disabled.
 */
- (id)initWithParentView:(UIView*)parent;
{
    self = [super initWithParentView:parent];
    if (!self) {
        return self;
    }

    [self setUseAVCaptureVideoPreviewLayer:NO];
    [self setRecordVideo:NO];
    return self;
}
#pragma mark - Public interface
/**
 * Starts the camera and, when recording is enabled, deletes a recording
 * left at -videoFileString by a previous run. The deletion is logged only
 * when a file was actually removed, and a failed deletion is reported.
 */
- (void)start;
{
    [super start];

    if (self.recordVideo == YES) {
        NSString* recordingPath = [self videoFileString];
        NSFileManager* fileManager = [NSFileManager defaultManager];

        if ([fileManager fileExistsAtPath:recordingPath]) {
            // initialised explicitly: the value is only defined when removal fails
            NSError* error = nil;
            if ([fileManager removeItemAtPath:recordingPath error:&error]) {
                NSLog(@"[Camera] Delete file %@", recordingPath);
            } else {
                NSLog(@"[Camera] Error: could not delete file %@: %@", recordingPath, [error localizedDescription]);
            }
        }
    }
}
/**
 * Stops capturing, releases the video data output and its dispatch queue,
 * finishes a recording in progress and removes the custom preview layer.
 * Safe to call more than once.
 */
- (void)stop;
{
    [super stop];

    self.videoDataOutput = nil;
    if (videoDataOutputQueue) {
        dispatch_release(videoDataOutputQueue);
        // clear the ivar so a second -stop does not release the queue again
        videoDataOutputQueue = NULL;
    }

    if (self.recordVideo == YES) {
        if (self.recordAssetWriter.status == AVAssetWriterStatusWriting) {
            [self.recordAssetWriter finishWriting];
            NSLog(@"[Camera] recording stopped");
        } else {
            NSLog(@"[Camera] Recording Error: asset writer status is not writing");
        }

        self.recordAssetWriter = nil;
        self.recordAssetWriterInput = nil;
        self.recordPixelBufferAdaptor = nil;
    }

    [self.customPreviewLayer removeFromSuperlayer];
    self.customPreviewLayer = nil;
}
// TODO fix
/**
 * Centers the custom preview layer in the parent view and rotates it by the
 * sum of an angle chosen from `interfaceOrientation` and an angle chosen from
 * defaultAVCaptureVideoOrientation. For a net rotation of 90 or 270 degrees
 * the layer's width and height are swapped. Does nothing without a parent view.
 */
- (void)adjustLayoutToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation;
{
    NSLog(@"layout preview layer");
    if (self.parentView == nil) {
        return;
    }

    CALayer* previewLayer = self.customPreviewLayer;
    CGRect targetBounds = self.customPreviewLayer.bounds;

    // angle contributed by the interface orientation (unknown values leave it at 0)
    int angle = 0;
    if (interfaceOrientation == UIInterfaceOrientationPortrait) {
        NSLog(@"to Portrait");
        angle = 270;
    } else if (interfaceOrientation == UIInterfaceOrientationPortraitUpsideDown) {
        angle = 90;
        NSLog(@"to UpsideDown");
    } else if (interfaceOrientation == UIInterfaceOrientationLandscapeLeft) {
        angle = 0;
        NSLog(@"to LandscapeLeft");
    } else if (interfaceOrientation == UIInterfaceOrientationLandscapeRight) {
        angle = 180;
        NSLog(@"to LandscapeRight");
    }

    // angle contributed by the configured capture orientation (landscape-left adds nothing)
    if (defaultAVCaptureVideoOrientation == AVCaptureVideoOrientationLandscapeRight) {
        angle += 180;
    } else if (defaultAVCaptureVideoOrientation == AVCaptureVideoOrientationPortraitUpsideDown) {
        angle += 270;
    } else if (defaultAVCaptureVideoOrientation == AVCaptureVideoOrientationPortrait) {
        angle += 90;
    }
    angle %= 360;

    const bool swapSides = (angle == 90 || angle == 270);
    if (swapSides) {
        NSLog(@"flip bounds");
        targetBounds = CGRectMake(0, 0, targetBounds.size.height, targetBounds.size.width);
    }

    previewLayer.position = CGPointMake(self.parentView.frame.size.width/2., self.parentView.frame.size.height/2.);
    self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);

    previewLayer.affineTransform = CGAffineTransformMakeRotation( DegreesToRadians(angle) );
    previewLayer.bounds = targetBounds;
}
// TODO fix
// Rotates and centers the custom preview layer inside the parent view. The
// rotation is an angle chosen from currentDeviceOrientation plus an offset
// chosen from defaultAVCaptureVideoOrientation, taken modulo 360. For a quarter
// turn (90 or 270 degrees) width and height of the layer bounds are swapped.
// Only the log line is executed when there is no parent view.
- (void)layoutPreviewLayer;
{
    NSLog(@"layout preview layer");
    if (self.parentView == nil) {
        return;
    }

    CALayer* previewLayer = self.customPreviewLayer;
    CGRect layerBounds = self.customPreviewLayer.bounds;

    // Base angle from the device orientation; face up, face down and any
    // other value leave the angle at 0.
    int angle = 0;
    if (currentDeviceOrientation == UIDeviceOrientationPortrait) {
        angle = 270;
    } else if (currentDeviceOrientation == UIDeviceOrientationPortraitUpsideDown) {
        angle = 90;
    } else if (currentDeviceOrientation == UIDeviceOrientationLandscapeLeft) {
        NSLog(@"left");
        angle = 180;
    } else if (currentDeviceOrientation == UIDeviceOrientationLandscapeRight) {
        NSLog(@"right");
        angle = 0;
    }

    // Offset from the default capture orientation; landscape left and any
    // other value add nothing.
    if (defaultAVCaptureVideoOrientation == AVCaptureVideoOrientationLandscapeRight) {
        angle += 180;
    } else if (defaultAVCaptureVideoOrientation == AVCaptureVideoOrientationPortraitUpsideDown) {
        angle += 270;
    } else if (defaultAVCaptureVideoOrientation == AVCaptureVideoOrientationPortrait) {
        angle += 90;
    }
    angle %= 360;

    const bool quarterTurn = (angle == 90) || (angle == 270);
    if (quarterTurn) {
        NSLog(@"flip bounds");
        layerBounds = CGRectMake(0, 0, layerBounds.size.height, layerBounds.size.width);
    }

    previewLayer.position = CGPointMake(self.parentView.frame.size.width/2., self.parentView.frame.size.height/2.);
    previewLayer.affineTransform = CGAffineTransformMakeRotation( DegreesToRadians(angle) );
    previewLayer.bounds = layerBounds;
}
#pragma mark - Private Interface
// Creates and configures the video data output of the capture session:
//  - pixel format (YpCbCr 4:2:0 in grayscale mode, BGRA otherwise),
//  - frame rate, mirroring and orientation of the video connection,
//  - the custom preview layer,
//  - the serial queue on which sample buffers are delivered to this object.
- (void)createVideoDataOutput;
{
    // Make a video data output
    self.videoDataOutput = [AVCaptureVideoDataOutput new];

    // In grayscale mode we want YUV (YpCbCr 4:2:0) so we can directly access the graylevel intensity values (Y component)
    // In color mode, the BGRA format is used
    OSType format = self.grayscaleMode ? kCVPixelFormatType_420YpCbCr8BiPlanarFullRange : kCVPixelFormatType_32BGRA;
    self.videoDataOutput.videoSettings = [NSDictionary dictionaryWithObject:[NSNumber numberWithUnsignedInt:format]
                                                                     forKey:(id)kCVPixelBufferPixelFormatTypeKey];

    // discard if the data output queue is blocked (as we process the still image)
    [self.videoDataOutput setAlwaysDiscardsLateVideoFrames:YES];

    if ( [self.captureSession canAddOutput:self.videoDataOutput] ) {
        [self.captureSession addOutput:self.videoDataOutput];
    }

    // Look the video connection up once, after the output has been added to the
    // session. It is nil when there is no such connection; every message below
    // is then a no-op.
    AVCaptureConnection* videoConnection = [self.videoDataOutput connectionWithMediaType:AVMediaTypeVideo];
    [videoConnection setEnabled:YES];

    // set default FPS
    if (videoConnection.supportsVideoMinFrameDuration) {
        videoConnection.videoMinFrameDuration = CMTimeMake(1, self.defaultFPS);
    }
    if (videoConnection.supportsVideoMaxFrameDuration) {
        videoConnection.videoMaxFrameDuration = CMTimeMake(1, self.defaultFPS);
    }

    // set video mirroring for front camera (more intuitive)
    if (videoConnection.supportsVideoMirroring) {
        videoConnection.videoMirrored = (self.defaultAVCaptureDevicePosition == AVCaptureDevicePositionFront) ? YES : NO;
    }

    // set default video orientation
    if (videoConnection.supportsVideoOrientation) {
        videoConnection.videoOrientation = self.defaultAVCaptureVideoOrientation;
    }

    // create a custom preview layer
    self.customPreviewLayer = [CALayer layer];
    self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
    [self layoutPreviewLayer];

    // create a serial dispatch queue used for the sample buffer delegate as well as when a still image is captured
    // a serial dispatch queue must be used to guarantee that video frames will be delivered in order
    // see the header doc for setSampleBufferDelegate:queue: for more information
    videoDataOutputQueue = dispatch_queue_create("VideoDataOutputQueue", DISPATCH_QUEUE_SERIAL);
    [self.videoDataOutput setSampleBufferDelegate:self queue:videoDataOutputQueue];

    NSLog(@"[Camera] created AVCaptureVideoDataOutput at %d FPS", self.defaultFPS);
}
// Creates the recording pipeline: an H.264 AVAssetWriterInput sized
// imageWidth x imageHeight, a pixel buffer adaptor matching the capture pixel
// format, and an AVAssetWriter that writes to [self videoFileURL].
// When the writer cannot be created, or refuses the input, the failure is
// logged and the method returns without reporting success.
- (void)createVideoFileOutput;
{
    /* Video File Output in H.264, via AVAssetWriter */
    NSLog(@"Create Video with dimensions %dx%d", self.imageWidth, self.imageHeight);

    NSDictionary *outputSettings
        = [NSDictionary dictionaryWithObjectsAndKeys:[NSNumber numberWithInt:self.imageWidth], AVVideoWidthKey,
                                                     [NSNumber numberWithInt:self.imageHeight], AVVideoHeightKey,
                                                     AVVideoCodecH264, AVVideoCodecKey,
                                                     nil
          ];
    self.recordAssetWriterInput = [AVAssetWriterInput assetWriterInputWithMediaType:AVMediaTypeVideo outputSettings:outputSettings];

    // The adaptor must accept buffers in the same pixel format the capture output produces.
    int pixelBufferFormat = (self.grayscaleMode == YES) ? kCVPixelFormatType_420YpCbCr8BiPlanarFullRange : kCVPixelFormatType_32BGRA;
    self.recordPixelBufferAdaptor =
        [[AVAssetWriterInputPixelBufferAdaptor alloc]
            initWithAssetWriterInput:self.recordAssetWriterInput
            sourcePixelBufferAttributes:[NSDictionary dictionaryWithObjectsAndKeys:[NSNumber numberWithInt:pixelBufferFormat], kCVPixelBufferPixelFormatTypeKey, nil]];

    NSError* error = nil;
    NSLog(@"Create AVAssetWriter with url: %@", [self videoFileURL]);
    self.recordAssetWriter = [AVAssetWriter assetWriterWithURL:[self videoFileURL]
                                                      fileType:AVFileTypeMPEG4
                                                         error:&error];
    // Failure is signalled by a nil writer; the error object only carries the details.
    if (self.recordAssetWriter == nil) {
        NSLog(@"[Camera] Unable to create AVAssetWriter: %@", error);
        return;
    }

    if (![self.recordAssetWriter canAddInput:self.recordAssetWriterInput]) {
        NSLog(@"[Camera] Unable to add the video input to the AVAssetWriter");
        return;
    }
    [self.recordAssetWriter addInput:self.recordAssetWriterInput];
    self.recordAssetWriterInput.expectsMediaDataInRealTime = YES;

    NSLog(@"[Camera] created AVAssetWriter");
}
// Creates the video data output and, only when recording is enabled, the
// video file output as well.
- (void)createCaptureOutput;
{
    [self createVideoDataOutput];
    if (self.recordVideo != YES) {
        return;
    }
    [self createVideoFileOutput];
}
// Attaches the custom preview layer to the layer of the parent view.
- (void)createCustomVideoPreview;
{
    CALayer* hostLayer = self.parentView.layer;
    [hostLayer addSublayer:self.customPreviewLayer];
}
#pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate
// Sample buffer callback. When a delegate is set, the frame is wrapped in a
// cv::Mat (no copy), handed to the delegate's processImage:, converted to a
// CGImage, shown in the custom preview layer and, when recording is enabled,
// appended to the asset writer. Frames are ignored when there is no delegate.
//
// The pixel buffer stays locked for the whole method; every path that passes
// the lock also reaches the cleanup at the end, so the CGImage, the color
// space and the lock are always released (a recording error no longer returns
// early past the cleanup).
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection
{
    if (!self.delegate) {
        return;
    }

    // convert from Core Media to Core Video
    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CVPixelBufferLockBaseAddress(imageBuffer, 0);

    void* bufferAddress;
    size_t width;
    size_t height;
    size_t bytesPerRow;
    int format_opencv;

    OSType format = CVPixelBufferGetPixelFormatType(imageBuffer);
    if (format == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange) {
        // only plane 0 is wrapped, as a single channel 8 bit image
        format_opencv = CV_8UC1;
        bufferAddress = CVPixelBufferGetBaseAddressOfPlane(imageBuffer, 0);
        width = CVPixelBufferGetWidthOfPlane(imageBuffer, 0);
        height = CVPixelBufferGetHeightOfPlane(imageBuffer, 0);
        bytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer, 0);
    } else { // expect kCVPixelFormatType_32BGRA
        format_opencv = CV_8UC4;
        bufferAddress = CVPixelBufferGetBaseAddress(imageBuffer);
        width = CVPixelBufferGetWidth(imageBuffer);
        height = CVPixelBufferGetHeight(imageBuffer);
        bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
    }

    // delegate image processing to the delegate
    cv::Mat image(height, width, format_opencv, bufferAddress, bytesPerRow);
    if ([self.delegate respondsToSelector:@selector(processImage:)]) {
        [self.delegate processImage:image];
    }

    // check if matrix data pointer or dimensions were changed by the delegate
    bool iOSimage = false;
    if (height == image.rows && width == image.cols && format_opencv == image.type() && bufferAddress == image.data && bytesPerRow == image.step) {
        iOSimage = true;
    }

    // (create color space, create graphics context, render buffer)
    CGColorSpaceRef colorSpace;
    CGBitmapInfo bitmapInfo;

    // basically we decide if it's a grayscale, rgb or rgba image
    if (image.channels() == 1) {
        colorSpace = CGColorSpaceCreateDeviceGray();
        bitmapInfo = kCGImageAlphaNone;
    } else if (image.channels() == 3) {
        colorSpace = CGColorSpaceCreateDeviceRGB();
        bitmapInfo = kCGImageAlphaNone;
        if (iOSimage) {
            bitmapInfo |= kCGBitmapByteOrder32Little;
        } else {
            bitmapInfo |= kCGBitmapByteOrder32Big;
        }
    } else {
        colorSpace = CGColorSpaceCreateDeviceRGB();
        bitmapInfo = kCGImageAlphaPremultipliedFirst;
        if (iOSimage) {
            bitmapInfo |= kCGBitmapByteOrder32Little;
        } else {
            bitmapInfo |= kCGBitmapByteOrder32Big;
        }
    }

    CGImage* dstImage;
    if (iOSimage) {
        // the delegate left the buffer layout untouched: render straight from the capture buffer
        CGContextRef context = CGBitmapContextCreate(bufferAddress, width, height, 8, bytesPerRow, colorSpace, bitmapInfo);
        dstImage = CGBitmapContextCreateImage(context);
        CGContextRelease(context);
    } else {
        // the delegate replaced the matrix: copy its pixels into an NSData backed provider
        NSData *data = [NSData dataWithBytes:image.data length:image.elemSize()*image.total()];
        CGDataProviderRef provider = CGDataProviderCreateWithCFData((__bridge CFDataRef)data);

        // Creating CGImage from cv::Mat
        dstImage = CGImageCreate(image.cols,                 // width
                                 image.rows,                 // height
                                 8,                          // bits per component
                                 8 * image.elemSize(),       // bits per pixel
                                 image.step,                 // bytesPerRow
                                 colorSpace,                 // colorspace
                                 bitmapInfo,                 // bitmap info
                                 provider,                   // CGDataProviderRef
                                 NULL,                       // decode
                                 false,                      // should interpolate
                                 kCGRenderingIntentDefault   // intent
                                 );
        CGDataProviderRelease(provider);
    }

    // render buffer
    dispatch_sync(dispatch_get_main_queue(), ^{
        self.customPreviewLayer.contents = (__bridge id)dstImage;
    });

    if (self.recordVideo == YES) {
        lastSampleTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
        // CMTimeShow(lastSampleTime);

        // Stays YES unless starting the writer fails for this frame; in that
        // case the frame is skipped but the cleanup below still runs.
        BOOL writerIsWriting = YES;
        if (self.recordAssetWriter.status != AVAssetWriterStatusWriting) {
            [self.recordAssetWriter startWriting];
            [self.recordAssetWriter startSessionAtSourceTime:lastSampleTime];
            if (self.recordAssetWriter.status != AVAssetWriterStatusWriting) {
                NSLog(@"[Camera] Recording Error: asset writer status is not writing: %@", self.recordAssetWriter.error);
                writerIsWriting = NO;
            } else {
                NSLog(@"[Camera] Video recording started");
            }
        }

        if (writerIsWriting && self.recordAssetWriterInput.readyForMoreMediaData) {
            if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer
                                               withPresentationTime:lastSampleTime] ) {
                NSLog(@"Video Writing Error");
            }
        }
    }

    // cleanup
    CGImageRelease(dstImage);
    CGColorSpaceRelease(colorSpace);
    CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
}
// Resizes the custom preview layer to the size of the parent view's frame and
// then lays it out again.
- (void)updateOrientation;
{
    NSLog(@"rotate..");
    CGRect parentSizedBounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height);
    self.customPreviewLayer.bounds = parentSizedBounds;
    [self layoutPreviewLayer];
}
// Writes the recorded movie file to the saved photos album. Does nothing when
// recording is disabled. An incompatible file and a failed write are logged
// instead of being dropped silently.
- (void)saveVideo;
{
    if (self.recordVideo == NO) {
        return;
    }

    NSURL* movieURL = [self videoFileURL];
    ALAssetsLibrary *library = [[ALAssetsLibrary alloc] init];
    if (![library videoAtPathIsCompatibleWithSavedPhotosAlbum:movieURL]) {
        NSLog(@"[Camera] Video at %@ is not compatible with the saved photos album", movieURL);
        return;
    }

    [library writeVideoAtPathToSavedPhotosAlbum:movieURL
                                completionBlock:^(NSURL *assetURL, NSError *error){
                                    if (error != nil) {
                                        NSLog(@"[Camera] Unable to save video: %@", error);
                                    }
                                }];
}
// Returns a file URL for "output.mov" inside the temporary directory. Logs a
// message when a file already exists at that path.
- (NSURL *)videoFileURL;
{
    NSString *moviePath = [[NSString alloc] initWithFormat:@"%@%@", NSTemporaryDirectory(), @"output.mov"];
    NSURL *movieURL = [NSURL fileURLWithPath:moviePath];
    if ([[NSFileManager defaultManager] fileExistsAtPath:moviePath]) {
        NSLog(@"file exists");
    }
    return movieURL;
}
// Returns the path of "output.mov" inside the temporary directory as a string.
- (NSString *)videoFileString;
{
    return [[NSString alloc] initWithFormat:@"%@%@", NSTemporaryDirectory(), @"output.mov"];
}
@end

@ -0,0 +1,68 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace perf;
using namespace testing;
using std::tr1::make_tuple;
using std::tr1::get;
// Source (and destination) matrix types covered by the remap benchmark.
CV_ENUM(MatrixType, CV_16UC1, CV_16SC1, CV_32FC1)
// Types of the first map passed to remap().
CV_ENUM(MapType, CV_16SC2, CV_32FC1, CV_32FC2)
// Interpolation modes passed to remap().
CV_ENUM(InterType, INTER_LINEAR, INTER_CUBIC, INTER_LANCZOS4, INTER_NEAREST)
// Fixture parameterized by (image size, source type, map1 type, interpolation mode).
typedef TestBaseWithParam< tr1::tuple<Size, MatrixType, MapType, InterType> > TestRemap;
// Measures remap() for every combination of image size, source type, map
// representation and interpolation mode. The maps send destination pixel
// (i, j) to source coordinate (src.cols - i, j).
PERF_TEST_P( TestRemap, Remap,
    Combine(
        Values( szVGA, sz1080p ),
        ValuesIn( MatrixType::all() ),
        ValuesIn( MapType::all() ),
        ValuesIn( InterType::all() )
    )
)
{
    const Size sz = get<0>(GetParam());
    const int src_type = get<1>(GetParam());
    const int map1_type = get<2>(GetParam());
    const int inter_type = get<3>(GetParam());

    Mat src(sz, src_type);
    Mat map1(sz, map1_type);
    Mat dst(sz, src_type);
    // A second map is only allocated for the two-plane CV_32FC1 representation;
    // otherwise it is constructed with an empty size.
    Mat map2(map1_type == CV_32FC1 ? sz : Size(), CV_32FC1);

    RNG rng;
    rng.fill(src, RNG::UNIFORM, 0, 256);

    for (int row = 0; row < map1.rows; ++row)
    {
        for (int col = 0; col < map1.cols; ++col)
        {
            const int src_x = src.cols - col;
            const int src_y = row;
            switch (map1_type)
            {
            case CV_32FC1:
                map1.at<float>(row, col) = src_x;
                map2.at<float>(row, col) = src_y;
                break;
            case CV_32FC2:
            {
                Vec2f& xy = map1.at<Vec2f>(row, col);
                xy[0] = src_x;
                xy[1] = src_y;
                break;
            }
            case CV_16SC2:
            {
                Vec2s& xy = map1.at<Vec2s>(row, col);
                xy[0] = src_x;
                xy[1] = src_y;
                break;
            }
            default:
                CV_Assert(0);
            }
        }
    }

    declare.in(src, WARMUP_RNG).out(dst).time(20);

    TEST_CYCLE() remap(src, dst, map1, map2, inter_type);

    SANITY_CHECK(dst);
}

@ -59,11 +59,11 @@ PERF_TEST_P(MatInfo_Size_Size, resizeDownLinear,
typedef tr1::tuple<MatType, Size, int> MatInfo_Size_Scale_t;
typedef TestBaseWithParam<MatInfo_Size_Scale_t> MatInfo_Size_Scale;
PERF_TEST_P(MatInfo_Size_Scale, resizeAreaFast,
PERF_TEST_P(MatInfo_Size_Scale, ResizeAreaFast,
testing::Combine(
testing::Values(CV_8UC1, CV_8UC4),
testing::Values(szVGA, szqHD, sz720p, sz1080p),
testing::Values(2, 4)
testing::Values(2)
)
)
{
@ -84,3 +84,31 @@ PERF_TEST_P(MatInfo_Size_Scale, resizeAreaFast,
//difference equal to 1 is allowed because of different possible rounding modes: round-to-nearest vs bankers' rounding
SANITY_CHECK(dst, 1);
}
// Fixture parameterized by (matrix type, source size, scale factor).
typedef TestBaseWithParam<tr1::tuple<MatType, Size, double> > MatInfo_Size_Scale_Area;

// Measures resize() with INTER_AREA; the destination size is the source size
// multiplied by the scale factor and rounded with cvRound.
PERF_TEST_P(MatInfo_Size_Scale_Area, ResizeArea,
            testing::Combine(
                testing::Values(CV_8UC1, CV_8UC4),
                testing::Values(szVGA, szqHD, sz720p, sz1080p),
                testing::Values(2.4, 3.4, 1.3)
                )
            )
{
    const int matType = get<0>(GetParam());
    const Size from = get<1>(GetParam());
    const double scale = get<2>(GetParam());

    const Size to(cvRound(from.width * scale), cvRound(from.height * scale));

    cv::Mat src(from, matType);
    cv::Mat dst(to, matType);

    declare.in(src, WARMUP_RNG).out(dst);

    TEST_CYCLE() resize(src, dst, dst.size(), 0, 0, INTER_AREA);

    // a difference of 1 is tolerated because rounding modes may differ
    // (round-to-nearest vs bankers' rounding)
    SANITY_CHECK(dst, 1);
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save