Merge branch '2.4'

pull/680/merge
Andrey Kamaev 12 years ago
commit 1ca8f33b4e
Changed files (number of changed lines in parentheses):
  1. CMakeLists.txt (13)
  2. android/service/engine/jni/BinderComponent/HardwareDetector.cpp (15)
  3. android/service/engine/jni/BinderComponent/HardwareDetector.h (1)
  4. android/service/engine/jni/BinderComponent/TegraDetector.cpp (8)
  5. android/service/engine/jni/NativeService/CommonPackageManager.cpp (5)
  6. android/service/engine/jni/NativeService/PackageInfo.cpp (9)
  7. android/service/engine/jni/NativeService/PackageInfo.h (3)
  8. android/service/engine/jni/Tests/OpenCVEngineTest.cpp (18)
  9. android/service/engine/jni/Tests/PackageInfoTest.cpp (22)
  10. android/service/engine/jni/Tests/PackageManagmentTest.cpp (16)
  11. android/service/engine/src/org/opencv/engine/HardwareDetector.java (2)
  12. android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java (10)
  13. cmake/OpenCVDetectAndroidSDK.cmake (10)
  14. cmake/OpenCVDetectOpenCL.cmake (228)
  15. cmake/OpenCVModule.cmake (15)
  16. cmake/cl2cpp.cmake (0)
  17. modules/calib3d/src/stereobm.cpp (4)
  18. modules/core/src/lapack.cpp (8)
  19. modules/core/test/test_math.cpp (29)
  20. modules/gpu/CMakeLists.txt (2)
  21. modules/gpu/doc/feature_detection_and_description.rst (103)
  22. modules/gpu/doc/video.rst (71)
  23. modules/gpu/include/opencv2/gpu.hpp (133)
  24. modules/gpu/include/opencv2/gpu/device/simd_functions.hpp (910)
  25. modules/gpu/include/opencv2/gpu/gpu.hpp (2)
  26. modules/gpu/perf/perf_calib3d.cpp (7)
  27. modules/gpu/perf/perf_core.cpp (15)
  28. modules/gpu/perf/perf_denoising.cpp (3)
  29. modules/gpu/perf/perf_features2d.cpp (102)
  30. modules/gpu/perf/perf_filters.cpp (1)
  31. modules/gpu/perf/perf_imgproc.cpp (65)
  32. modules/gpu/perf/perf_labeling.cpp (1)
  33. modules/gpu/perf/perf_main.cpp (67)
  34. modules/gpu/perf/perf_matop.cpp (1)
  35. modules/gpu/perf/perf_objdetect.cpp (3)
  36. modules/gpu/perf/perf_precomp.hpp (4)
  37. modules/gpu/perf/perf_video.cpp (111)
  38. modules/gpu/perf/utility.cpp (184)
  39. modules/gpu/perf/utility.hpp (63)
  40. modules/gpu/perf4au/main.cpp (56)
  41. modules/gpu/src/arithm.cpp (61)
  42. modules/gpu/src/color.cpp (118)
  43. modules/gpu/src/cuda/debayer.cu (383)
  44. modules/gpu/src/cuda/element_operations.cu (702)
  45. modules/gpu/src/element_operations.cpp (355)
  46. modules/gpu/test/main.cpp (66)
  47. modules/gpu/test/test_bgfg.cpp (65)
  48. modules/gpu/test/test_calib3d.cpp (2)
  49. modules/gpu/test/test_color.cpp (235)
  50. modules/gpu/test/test_copy_make_border.cpp (2)
  51. modules/gpu/test/test_core.cpp (2)
  52. modules/gpu/test/test_denoising.cpp (2)
  53. modules/gpu/test/test_features2d.cpp (301)
  54. modules/gpu/test/test_filters.cpp (2)
  55. modules/gpu/test/test_gpumat.cpp (2)
  56. modules/gpu/test/test_hough.cpp (2)
  57. modules/gpu/test/test_imgproc.cpp (2)
  58. modules/gpu/test/test_objdetect.cpp (2)
  59. modules/gpu/test/test_opengl.cpp (2)
  60. modules/gpu/test/test_optflow.cpp (2)
  61. modules/gpu/test/test_precomp.hpp (3)
  62. modules/gpu/test/test_pyramids.cpp (2)
  63. modules/gpu/test/test_remap.cpp (2)
  64. modules/gpu/test/test_resize.cpp (2)
  65. modules/gpu/test/test_stream.cpp (2)
  66. modules/gpu/test/test_threshold.cpp (2)
  67. modules/gpu/test/test_warp_affine.cpp (2)
  68. modules/gpu/test/test_warp_perspective.cpp (2)
  69. modules/gpu/test/utility.cpp (407)
  70. modules/gpu/test/utility.hpp (331)
  71. modules/imgproc/include/opencv2/imgproc/imgproc.hpp (2)
  72. modules/java/generator/rst_parser.py (4)
  73. modules/java/generator/src/java/android+CameraBridgeViewBase.java (19)
  74. modules/java/generator/src/java/android+JavaCameraView.java (6)
  75. modules/java/generator/src/java/android+NativeCameraView.java (6)
  76. modules/ml/include/opencv2/ml.hpp (2)
  77. modules/ml/src/svm.cpp (55)
  78. modules/nonfree/CMakeLists.txt (26)
  79. modules/nonfree/doc/background_subtraction.rst (79)
  80. modules/nonfree/doc/feature_detection.rst (201)
  81. modules/nonfree/doc/nonfree.rst (1)
  82. modules/nonfree/include/opencv2/nonfree/gpu.hpp (169)
  83. modules/nonfree/include/opencv2/nonfree/ocl.hpp (124)
  84. modules/nonfree/perf/perf_gpu.cpp (138)
  85. modules/nonfree/perf/perf_main.cpp (3)
  86. modules/nonfree/perf/perf_precomp.hpp (9)
  87. modules/nonfree/perf/perf_surf.ocl.cpp (80)
  88. modules/nonfree/src/cuda/surf.cu (30)
  89. modules/nonfree/src/cuda/vibe.cu (14)
  90. modules/nonfree/src/opencl/surf.cl (32)
  91. modules/nonfree/src/precomp.hpp (20)
  92. modules/nonfree/src/surf.ocl.cpp (46)
  93. modules/nonfree/src/surf_gpu.cpp (10)
  94. modules/nonfree/src/vibe_gpu.cpp (4)
  95. modules/nonfree/test/test_gpu.cpp (285)
  96. modules/nonfree/test/test_main.cpp (70)
  97. modules/nonfree/test/test_precomp.hpp (10)
  98. modules/nonfree/test/test_surf.ocl.cpp (226)
  99. modules/ocl/CMakeLists.txt (64)
  100. modules/ocl/doc/object_detection.rst (99)
  Some files were not shown because too many files have changed in this diff.

@@ -413,15 +413,6 @@ endif()
# --- OpenCL ---
if(WITH_OPENCL)
include(cmake/OpenCVDetectOpenCL.cmake)
if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
endif()
if(WITH_OPENCLAMDFFT AND CLAMDFFT_INCLUDE_DIR)
set(HAVE_CLAMDFFT 1)
endif()
if(WITH_OPENCLAMDBLAS AND CLAMDBLAS_INCLUDE_DIR)
set(HAVE_CLAMDBLAS 1)
endif()
endif()
# ----------------------------------------------------------------------------
@@ -799,11 +790,11 @@ if(HAVE_CUDA)
status(" Use fast math:" CUDA_FAST_MATH THEN YES ELSE NO)
endif()
if(HAVE_OPENCL AND BUILD_opencv_ocl)
if(HAVE_OPENCL)
status("")
status(" OpenCL")
if(OPENCL_INCLUDE_DIR)
status(" Include:" ${OPENCL_INCLUDE_DIR})
status(" Include path:" ${OPENCL_INCLUDE_DIRS})
endif()
if(OPENCL_LIBRARIES)
status(" libraries:" ${OPENCL_LIBRARIES})

@@ -163,22 +163,13 @@ int DetectKnownPlatforms()
{
int tegra_status = DetectTegra();
if (3 == tegra_status)
// All Tegra platforms since Tegra3
if (2 < tegra_status)
{
return PLATFORM_TEGRA3;
return PLATFORM_TEGRA + tegra_status - 1;
}
else
{
return PLATFORM_UNKNOWN;
}
// NOTE: Uncomment when all Tegras will be supported
/*if (tegra_status > 0)
* {
* return PLATFORM_TEGRA + tegra_status - 1;
}
else
{
return PLATFORM_UNKNOWN;
}*/
}
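
In plain terms, anything DetectTegra() reports as generation 3 or newer now maps onto consecutive PLATFORM_TEGRA* ids instead of being pinned to PLATFORM_TEGRA3. A minimal standalone sketch of the mapping (constant values as in HardwareDetector.h; DetectTegra() is assumed to return 0 for non-Tegra and N for Tegra N):

    // Sketch: status 3 -> PLATFORM_TEGRA + 3 - 1 = 3 (Tegra 3),
    //         status 4 -> PLATFORM_TEGRA + 4 - 1 = 4 (Tegra 4).
    #include <cassert>

    enum { PLATFORM_UNKNOWN = 0, PLATFORM_TEGRA = 1, PLATFORM_TEGRA3 = 3, PLATFORM_TEGRA4 = 4 };

    int MapTegraStatus(int tegra_status)
    {
        if (2 < tegra_status)
            return PLATFORM_TEGRA + tegra_status - 1;
        return PLATFORM_UNKNOWN;   // Tegra 1/2 and non-Tegra stay unsupported here
    }

    int main()
    {
        assert(MapTegraStatus(0) == PLATFORM_UNKNOWN);
        assert(MapTegraStatus(2) == PLATFORM_UNKNOWN);
        assert(MapTegraStatus(3) == PLATFORM_TEGRA3);
        assert(MapTegraStatus(4) == PLATFORM_TEGRA4);
        return 0;
    }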

@@ -27,6 +27,7 @@
#define PLATFORM_TEGRA 1L
#define PLATFORM_TEGRA2 2L
#define PLATFORM_TEGRA3 3L
#define PLATFORM_TEGRA4 4L
int DetectKnownPlatforms();
int GetProcessorCount();

@@ -7,6 +7,7 @@
#define KERNEL_CONFIG_TEGRA_MAGIC "CONFIG_ARCH_TEGRA=y"
#define KERNEL_CONFIG_TEGRA2_MAGIC "CONFIG_ARCH_TEGRA_2x_SOC=y"
#define KERNEL_CONFIG_TEGRA3_MAGIC "CONFIG_ARCH_TEGRA_3x_SOC=y"
#define KERNEL_CONFIG_TEGRA4_MAGIC "CONFIG_ARCH_TEGRA_11x_SOC=y"
#define MAX_DATA_LEN 4096
int DetectTegra()
@@ -19,9 +20,11 @@ int DetectTegra()
const char *tegra_config = KERNEL_CONFIG_TEGRA_MAGIC;
const char *tegra2_config = KERNEL_CONFIG_TEGRA2_MAGIC;
const char *tegra3_config = KERNEL_CONFIG_TEGRA3_MAGIC;
const char *tegra4_config = KERNEL_CONFIG_TEGRA4_MAGIC;
int len = strlen(tegra_config);
int len2 = strlen(tegra2_config);
int len3 = strlen(tegra3_config);
int len4 = strlen(tegra4_config);
while (0 != gzgets(kernelConfig, tmpbuf, KERNEL_CONFIG_MAX_LINE_WIDTH))
{
if (0 == strncmp(tmpbuf, tegra_config, len))
@@ -41,6 +44,11 @@ int DetectTegra()
break;
}
if (0 == strncmp(tmpbuf, tegra4_config, len4))
{
result = 4;
break;
}
}
gzclose(kernelConfig);
}
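
For reference, the detector's whole job is to grep the gzipped kernel config for the CONFIG_ARCH_TEGRA_* magic lines defined above. A self-contained sketch of that loop using the zlib API (error handling kept minimal; the buffer size is illustrative):

    // Scan /proc/config.gz line by line and report the matched Tegra
    // generation: 0 = not Tegra, 1 = generic Tegra, 2/3/4 = Tegra 2/3/4.
    #include <cstring>
    #include <zlib.h>

    int DetectTegraSketch()
    {
        static const char* magics[] = {
            "CONFIG_ARCH_TEGRA=y",          // any Tegra   -> 1
            "CONFIG_ARCH_TEGRA_2x_SOC=y",   // Tegra 2     -> 2
            "CONFIG_ARCH_TEGRA_3x_SOC=y",   // Tegra 3     -> 3
            "CONFIG_ARCH_TEGRA_11x_SOC=y",  // Tegra 4     -> 4
        };
        int result = 0;
        gzFile cfg = gzopen("/proc/config.gz", "rb");
        if (!cfg)
            return 0;
        char line[4096];
        while (gzgets(cfg, line, sizeof(line)) != 0)
        {
            for (int i = 3; i >= 1; i--)    // most specific magic first
                if (0 == strncmp(line, magics[i], strlen(magics[i])))
                    { result = i + 1; break; }
            if (result > 1)
                break;                      // exact generation found
            if (0 == strncmp(line, magics[0], strlen(magics[0])))
                result = 1;                 // generic Tegra; keep scanning
        }
        gzclose(cfg);
        return result;
    }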

@@ -197,6 +197,7 @@ std::vector<std::pair<int, int> > CommonPackageManager::InitArmRating()
result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv3d16 | FEATURES_HAS_NEON));
result.push_back(std::pair<int, int>(PLATFORM_TEGRA3, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
result.push_back(std::pair<int, int>(PLATFORM_TEGRA4, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
return result;
}
@@ -218,8 +219,8 @@ std::vector<std::pair<int, int> > CommonPackageManager::InitMipsRating()
}
const std::vector<std::pair<int, int> > CommonPackageManager::ArchRatings[] = {
CommonPackageManager::InitArmRating(),
CommonPackageManager::InitIntelRating(),
CommonPackageManager::InitArmRating(),
CommonPackageManager::InitIntelRating(),
CommonPackageManager::InitMipsRating()
};

@@ -18,6 +18,7 @@ map<int, string> PackageInfo::InitPlatformNameMap()
result[PLATFORM_TEGRA] = PLATFORM_TEGRA_NAME;
result[PLATFORM_TEGRA2] = PLATFORM_TEGRA2_NAME;
result[PLATFORM_TEGRA3] = PLATFORM_TEGRA3_NAME;
result[PLATFORM_TEGRA4] = PLATFORM_TEGRA4_NAME;
return result;
}
@@ -186,6 +187,10 @@ inline int SplitPlatfrom(const vector<string>& features)
{
result = PLATFORM_TEGRA3;
}
else if (PLATFORM_TEGRA4_NAME == tmp)
{
result = PLATFORM_TEGRA4;
}
}
else
{
@@ -425,6 +430,10 @@ InstallPath(install_path)
{
CpuID = ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON;
} break;
case PLATFORM_TEGRA4:
{
CpuID = ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON;
} break;
}
}
else

@@ -12,7 +12,6 @@
#define ARCH_ARMv7_NAME "armv7a"
#define ARCH_ARMv8_NAME "armv8"
#define FEATURES_HAS_VFPv3d16_NAME "vfpv3d16"
#define FEATURES_HAS_VFPv3_NAME "vfpv3"
#define FEATURES_HAS_NEON_NAME "neon"
@@ -25,7 +24,7 @@
#define PLATFORM_TEGRA_NAME "tegra"
#define PLATFORM_TEGRA2_NAME "tegra2"
#define PLATFORM_TEGRA3_NAME "tegra3"
#define PLATFORM_TEGRA4_NAME "tegra4"
class PackageInfo
{

@@ -201,6 +201,24 @@ TEST(OpenCVEngineTest, GetPathForCompatiblePackage2)
#endif
}
TEST(OpenCVEngineTest, GetPathForCompatiblePackage3)
{
sp<IOpenCVEngine> Engine = InitConnect();
Starter.PackageManager->InstalledPackages.clear();
Starter.PackageManager->InstallVersion(2040400, PLATFORM_TEGRA4, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON);
EXPECT_FALSE(NULL == Engine.get());
String16 result = Engine->GetLibPathByVersion(String16("2.4"));
#ifdef __SUPPORT_TEGRA3
EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra4/lib", String8(result).string());
#else
#ifdef __SUPPORT_ARMEABI_V7A_FEATURES
EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a_neon/lib", String8(result).string());
#else
EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string());
#endif
#endif
}
TEST(OpenCVEngineTest, InstallAndGetVersion)
{
sp<IOpenCVEngine> Engine = InitConnect();

@@ -85,6 +85,21 @@ TEST(PackageInfo, FullNameTegra3)
#endif
}
TEST(PackageInfo, FullNameTegra4)
{
PackageInfo info(2040400, PLATFORM_TEGRA4, ARCH_ARMv7 | FEATURES_HAS_NEON);
string name = info.GetFullName();
#ifdef __SUPPORT_TEGRA3
EXPECT_STREQ("org.opencv.lib_v24_tegra4", name.c_str());
#else
#ifdef __SUPPORT_ARMEABI_V7A_FEATURES
EXPECT_STREQ("org.opencv.lib_v24_armv7a_neon", name.c_str());
#else
EXPECT_STREQ("org.opencv.lib_v24_armv7a", name.c_str());
#endif
#endif
}
TEST(PackageInfo, FullNameX86SSE2)
{
PackageInfo info(2030000, PLATFORM_UNKNOWN, ARCH_X86 | FEATURES_HAS_SSE2);
@@ -148,6 +163,13 @@ TEST(PackageInfo, Tegra3FromFullName)
EXPECT_EQ(PLATFORM_TEGRA3, info.GetPlatform());
}
TEST(PackageInfo, Tegra4FromFullName)
{
PackageInfo info("org.opencv.lib_v24_tegra4", "/data/data/org.opencv.lib_v24_tegra4");
EXPECT_EQ(2040000, info.GetVersion());
EXPECT_EQ(PLATFORM_TEGRA4, info.GetPlatform());
}
#ifdef __SUPPORT_MIPS
TEST(PackageInfo, MipsFromFullName)
{

@@ -102,6 +102,22 @@ TEST(PackageManager, GetPackagePathForTegra3)
#endif
}
TEST(PackageManager, GetPackagePathForTegra4)
{
PackageManagerStub pm;
EXPECT_TRUE(pm.InstallVersion(2040400, PLATFORM_TEGRA4, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON));
string path = pm.GetPackagePathByVersion(2040400, PLATFORM_TEGRA4, ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_NEON);
#ifdef __SUPPORT_TEGRA3
EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra4/lib", path.c_str());
#else
#ifdef __SUPPORT_ARMEABI_V7A_FEATURES
EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a_neon/lib", path.c_str());
#else
EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", path.c_str());
#endif
#endif
}
#ifdef __SUPPORT_MIPS
TEST(PackageManager, GetPackagePathForMips)
{

@@ -33,6 +33,8 @@ public class HardwareDetector
public static final int PLATFORM_TEGRA = 1;
public static final int PLATFORM_TEGRA2 = 2;
public static final int PLATFORM_TEGRA3 = 3;
public static final int PLATFORM_TEGRA4 = 4;
public static final int PLATFORM_UNKNOWN = 0;

@@ -83,10 +83,14 @@ public class ManagerActivity extends Activity
{
HardwarePlatformView.setText("Tegra 2");
}
else
else if (HardwareDetector.PLATFORM_TEGRA3 == Platfrom)
{
HardwarePlatformView.setText("Tegra 3");
}
else
{
HardwarePlatformView.setText("Tegra 4");
}
}
else
{
@@ -367,10 +371,10 @@ public class ManagerActivity extends Activity
temp.put("Version", NormalizeVersion(OpenCVersion, VersionName));
// HACK: OpenCV Manager for Armv7-a Neon already has Tegra3 optimizations
// that is enabled on proper hardware
if (HardwareDetector.DetectKnownPlatforms() == HardwareDetector.PLATFORM_TEGRA3 &&
if (HardwareDetector.DetectKnownPlatforms() >= HardwareDetector.PLATFORM_TEGRA3 &&
HardwareName.equals("armv7a neon ") && Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD)
{
temp.put("Hardware", "Tegra 3");
temp.put("Hardware", "Tegra");
if (Tags == null)
{
Tags = "optimized";

@@ -264,13 +264,23 @@ macro(add_android_project target path)
ocv_list_filterout(android_proj_jni_files "\\\\.svn")
if(android_proj_jni_files AND EXISTS ${path}/jni/Android.mk AND NOT DEFINED JNI_LIB_NAME)
# find local module name in Android.mk file to build native lib
file(STRINGS "${path}/jni/Android.mk" JNI_LIB_NAME REGEX "LOCAL_MODULE[ ]*:=[ ]*.*" )
string(REGEX REPLACE "LOCAL_MODULE[ ]*:=[ ]*([a-zA-Z_][a-zA-Z_0-9]*)[ ]*" "\\1" JNI_LIB_NAME "${JNI_LIB_NAME}")
# find using of native app glue to determine native activity
file(STRINGS "${path}/jni/Android.mk" NATIVE_APP_GLUE REGEX ".*(call import-module,android/native_app_glue)" )
if(JNI_LIB_NAME)
ocv_include_modules_recurse(${android_proj_NATIVE_DEPS})
ocv_include_directories("${path}/jni")
if (NATIVE_APP_GLUE)
include_directories(${ANDROID_NDK}/sources/android/native_app_glue)
list(APPEND android_proj_jni_files ${ANDROID_NDK}/sources/android/native_app_glue/android_native_app_glue.c)
set(android_proj_NATIVE_DEPS ${android_proj_NATIVE_DEPS} android)
endif()
add_library(${JNI_LIB_NAME} MODULE ${android_proj_jni_files})
target_link_libraries(${JNI_LIB_NAME} ${OPENCV_LINKER_LIBS} ${android_proj_NATIVE_DEPS})

@@ -1,154 +1,104 @@
if(APPLE)
set(OPENCL_FOUND YES)
set(OPENCL_LIBRARIES "-framework OpenCL")
else()
set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library")
set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory")
mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
else(APPLE)
find_package(OpenCL QUIET)
if(WITH_OPENCLAMDFFT)
set(CLAMDFFT_SEARCH_PATH $ENV{CLAMDFFT_PATH})
if(NOT CLAMDFFT_SEARCH_PATH)
if(WIN32)
set( CLAMDFFT_SEARCH_PATH "C:\\Program Files (x86)\\AMD\\clAmdFft" )
endif()
endif()
set( CLAMDFFT_INCLUDE_SEARCH_PATH ${CLAMDFFT_SEARCH_PATH}/include )
if(UNIX)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(CLAMDFFT_LIB_SEARCH_PATH /usr/lib)
else()
set(CLAMDFFT_LIB_SEARCH_PATH /usr/lib64)
endif()
else()
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(CLAMDFFT_LIB_SEARCH_PATH ${CLAMDFFT_SEARCH_PATH}\\lib32\\import)
else()
set(CLAMDFFT_LIB_SEARCH_PATH ${CLAMDFFT_SEARCH_PATH}\\lib64\\import)
endif()
if (NOT OPENCL_FOUND)
find_path(OPENCL_ROOT_DIR
NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h include/nvidia-current/CL/cl.h
PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV INTELOCLSDKROOT
DOC "OpenCL root directory"
NO_DEFAULT_PATH)
find_path(OPENCL_INCLUDE_DIR
NAMES OpenCL/cl.h CL/cl.h
HINTS ${OPENCL_ROOT_DIR}
PATH_SUFFIXES include include/nvidia-current
DOC "OpenCL include directory")
if (X86_64)
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
elseif (X86)
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
endif()
find_library(OPENCL_LIBRARY
NAMES OpenCL
HINTS ${OPENCL_ROOT_DIR}
PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES}
DOC "OpenCL library")
mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPENCL DEFAULT_MSG OPENCL_LIBRARY OPENCL_INCLUDE_DIR )
endif()
endif(APPLE)
if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
if (X86_64)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
elseif (X86)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
endif()
if(WITH_OPENCLAMDFFT)
find_path(CLAMDFFT_ROOT_DIR
NAMES include/clAmdFft.h
PATHS ENV CLAMDFFT_PATH ENV ProgramFiles
PATH_SUFFIXES clAmdFft AMD/clAmdFft
DOC "AMD FFT root directory"
NO_DEFAULT_PATH)
find_path(CLAMDFFT_INCLUDE_DIR
NAMES clAmdFft.h
PATHS ${CLAMDFFT_INCLUDE_SEARCH_PATH}
PATH_SUFFIXES clAmdFft
NO_DEFAULT_PATH)
NAMES clAmdFft.h
HINTS ${CLAMDFFT_ROOT_DIR}
PATH_SUFFIXES include
DOC "clAmdFft include directory")
find_library(CLAMDFFT_LIBRARY
NAMES clAmdFft.Runtime
PATHS ${CLAMDFFT_LIB_SEARCH_PATH}
NO_DEFAULT_PATH)
if(CLAMDFFT_LIBRARY)
set(CLAMDFFT_LIBRARIES ${CLAMDFFT_LIBRARY})
else()
set(CLAMDFFT_LIBRARIES "")
endif()
endif()
if(WITH_OPENCLAMDBLAS)
set(CLAMDBLAS_SEARCH_PATH $ENV{CLAMDBLAS_PATH})
if(NOT CLAMDBLAS_SEARCH_PATH)
if(WIN32)
set( CLAMDBLAS_SEARCH_PATH "C:\\Program Files (x86)\\AMD\\clAmdBlas" )
endif()
endif()
set( CLAMDBLAS_INCLUDE_SEARCH_PATH ${CLAMDBLAS_SEARCH_PATH}/include )
if(UNIX)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(CLAMDBLAS_LIB_SEARCH_PATH /usr/lib)
else()
set(CLAMDBLAS_LIB_SEARCH_PATH /usr/lib64)
endif()
else()
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(CLAMDBLAS_LIB_SEARCH_PATH ${CLAMDBLAS_SEARCH_PATH}\\lib32\\import)
else()
set(CLAMDBLAS_LIB_SEARCH_PATH ${CLAMDBLAS_SEARCH_PATH}\\lib64\\import)
endif()
endif()
find_path(CLAMDBLAS_INCLUDE_DIR
NAMES clAmdBlas.h
PATHS ${CLAMDBLAS_INCLUDE_SEARCH_PATH}
PATH_SUFFIXES clAmdBlas
NO_DEFAULT_PATH)
find_library(CLAMDBLAS_LIBRARY
NAMES clAmdBlas
PATHS ${CLAMDBLAS_LIB_SEARCH_PATH}
NO_DEFAULT_PATH)
if(CLAMDBLAS_LIBRARY)
set(CLAMDBLAS_LIBRARIES ${CLAMDBLAS_LIBRARY})
else()
set(CLAMDBLAS_LIBRARIES "")
NAMES clAmdFft.Runtime
HINTS ${CLAMDFFT_ROOT_DIR}
PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES}
DOC "clAmdFft library")
if(CLAMDFFT_LIBRARY AND CLAMDFFT_INCLUDE_DIR)
set(HAVE_CLAMDFFT 1)
list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDFFT_INCLUDE_DIR}")
list(APPEND OPENCL_LIBRARIES "${CLAMDFFT_LIBRARY}")
endif()
endif()
# Try AMD/ATI Stream SDK
if (NOT OPENCL_FOUND)
set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})
set(ENV_AMDAPPSDKROOT $ENV{AMDAPPSDKROOT})
set(ENV_OPENCLROOT $ENV{OPENCLROOT})
set(ENV_CUDA_PATH $ENV{CUDA_PATH})
set(ENV_INTELOCLSDKROOT $ENV{INTELOCLSDKROOT})
if(ENV_AMDSTREAMSDKROOT)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_AMDAPPSDKROOT}/include)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDAPPSDKROOT}/lib/x86)
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDAPPSDKROOT}/lib/x86_64)
endif()
elseif(ENV_AMDSTREAMSDKROOT)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_AMDSTREAMSDKROOT}/include)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDSTREAMSDKROOT}/lib/x86)
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_AMDSTREAMSDKROOT}/lib/x86_64)
endif()
elseif(ENV_CUDA_PATH AND WIN32)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_CUDA_PATH}/include)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_CUDA_PATH}/lib/Win32)
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_CUDA_PATH}/lib/x64)
endif()
elseif(ENV_OPENCLROOT AND UNIX)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_OPENCLROOT}/inc)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} /usr/lib)
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} /usr/lib64)
endif()
elseif(ENV_INTELOCLSDKROOT)
set(OPENCL_INCLUDE_SEARCH_PATH ${ENV_INTELOCLSDKROOT}/include)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x86)
else()
set(OPENCL_LIB_SEARCH_PATH ${OPENCL_LIB_SEARCH_PATH} ${ENV_INTELOCLSDKROOT}/lib/x64)
endif()
endif()
if(OPENCL_INCLUDE_SEARCH_PATH)
find_path(OPENCL_INCLUDE_DIR
NAMES CL/cl.h OpenCL/cl.h
PATHS ${OPENCL_INCLUDE_SEARCH_PATH}
NO_DEFAULT_PATH)
else()
find_path(OPENCL_INCLUDE_DIR
NAMES CL/cl.h OpenCL/cl.h)
endif()
if(WITH_OPENCLAMDBLAS)
find_path(CLAMDBLAS_ROOT_DIR
NAMES include/clAmdBlas.h
PATHS ENV CLAMDFFT_PATH ENV ProgramFiles
PATH_SUFFIXES clAmdBlas AMD/clAmdBlas
DOC "AMD FFT root directory"
NO_DEFAULT_PATH)
if(OPENCL_LIB_SEARCH_PATH)
find_library(OPENCL_LIBRARY NAMES OpenCL PATHS ${OPENCL_LIB_SEARCH_PATH} NO_DEFAULT_PATH)
else()
find_library(OPENCL_LIBRARY NAMES OpenCL)
endif()
find_path(CLAMDBLAS_INCLUDE_DIR
NAMES clAmdBlas.h
HINTS ${CLAMDBLAS_ROOT_DIR}
PATH_SUFFIXES include
DOC "clAmdFft include directory")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
OPENCL
DEFAULT_MSG
OPENCL_LIBRARY OPENCL_INCLUDE_DIR
)
find_library(CLAMDBLAS_LIBRARY
NAMES clAmdBlas
HINTS ${CLAMDBLAS_ROOT_DIR}
PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES}
DOC "clAmdBlas library")
if(OPENCL_FOUND)
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
set(HAVE_OPENCL 1)
else()
set(OPENCL_LIBRARIES)
if(CLAMDBLAS_LIBRARY AND CLAMDBLAS_INCLUDE_DIR)
set(HAVE_CLAMDBLAS 1)
list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDBLAS_INCLUDE_DIR}")
list(APPEND OPENCL_LIBRARIES "${CLAMDBLAS_LIBRARY}")
endif()
else()
set(HAVE_OPENCL 1)
endif()
endif()
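
A quick way to verify that the headers and library this script locates belong to a working runtime is a minimal host program, compiled against OPENCL_INCLUDE_DIRS and linked against OPENCL_LIBRARIES (standard OpenCL C API, nothing OpenCV-specific):

    // Sanity check for the detected OpenCL runtime: enumerate platforms.
    #include <cstdio>
    #if defined(__APPLE__)
    #  include <OpenCL/cl.h>
    #else
    #  include <CL/cl.h>
    #endif

    int main()
    {
        cl_uint nplatforms = 0;
        cl_int err = clGetPlatformIDs(0, 0, &nplatforms);
        if (err != CL_SUCCESS || nplatforms == 0)
        {
            std::printf("no usable OpenCL platform (err=%d)\n", (int)err);
            return 1;
        }
        std::printf("OpenCL platforms found: %u\n", (unsigned)nplatforms);
        return 0;
    }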

@@ -444,6 +444,18 @@ macro(ocv_glob_module_sources)
source_group("Src\\Cuda" FILES ${lib_device_srcs} ${lib_device_hdrs})
endif()
file(GLOB cl_kernels "src/opencl/*.cl")
if(HAVE_OPENCL AND cl_kernels)
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp"
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
endif()
ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
SOURCES ${lib_srcs} ${lib_int_hdrs} ${device_objs} ${lib_device_srcs} ${lib_device_hdrs})
@@ -465,6 +477,9 @@ macro(ocv_create_module)
if (HAVE_CUDA)
target_link_libraries(${the_module} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
endif()
if(HAVE_OPENCL AND OPENCL_LIBRARIES)
target_link_libraries(${the_module} ${OPENCL_LIBRARIES})
endif()
endif()
add_dependencies(opencv_modules ${the_module})
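
The kernels.cpp generated by cl2cpp.cmake simply embeds each src/opencl/*.cl file as a C string literal that the module hands to the OpenCL compiler at runtime. Schematically (identifier and kernel here are hypothetical, not the exact generated names):

    // Illustrative shape of the generated kernels.cpp.
    #include <cstdio>
    #include <cstring>

    namespace cv { namespace ocl {
    const char* example_kernel_source =
        "__kernel void plus_one(__global int* a)\n"
        "{\n"
        "    a[get_global_id(0)] += 1;\n"
        "}\n";
    }}

    int main()
    {
        // The module later compiles this string with the OpenCL runtime.
        std::printf("embedded kernel: %u bytes\n",
                    (unsigned)std::strlen(cv::ocl::example_kernel_source));
        return 0;
    }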

@@ -195,9 +195,9 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
d1 = _mm_sub_epi16(d1, c1);
__m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z);
__m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z);
__m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z);
__m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z);
__m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z);
__m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z);
d2 = _mm_sub_epi16(d2, c2);
d3 = _mm_sub_epi16(d3, c3);

@@ -531,12 +531,12 @@ template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double
#endif
template<typename _Tp> void
JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, int m, int n, int n1, double minval)
JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
int m, int n, int n1, double minval, _Tp eps)
{
VBLAS<_Tp> vblas;
AutoBuffer<double> Wbuf(n);
double* W = Wbuf;
_Tp eps = DBL_EPSILON*10;
int i, j, k, iter, max_iter = std::max(m, 30);
_Tp c, s;
double sd;
@@ -729,12 +729,12 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, int m, int
static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
{
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN);
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
}
static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
{
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN);
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
}
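
The point of threading eps through as a parameter: the old hard-coded `_Tp eps = DBL_EPSILON*10` gave the float instantiation a convergence threshold (~2.2e-15) far below what single precision can resolve (FLT_EPSILON is ~1.19e-7), so the float path could iterate to max_iter without ever converging. A tiny demo of the scales involved:

    // Compare the old shared threshold with the new per-type ones.
    #include <cfloat>
    #include <cstdio>

    int main()
    {
        std::printf("old threshold (both types): %g\n", DBL_EPSILON * 10);
        std::printf("new float  threshold:       %g\n", FLT_EPSILON * 2);
        std::printf("new double threshold:       %g\n", DBL_EPSILON * 10);
        return 0;
    }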
/* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */

@@ -2599,6 +2599,35 @@ TEST(Core_Trace, accuracy) { Core_TraceTest test; test.safe_run(); }
TEST(Core_SolvePoly, accuracy) { Core_SolvePolyTest test; test.safe_run(); }
TEST(Core_Phase, accuracy) { Core_PhaseTest test; test.safe_run(); }
TEST(Core_SVD, flt)
{
float a[] = {
1.23377746e+011f, -7.05490125e+010f, -4.18380882e+010f, -11693456.f,
-39091328.f, 77492224.f, -7.05490125e+010f, 2.36211143e+011f,
-3.51093473e+010f, 70773408.f, -4.83386156e+005f, -129560368.f,
-4.18380882e+010f, -3.51093473e+010f, 9.25311222e+010f, -49052424.f,
43922752.f, 12176842.f, -11693456.f, 70773408.f, -49052424.f, 8.40836094e+004f,
5.17475293e+003f, -1.16122949e+004f, -39091328.f, -4.83386156e+005f,
43922752.f, 5.17475293e+003f, 5.16047969e+004f, 5.68887842e+003f, 77492224.f,
-129560368.f, 12176842.f, -1.16122949e+004f, 5.68887842e+003f,
1.28060578e+005f
};
float b[] = {
283751232.f, 2.61604198e+009f, -745033216.f, 2.31125625e+005f,
-4.52429188e+005f, -1.37596525e+006f
};
Mat A(6, 6, CV_32F, a);
Mat B(6, 1, CV_32F, b);
Mat X, B1;
solve(A, B, X, DECOMP_SVD);
B1 = A*X;
EXPECT_LE(norm(B1, B, NORM_L2 + NORM_RELATIVE), FLT_EPSILON*10);
}
// TODO: eigenvv, invsqrt, cbrt, fastarctan, (round, floor, ceil(?)),

@@ -3,7 +3,7 @@ if(ANDROID OR IOS)
endif()
set(the_description "GPU-accelerated Computer Vision")
ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree opencv_photo opencv_legacy)
ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy)
ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")

@@ -5,109 +5,6 @@ Feature Detection and Description
gpu::SURF_GPU
-------------
.. ocv:class:: gpu::SURF_GPU
Class used for extracting Speeded Up Robust Features (SURF) from an image. ::
class SURF_GPU
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_GPU();
//! the full constructor taking all the necessary parameters
explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f);
//! returns the descriptor size in float's (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
void uploadKeypoints(const vector<KeyPoint>& keypoints,
GpuMat& keypointsGPU);
//! download keypoints from device to host memory
void downloadKeypoints(const GpuMat& keypointsGPU,
vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
void downloadDescriptors(const GpuMat& descriptorsGPU,
vector<float>& descriptors);
void operator()(const GpuMat& img, const GpuMat& mask,
GpuMat& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask,
GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false,
bool calcOrientation = true);
void operator()(const GpuMat& img, const GpuMat& mask,
std::vector<KeyPoint>& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask,
std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false,
bool calcOrientation = true);
void operator()(const GpuMat& img, const GpuMat& mask,
std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors,
bool useProvidedKeypoints = false,
bool calcOrientation = true);
void releaseMemory();
// SURF parameters
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = keypointsRatio * img.size().area()
float keypointsRatio;
GpuMat sum, mask1, maskSum, intBuffer;
GpuMat det, trace;
GpuMat maxPosBuffer;
};
The class ``SURF_GPU`` implements Speeded Up Robust Features descriptor. There is a fast multi-scale Hessian keypoint detector that can be used to find the keypoints (which is the default option). But the descriptors can also be computed for the user-specified keypoints. Only 8-bit grayscale images are supported.
The class ``SURF_GPU`` can store results in the GPU and CPU memory. It provides functions to convert results between CPU and GPU version ( ``uploadKeypoints``, ``downloadKeypoints``, ``downloadDescriptors`` ). The format of CPU results is the same as ``SURF`` results. GPU results are stored in ``GpuMat``. The ``keypoints`` matrix is :math:`\texttt{nFeatures} \times 7` matrix with the ``CV_32FC1`` type.
* ``keypoints.ptr<float>(X_ROW)[i]`` contains x coordinate of the i-th feature.
* ``keypoints.ptr<float>(Y_ROW)[i]`` contains y coordinate of the i-th feature.
* ``keypoints.ptr<float>(LAPLACIAN_ROW)[i]`` contains the laplacian sign of the i-th feature.
* ``keypoints.ptr<float>(OCTAVE_ROW)[i]`` contains the octave of the i-th feature.
* ``keypoints.ptr<float>(SIZE_ROW)[i]`` contains the size of the i-th feature.
* ``keypoints.ptr<float>(ANGLE_ROW)[i]`` contain orientation of the i-th feature.
* ``keypoints.ptr<float>(HESSIAN_ROW)[i]`` contains the response of the i-th feature.
The ``descriptors`` matrix is :math:`\texttt{nFeatures} \times \texttt{descriptorSize}` matrix with the ``CV_32FC1`` type.
The class ``SURF_GPU`` uses some buffers and provides access to it. All buffers can be safely released between function calls.
.. seealso:: :ocv:class:`SURF`
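
For context, a minimal usage sketch of the class whose documentation moves out of this file (SURF_GPU lives in the nonfree module after this merge; the image name and threshold are illustrative):

    // Detect keypoints and compute descriptors on the GPU, then download
    // the keypoints for CPU-side use.
    #include <vector>
    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/nonfree/gpu.hpp>   // SURF_GPU's home after this merge

    int main()
    {
        cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE); // 8-bit grayscale only
        cv::gpu::GpuMat d_img(img), d_keypoints, d_descriptors;

        cv::gpu::SURF_GPU surf(400.0 /* hessianThreshold */);
        surf(d_img, cv::gpu::GpuMat() /* no mask */, d_keypoints, d_descriptors);

        std::vector<cv::KeyPoint> keypoints;
        surf.downloadKeypoints(d_keypoints, keypoints);
        return keypoints.empty() ? 1 : 0;
    }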
gpu::FAST_GPU
-------------
.. ocv:class:: gpu::FAST_GPU

@@ -579,76 +579,6 @@ Releases all inner buffer's memory.
gpu::VIBE_GPU
-------------
.. ocv:class:: gpu::VIBE_GPU
Class used for background/foreground segmentation. ::
class VIBE_GPU
{
public:
explicit VIBE_GPU(unsigned long rngSeed = 1234567);
void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
void release();
...
};
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [VIBE2011]_.
gpu::VIBE_GPU::VIBE_GPU
-----------------------
The constructor.
.. ocv:function:: gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed = 1234567)
:param rngSeed: Value used to initiate a random sequence.
Default constructor sets all parameters to default values.
gpu::VIBE_GPU::initialize
-------------------------
Initialize background model and allocates all inner buffers.
.. ocv:function:: void gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null())
:param firstFrame: First frame from video sequence.
:param stream: Stream for the asynchronous version.
gpu::VIBE_GPU::operator()
-------------------------
Updates the background model and returns the foreground mask
.. ocv:function:: void gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null())
:param frame: Next video frame.
:param fgmask: The output foreground mask as an 8-bit binary image.
:param stream: Stream for the asynchronous version.
gpu::VIBE_GPU::release
----------------------
Releases all inner buffer's memory.
.. ocv:function:: void gpu::VIBE_GPU::release()
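
The three calls documented above compose as follows; a minimal sketch (VIBE_GPU also moves to the nonfree module in this merge, and the video path is illustrative):

    // initialize / update / release cycle for the ViBe background model.
    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/nonfree/gpu.hpp>   // VIBE_GPU's home after this merge

    int main()
    {
        cv::VideoCapture cap("video.avi");
        cv::Mat frame;
        if (!cap.read(frame)) return 1;

        cv::gpu::VIBE_GPU vibe;
        cv::gpu::GpuMat d_frame(frame), d_fgmask;
        vibe.initialize(d_frame);            // build model from the first frame

        while (cap.read(frame))
        {
            d_frame.upload(frame);
            vibe(d_frame, d_fgmask);         // update model, fill 8-bit mask
        }
        vibe.release();
        return 0;
    }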
gpu::GMG_GPU
------------
.. ocv:class:: gpu::GMG_GPU
@@ -1209,5 +1139,4 @@ Parse next video frame. Implementation must call this method after new frame was
.. [MOG2001] P. KadewTraKuPong and R. Bowden. *An improved adaptive background mixture model for real-time tracking with shadow detection*. Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001
.. [MOG2004] Z. Zivkovic. *Improved adaptive Gausian mixture model for background subtraction*. International Conference Pattern Recognition, UK, August, 2004
.. [ShadowDetect2003] Prati, Mikic, Trivedi and Cucchiarra. *Detecting Moving Shadows...*. IEEE PAMI, 2003
.. [VIBE2011] O. Barnich and M. Van D Roogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011
.. [GMG2012] A. Godbehere, A. Matsukawa and K. Goldberg. *Visual Tracking of Human Visitors under Variable-Lighting Conditions for a Responsive Audio Art Installation*. American Control Conference, Montreal, June 2012

@@ -491,6 +491,26 @@ CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat&
//! converts image from one color space to another
CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
enum
{
// Bayer Demosaicing (Malvar, He, and Cutler)
COLOR_BayerBG2BGR_MHT = 256,
COLOR_BayerGB2BGR_MHT = 257,
COLOR_BayerRG2BGR_MHT = 258,
COLOR_BayerGR2BGR_MHT = 259,
COLOR_BayerBG2RGB_MHT = COLOR_BayerRG2BGR_MHT,
COLOR_BayerGB2RGB_MHT = COLOR_BayerGR2BGR_MHT,
COLOR_BayerRG2RGB_MHT = COLOR_BayerBG2BGR_MHT,
COLOR_BayerGR2RGB_MHT = COLOR_BayerGB2BGR_MHT,
COLOR_BayerBG2GRAY_MHT = 260,
COLOR_BayerGB2GRAY_MHT = 261,
COLOR_BayerRG2GRAY_MHT = 262,
COLOR_BayerGR2GRAY_MHT = 263
};
CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null());
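
These codes are consumed by the new demosaicing() entry point rather than cvtColor(); a minimal usage sketch (assuming an 8-bit single-channel frame with a BGGR Bayer pattern; file names are illustrative):

    // Malvar-He-Cutler (MHT) demosaicing of a raw Bayer frame on the GPU.
    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>

    int main()
    {
        cv::Mat raw = cv::imread("bayer_raw.png", cv::IMREAD_GRAYSCALE); // CV_8UC1
        cv::gpu::GpuMat d_raw(raw), d_bgr;

        // The *_MHT codes select the Malvar-He-Cutler path; the plain Bayer
        // codes keep the bilinear interpolation path.
        cv::gpu::demosaicing(d_raw, d_bgr, cv::gpu::COLOR_BayerBG2BGR_MHT, 3);

        cv::Mat bgr;
        d_bgr.download(bgr);
        return cv::imwrite("demosaiced.png", bgr) ? 0 : 1;
    }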
//! swap channels
//! dstOrder - Integer array describing how channel values are permutated. The n-th entry
//! of the array contains the number of the channel that is stored in the n-th channel of
@@ -894,9 +914,11 @@ CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels
//! Calculates histogram for 8u one channel image
//! Output hist will have one row, 256 cols and CV32SC1 type.
CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
//! normalizes the grayscale image brightness and contrast by normalizing its histogram
CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
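
The extra hist/buf arguments exist so scratch GPU memory can be allocated once and reused across calls instead of being reallocated on every invocation; a minimal sketch of that pattern (frame contents illustrative):

    // Reuse the hist/buf scratch GpuMats across a processing loop.
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat frame(480, 640, CV_8UC1, cv::Scalar(127));
        cv::gpu::GpuMat d_src, d_dst, d_hist, d_buf;   // scratch kept alive

        for (int i = 0; i < 100; ++i)
        {
            d_src.upload(frame);
            // hist and buf are allocated on the first call, reused afterwards.
            cv::gpu::equalizeHist(d_src, d_dst, d_hist, d_buf);
        }
        return 0;
    }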
//////////////////////////////// StereoBM_GPU ////////////////////////////////
@@ -1386,82 +1408,6 @@ private:
friend class CascadeClassifier_GPU_LBP;
};
////////////////////////////////// SURF //////////////////////////////////////////
class CV_EXPORTS SURF_GPU
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_GPU();
//! the full constructor taking all the necessary parameters
explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
//! returns the descriptor size in float's (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
static void uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
//! download keypoints from device to host memory
static void downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
static void downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors);
//! finds the keypoints using fast hessian detector used in SURF
//! supports CV_8UC1 images
//! keypoints will have nFeature cols and 6 rows
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints);
//! finds the keypoints and computes their descriptors.
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
bool useProvidedKeypoints = false);
void releaseMemory();
// SURF parameters
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
float keypointsRatio;
GpuMat sum, mask1, maskSum, intBuffer;
GpuMat det, trace;
GpuMat maxPosBuffer;
};
////////////////////////////////// FAST //////////////////////////////////////////
class CV_EXPORTS FAST_GPU
@@ -2129,41 +2075,6 @@ private:
GpuMat bgmodelUsedModes_; //keep track of number of modes per pixel
};
/*!
* The class implements the following algorithm:
* "ViBe: A universal background subtraction algorithm for video sequences"
* O. Barnich and M. Van D Roogenbroeck
* IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011
*/
class CV_EXPORTS VIBE_GPU
{
public:
//! the default constructor
explicit VIBE_GPU(unsigned long rngSeed = 1234567);
//! re-initiaization method
void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
//! the update operator
void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
//! releases all inner buffers
void release();
int nbSamples; // number of samples per pixel
int reqMatches; // #_min
int radius; // R
int subsamplingFactor; // amount of random subsampling
private:
Size frameSize_;
unsigned long rngSeed_;
GpuMat randStates_;
GpuMat samples_;
};
/**
* Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1)
* images of the same size, where 255 indicates Foreground and 0 represents Background.

@@ -0,0 +1,910 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2010-2013, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of NVIDIA Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __OPENCV_GPU_SIMD_FUNCTIONS_HPP__
#define __OPENCV_GPU_SIMD_FUNCTIONS_HPP__
#include "common.hpp"
/*
This header file contains inline functions that implement intra-word SIMD
operations, that are hardware accelerated on sm_3x (Kepler) GPUs. Efficient
emulation code paths are provided for earlier architectures (sm_1x, sm_2x)
to make the code portable across all GPUs supported by CUDA. The following
functions are currently implemented:
vadd2(a,b) per-halfword unsigned addition, with wrap-around: a + b
vsub2(a,b) per-halfword unsigned subtraction, with wrap-around: a - b
vabsdiff2(a,b) per-halfword unsigned absolute difference: |a - b|
vavg2(a,b) per-halfword unsigned average: (a + b) / 2
vavrg2(a,b) per-halfword unsigned rounded average: (a + b + 1) / 2
vseteq2(a,b) per-halfword unsigned comparison: a == b ? 1 : 0
vcmpeq2(a,b) per-halfword unsigned comparison: a == b ? 0xffff : 0
vsetge2(a,b) per-halfword unsigned comparison: a >= b ? 1 : 0
vcmpge2(a,b) per-halfword unsigned comparison: a >= b ? 0xffff : 0
vsetgt2(a,b) per-halfword unsigned comparison: a > b ? 1 : 0
vcmpgt2(a,b) per-halfword unsigned comparison: a > b ? 0xffff : 0
vsetle2(a,b) per-halfword unsigned comparison: a <= b ? 1 : 0
vcmple2(a,b) per-halfword unsigned comparison: a <= b ? 0xffff : 0
vsetlt2(a,b) per-halfword unsigned comparison: a < b ? 1 : 0
vcmplt2(a,b) per-halfword unsigned comparison: a < b ? 0xffff : 0
vsetne2(a,b) per-halfword unsigned comparison: a != b ? 1 : 0
vcmpne2(a,b) per-halfword unsigned comparison: a != b ? 0xffff : 0
vmax2(a,b) per-halfword unsigned maximum: max(a, b)
vmin2(a,b) per-halfword unsigned minimum: min(a, b)
vadd4(a,b) per-byte unsigned addition, with wrap-around: a + b
vsub4(a,b) per-byte unsigned subtraction, with wrap-around: a - b
vabsdiff4(a,b) per-byte unsigned absolute difference: |a - b|
vavg4(a,b) per-byte unsigned average: (a + b) / 2
vavrg4(a,b) per-byte unsigned rounded average: (a + b + 1) / 2
vseteq4(a,b) per-byte unsigned comparison: a == b ? 1 : 0
vcmpeq4(a,b) per-byte unsigned comparison: a == b ? 0xff : 0
vsetge4(a,b) per-byte unsigned comparison: a >= b ? 1 : 0
vcmpge4(a,b) per-byte unsigned comparison: a >= b ? 0xff : 0
vsetgt4(a,b) per-byte unsigned comparison: a > b ? 1 : 0
vcmpgt4(a,b) per-byte unsigned comparison: a > b ? 0xff : 0
vsetle4(a,b) per-byte unsigned comparison: a <= b ? 1 : 0
vcmple4(a,b) per-byte unsigned comparison: a <= b ? 0xff : 0
vsetlt4(a,b) per-byte unsigned comparison: a < b ? 1 : 0
vcmplt4(a,b) per-byte unsigned comparison: a < b ? 0xff : 0
vsetne4(a,b) per-byte unsigned comparison: a != b ? 1: 0
vcmpne4(a,b) per-byte unsigned comparison: a != b ? 0xff: 0
vmax4(a,b) per-byte unsigned maximum: max(a, b)
vmin4(a,b) per-byte unsigned minimum: min(a, b)
*/
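
The semantics in this table are easy to pin down with a scalar reference that the emulation paths below must match; a host-side sketch for two of the ops, vadd2 and vavrg2 (portable C++, no CUDA needed):

    // Treat a 32-bit word as two independent unsigned 16-bit lanes.
    #include <cassert>
    #include <cstdint>

    static uint32_t ref_vadd2(uint32_t a, uint32_t b)   // per-halfword a + b, wrap-around
    {
        uint32_t lo = (a + b) & 0x0000ffffu;            // low lane, carry discarded
        uint32_t hi = ((a >> 16) + (b >> 16)) << 16;    // high lane, isolated from low carry
        return hi | lo;
    }

    static uint32_t ref_vavrg2(uint32_t a, uint32_t b)  // per-halfword (a + b + 1) / 2
    {
        uint32_t lo = (((a & 0xffffu) + (b & 0xffffu) + 1) >> 1) & 0xffffu;
        uint32_t hi = (((a >> 16) + (b >> 16) + 1) >> 1) << 16;
        return hi | lo;
    }

    int main()
    {
        assert(ref_vadd2(0x0001ffffu, 0x00010001u) == 0x00020000u);  // low lane wraps
        assert(ref_vavrg2(0x00030000u, 0x00040001u) == 0x00040001u); // rounds up per lane
        return 0;
    }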
namespace cv { namespace gpu { namespace device
{
// 2
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = a ^ b; // sum bits
r = a + b; // actual sum
s = s ^ r; // determine carry-ins for each bit position
s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
r = r - s; // subtract out carry-out from low word
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = a ^ b; // sum bits
r = a - b; // actual sum
s = s ^ r; // determine carry-ins for each bit position
s = s & 0x00010000; // borrow to high word
r = r + s; // compensate for borrow from low word
#endif
return r;
}
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u, v;
s = a & 0x0000ffff; // extract low halfword
r = b & 0x0000ffff; // extract low halfword
u = ::max(r, s); // maximum of low halfwords
v = ::min(r, s); // minimum of low halfwords
s = a & 0xffff0000; // extract high halfword
r = b & 0xffff0000; // extract high halfword
t = ::max(r, s); // maximum of high halfwords
s = ::min(r, s); // minimum of high halfwords
r = u | t; // maximum of both halfwords
s = v | s; // minimum of both halfwords
r = r - s; // |a - b| = max(a,b) - min(a,b);
#endif
return r;
}
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
{
unsigned int r, s;
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
s = a ^ b;
r = a & b;
s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
s = s >> 1;
s = r + s;
return s;
}
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
unsigned int s;
s = a ^ b;
r = a | b;
s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
s = s >> 1;
r = r - s;
#endif
return r;
}
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r & ~c; // msb = 1, if r was 0x0000
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vseteq2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r & ~c; // msb = 1, if r was 0x0000
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetge2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80008000; // msbs = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetgt2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80008000; // msbs = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetle2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetlt2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r | c; // msb = 1, if r was not 0x0000
c = c & 0x80008000; // extract msbs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetne2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r | c; // msb = 1, if r was not 0x0000
c = c & 0x80008000; // extract msbs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u;
r = a & 0x0000ffff; // extract low halfword
s = b & 0x0000ffff; // extract low halfword
t = ::max(r, s); // maximum of low halfwords
r = a & 0xffff0000; // extract high halfword
s = b & 0xffff0000; // extract high halfword
u = ::max(r, s); // maximum of high halfwords
r = t | u; // combine halfword maximums
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u;
r = a & 0x0000ffff; // extract low halfword
s = b & 0x0000ffff; // extract low halfword
t = ::min(r, s); // minimum of low halfwords
r = a & 0xffff0000; // extract high halfword
s = b & 0xffff0000; // extract high halfword
u = ::min(r, s); // minimum of high halfwords
r = t | u; // combine halfword minimums
#endif
return r;
}
// 4
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t;
s = a ^ b; // sum bits
r = a & 0x7f7f7f7f; // clear msbs
t = b & 0x7f7f7f7f; // clear msbs
s = s & 0x80808080; // msb sum bits
r = r + t; // add without msbs, record carry-out in msbs
r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out
#endif /* __CUDA_ARCH__ >= 300 */
return r;
}
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t;
s = a ^ ~b; // inverted sum bits
r = a | 0x80808080; // set msbs
t = b & 0x7f7f7f7f; // clear msbs
s = s & 0x80808080; // inverted msb sum bits
r = r - t; // subtract w/o msbs, record inverted borrows in msb
r = r ^ s; // combine inverted msb sum bits and borrows
#endif
return r;
}
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
{
unsigned int r, s;
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
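// e.g. per byte: a = 7, b = 4 -> (a & b) = 4, ((a ^ b) & 0xfe) >> 1 = 1, so 4 + 1 = 5 = (7 + 4) / 2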
s = a ^ b;
r = a & b;
s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
s = s >> 1;
s = r + s;
return s;
}
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
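// e.g. per byte: a = 7, b = 4 -> (a | b) = 7, ((a ^ b) & 0xfe) >> 1 = 1, so 7 - 1 = 6 = (7 + 4 + 1) / 2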
unsigned int c;
c = a ^ b;
r = a | b;
c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
c = c >> 1;
r = r - c;
#endif
return r;
}
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x80
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r & ~c; // msb = 1, if r was 0x00
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
{
unsigned int r, t;
#if __CUDA_ARCH__ >= 300
r = vseteq4(a, b);
t = r << 8; // convert bool
r = t - r; // to mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
t = a ^ b; // 0x00 if a == b
r = t | 0x80808080; // set msbs, to catch carry out
t = t ^ r; // extract msbs, msb = 1 if t < 0x80
r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
r = t & ~r; // msb = 1, if t was 0x00
t = r >> 7; // build mask
t = r - t; // from
r = t | r; // msbs
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
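// per byte, (b + ~a + 1) = 256 + (b - a), so the carry-out lands in the msb and is set exactly when b >= a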
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetle4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetlt4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
{
unsigned int r, s;
#if __CUDA_ARCH__ >= 300
r = vsetge4(a, b);
s = r << 8; // convert bool
r = s - r; // to mask
#else
asm ("not.b32 %0,%0;" : "+r"(b));
r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2
r = r & 0x80808080; // msb = carry-outs
s = r >> 7; // build mask
s = r - s; // from
r = s | r; // msbs
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetgt4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r | c; // msb = 1, if r was not 0x00
c = c & 0x80808080; // extract msbs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetne4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r | c; // msb = 1, if r was not 0x00
c = c & 0x80808080; // extract msbs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a ^ b; //
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
r = s ^ r; // select a when b >= a, else select b => min(a,b)
r = s - r; // |a - b| = max(a,b) - min(a,b);
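// e.g. per byte: a = 9, b = 3 -> s = 9 (max), r = 3 (min), result 9 - 3 = 6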
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a & s; // select a when a >= b
s = b & ~s; // select b when a < b
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned maximum
}
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(b, a); // mask = 0xff if b >= a
r = a & s; // select a when b >= a
s = b & ~s; // select b when b < a
r = r | s; // combine byte selections
#endif
return r;
}
}}}
#endif // __OPENCV_GPU_SIMD_FUNCTIONS_HPP__
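For context, these intrinsics operate on 32-bit words that pack two 16-bit or four 8-bit lanes, so element-wise kernels can process several pixels per load. A minimal sketch of that usage pattern follows; the kernel name addBytes and its launch configuration are illustrative, not part of this header:
#include "opencv2/gpu/device/simd_functions.hpp"
using namespace cv::gpu::device;
// Per-byte addition of two packed 8-bit images (saturating on sm_20 and newer):
// each thread consumes one 32-bit word, i.e. four pixels at once.
__global__ void addBytes(const unsigned int* a, const unsigned int* b, unsigned int* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = vadd4(a[i], b[i]); // one vadd4 instruction on sm_30+, emulated on older archs
}
// launch: addBytes<<<divUp(n, 256), 256>>>(d_a, d_b, d_dst, n);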

@ -7,7 +7,7 @@
// copy or use the software.
//
//
// License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.

@ -2,6 +2,7 @@
using namespace std;
using namespace testing;
using namespace perf;
//////////////////////////////////////////////////////////////////////
// StereoBM
@ -12,7 +13,7 @@ DEF_PARAM_TEST_1(ImagePair, pair_string);
PERF_TEST_P(ImagePair, Calib3D_StereoBM,
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
{
declare.time(5.0);
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
@ -53,7 +54,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBM,
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
declare.time(10.0);
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0));
ASSERT_FALSE(imgLeft.empty());
@ -87,7 +88,7 @@ PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
declare.time(10.0);
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());

@ -1748,7 +1748,10 @@ PERF_TEST_P(Sz_Depth_Norm, Core_Norm,
const int normType = GET_PARAM(2);
cv::Mat src(size, depth);
declare.in(src, WARMUP_RNG);
if (depth == CV_8U)
cv::randu(src, 0, 254);
else
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
@ -1923,7 +1926,10 @@ PERF_TEST_P(Sz_Depth, Core_MinMax,
const int depth = GET_PARAM(1);
cv::Mat src(size, depth);
declare.in(src, WARMUP_RNG);
if (depth == CV_8U)
cv::randu(src, 0, 254);
else
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
@ -1958,7 +1964,10 @@ PERF_TEST_P(Sz_Depth, Core_MinMaxLoc,
const int depth = GET_PARAM(1);
cv::Mat src(size, depth);
declare.in(src, WARMUP_RNG);
if (depth == CV_8U)
cv::randu(src, 0, 254);
else
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{

@ -2,6 +2,7 @@
using namespace std;
using namespace testing;
using namespace perf;
#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
@ -63,7 +64,7 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
Values(21),
Values(5)))
{
declare.time(60.0);
declare.time(600.0);
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);

@ -2,105 +2,7 @@
using namespace std;
using namespace testing;
struct KeypointIdxCompare
{
std::vector<cv::KeyPoint>* keypoints;
explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
bool operator ()(size_t i1, size_t i2) const
{
cv::KeyPoint kp1 = (*keypoints)[i1];
cv::KeyPoint kp2 = (*keypoints)[i2];
if (kp1.pt.x != kp2.pt.x)
return kp1.pt.x < kp2.pt.x;
if (kp1.pt.y != kp2.pt.y)
return kp1.pt.y < kp2.pt.y;
if (kp1.response != kp2.response)
return kp1.response < kp2.response;
return kp1.octave < kp2.octave;
}
};
static void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray())
{
std::vector<size_t> indexies(keypoints.size());
for (size_t i = 0; i < indexies.size(); ++i)
indexies[i] = i;
std::sort(indexies.begin(), indexies.end(), KeypointIdxCompare(&keypoints));
std::vector<cv::KeyPoint> new_keypoints;
cv::Mat new_descriptors;
new_keypoints.resize(keypoints.size());
cv::Mat descriptors;
if (_descriptors.needed())
{
descriptors = _descriptors.getMat();
new_descriptors.create(descriptors.size(), descriptors.type());
}
for (size_t i = 0; i < indexies.size(); ++i)
{
size_t new_idx = indexies[i];
new_keypoints[i] = keypoints[new_idx];
if (!new_descriptors.empty())
descriptors.row((int) new_idx).copyTo(new_descriptors.row((int) i));
}
keypoints.swap(new_keypoints);
if (_descriptors.needed())
new_descriptors.copyTo(_descriptors);
}
//////////////////////////////////////////////////////////////////////
// SURF
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, Features2D_SURF,
Values<string>("gpu/perf/aloe.png"))
{
declare.time(50.0);
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::SURF_GPU d_surf;
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
TEST_CYCLE() d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
std::vector<cv::KeyPoint> gpu_keypoints;
d_surf.downloadKeypoints(d_keypoints, gpu_keypoints);
cv::Mat gpu_descriptors(d_descriptors);
sortKeyPoints(gpu_keypoints, gpu_descriptors);
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
SANITY_CHECK(gpu_descriptors, 1e-3);
}
else
{
cv::SURF surf;
std::vector<cv::KeyPoint> cpu_keypoints;
cv::Mat cpu_descriptors;
TEST_CYCLE() surf(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
SANITY_CHECK(cpu_descriptors);
}
}
using namespace perf;
//////////////////////////////////////////////////////////////////////
// FAST
@ -153,6 +55,8 @@ PERF_TEST_P(Image_NFeatures, Features2D_ORB,
Combine(Values<string>("gpu/perf/aloe.png"),
Values(4000)))
{
declare.time(300.0);
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());

@ -2,6 +2,7 @@
using namespace std;
using namespace testing;
using namespace perf;
//////////////////////////////////////////////////////////////////////
// Blur

@ -632,7 +632,7 @@ DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, ImgProc_MeanShiftFiltering,
Values<string>("gpu/meanshift/cones.png"))
{
declare.time(15.0);
declare.time(300.0);
const cv::Mat img = readImage(GetParam());
ASSERT_FALSE(img.empty());
@ -668,7 +668,7 @@ PERF_TEST_P(Image, ImgProc_MeanShiftFiltering,
PERF_TEST_P(Image, ImgProc_MeanShiftProc,
Values<string>("gpu/meanshift/cones.png"))
{
declare.time(5.0);
declare.time(300.0);
const cv::Mat img = readImage(GetParam());
ASSERT_FALSE(img.empty());
@ -702,7 +702,7 @@ PERF_TEST_P(Image, ImgProc_MeanShiftProc,
PERF_TEST_P(Image, ImgProc_MeanShiftSegmentation,
Values<string>("gpu/meanshift/cones.png"))
{
declare.time(5.0);
declare.time(300.0);
const cv::Mat img = readImage(GetParam());
ASSERT_FALSE(img.empty());
@ -830,6 +830,8 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U,
GPU_CHANNELS_1_3_4,
ALL_TEMPLATE_METHODS))
{
declare.time(300.0);
const cv::Size size = GET_PARAM(0);
const cv::Size templ_size = GET_PARAM(1);
const int cn = GET_PARAM(2);
@ -868,6 +870,8 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F,
GPU_CHANNELS_1_3_4,
Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))))
{
declare.time(300.0);
const cv::Size size = GET_PARAM(0);
const cv::Size templ_size = GET_PARAM(1);
const int cn = GET_PARAM(2);
@ -1034,7 +1038,7 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris,
TEST_CYCLE() cv::gpu::cornerHarris(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode);
GPU_SANITY_CHECK(dst);
GPU_SANITY_CHECK(dst, 1e-4);
}
else
{
@ -1077,7 +1081,7 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal,
TEST_CYCLE() cv::gpu::cornerMinEigenVal(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode);
GPU_SANITY_CHECK(dst);
GPU_SANITY_CHECK(dst, 1e-4);
}
else
{
@ -1341,7 +1345,12 @@ PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColorBayer,
Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR),
CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR),
CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR),
CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR))))
CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR),
CvtColorInfo(1, 1, cv::COLOR_BayerBG2GRAY),
CvtColorInfo(1, 1, cv::COLOR_BayerGB2GRAY),
CvtColorInfo(1, 1, cv::COLOR_BayerRG2GRAY),
CvtColorInfo(1, 1, cv::COLOR_BayerGR2GRAY))))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
@ -1369,6 +1378,50 @@ PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColorBayer,
}
}
CV_ENUM(DemosaicingCode,
cv::COLOR_BayerBG2BGR, cv::COLOR_BayerGB2BGR, cv::COLOR_BayerRG2BGR, cv::COLOR_BayerGR2BGR,
cv::COLOR_BayerBG2GRAY, cv::COLOR_BayerGB2GRAY, cv::COLOR_BayerRG2GRAY, cv::COLOR_BayerGR2GRAY,
cv::gpu::COLOR_BayerBG2BGR_MHT, cv::gpu::COLOR_BayerGB2BGR_MHT, cv::gpu::COLOR_BayerRG2BGR_MHT, cv::gpu::COLOR_BayerGR2BGR_MHT,
cv::gpu::COLOR_BayerBG2GRAY_MHT, cv::gpu::COLOR_BayerGB2GRAY_MHT, cv::gpu::COLOR_BayerRG2GRAY_MHT, cv::gpu::COLOR_BayerGR2GRAY_MHT)
DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode);
PERF_TEST_P(Sz_Code, ImgProc_Demosaicing,
Combine(GPU_TYPICAL_MAT_SIZES,
ValuesIn(DemosaicingCode::all())))
{
const cv::Size size = GET_PARAM(0);
const int code = GET_PARAM(1);
cv::Mat src(size, CV_8UC1);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE() cv::gpu::demosaicing(d_src, dst, code);
GPU_SANITY_CHECK(dst);
}
else
{
if (code >= cv::COLOR_COLORCVT_MAX)
{
FAIL_NO_CPU();
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::cvtColor(src, dst, code);
CPU_SANITY_CHECK(dst);
}
}
}
//////////////////////////////////////////////////////////////////////
// SwapChannels

@ -2,6 +2,7 @@
using namespace std;
using namespace testing;
using namespace perf;
DEF_PARAM_TEST_1(Image, string);

@ -1,70 +1,5 @@
#include "perf_precomp.hpp"
static void printOsInfo()
{
#if defined _WIN32
# if defined _WIN64
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"), fflush(stdout);
# else
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"), fflush(stdout);
# endif
#elif defined linux
# if defined _LP64
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"), fflush(stdout);
# else
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"), fflush(stdout);
# endif
#elif defined __APPLE__
# if defined _LP64
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"), fflush(stdout);
# else
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"), fflush(stdout);
# endif
#endif
}
static void printCudaInfo()
{
printOsInfo();
#ifndef HAVE_CUDA
printf("[----------]\n[ GPU INFO ] \tOpenCV was built without CUDA support.\n[----------]\n"), fflush(stdout);
#else
int driver;
cudaDriverGetVersion(&driver);
printf("[----------]\n"), fflush(stdout);
printf("[ GPU INFO ] \tCUDA Driver version: %d.\n", driver), fflush(stdout);
printf("[ GPU INFO ] \tCUDA Runtime version: %d.\n", CUDART_VERSION), fflush(stdout);
printf("[----------]\n"), fflush(stdout);
printf("[----------]\n"), fflush(stdout);
printf("[ GPU INFO ] \tGPU module was compiled for the following GPU archs.\n"), fflush(stdout);
printf("[ BIN ] \t%s.\n", CUDA_ARCH_BIN), fflush(stdout);
printf("[ PTX ] \t%s.\n", CUDA_ARCH_PTX), fflush(stdout);
printf("[----------]\n"), fflush(stdout);
printf("[----------]\n"), fflush(stdout);
int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount), fflush(stdout);
printf("[----------]\n"), fflush(stdout);
for (int i = 0; i < deviceCount; ++i)
{
cv::gpu::DeviceInfo info(i);
printf("[----------]\n"), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout);
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout);
if (!info.isCompatible())
printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
printf("[----------]\n"), fflush(stdout);
}
#endif
}
using namespace perf;
CV_PERF_TEST_MAIN(gpu, printCudaInfo())

@ -2,6 +2,7 @@
using namespace std;
using namespace testing;
using namespace perf;
//////////////////////////////////////////////////////////////////////
// SetTo

@ -2,6 +2,7 @@
using namespace std;
using namespace testing;
using namespace perf;
///////////////////////////////////////////////////////////////
// HOG
@ -18,6 +19,8 @@ PERF_TEST_P(Image, ObjDetect_HOG,
"gpu/caltech/image_00000527_0.png",
"gpu/caltech/image_00000574_0.png"))
{
declare.time(300.0);
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());

@ -19,6 +19,7 @@
#endif
#include "opencv2/ts.hpp"
#include "opencv2/ts/gpu_perf.hpp"
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
@ -26,12 +27,9 @@
#include "opencv2/calib3d.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/video.hpp"
#include "opencv2/nonfree.hpp"
#include "opencv2/legacy.hpp"
#include "opencv2/photo.hpp"
#include "utility.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif

@ -4,6 +4,18 @@ using namespace std;
using namespace testing;
using namespace perf;
#if defined(HAVE_XINE) || \
defined(HAVE_GSTREAMER) || \
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
#else
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
#endif
namespace cv
{
template<> void Ptr<CvBGStatModel>::delete_obj()
@ -142,7 +154,7 @@ PERF_TEST_P(Image_MinDistance, Video_GoodFeaturesToTrack,
PERF_TEST_P(ImagePair, Video_BroxOpticalFlow,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
declare.time(10);
declare.time(300);
cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
@ -372,8 +384,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
GPU_SANITY_CHECK(u, 1e-4);
GPU_SANITY_CHECK(v, 1e-4);
GPU_SANITY_CHECK(u, 1e-2);
GPU_SANITY_CHECK(v, 1e-2);
}
else
{
@ -482,6 +494,8 @@ PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM,
//////////////////////////////////////////////////////
// FGDStatModel
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
DEF_PARAM_TEST_1(Video, string);
PERF_TEST_P(Video, Video_FGDStatModel,
@ -548,9 +562,13 @@ PERF_TEST_P(Video, Video_FGDStatModel,
}
}
#endif
//////////////////////////////////////////////////////
// MOG
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
DEF_PARAM_TEST(Video_Cn_LearningRate, string, MatCn, double);
PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
@ -643,9 +661,13 @@ PERF_TEST_P(Video_Cn_LearningRate, Video_MOG,
}
}
#endif
//////////////////////////////////////////////////////
// MOG2
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
DEF_PARAM_TEST(Video_Cn, string, int);
PERF_TEST_P(Video_Cn, Video_MOG2,
@ -740,9 +762,13 @@ PERF_TEST_P(Video_Cn, Video_MOG2,
}
}
#endif
//////////////////////////////////////////////////////
// MOG2GetBackgroundImage
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
GPU_CHANNELS_1_3_4))
@ -818,74 +844,13 @@ PERF_TEST_P(Video_Cn, Video_MOG2GetBackgroundImage,
}
}
//////////////////////////////////////////////////////
// VIBE
PERF_TEST_P(Video_Cn, Video_VIBE,
Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
GPU_CHANNELS_1_3_4))
{
const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
const int cn = GET_PARAM(1);
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_frame(frame);
cv::gpu::VIBE_GPU vibe;
cv::gpu::GpuMat foreground;
vibe(d_frame, foreground);
for (int i = 0; i < 10; ++i)
{
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
d_frame.upload(frame);
startTimer(); next();
vibe(d_frame, foreground);
stopTimer();
}
GPU_SANITY_CHECK(foreground);
}
else
{
FAIL_NO_CPU();
}
}
#endif
//////////////////////////////////////////////////////
// GMG
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
DEF_PARAM_TEST(Video_Cn_MaxFeatures, string, MatCn, int);
PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
@ -993,11 +958,13 @@ PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG,
}
}
#ifdef HAVE_NVCUVID
#endif
//////////////////////////////////////////////////////
// VideoReader
#if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT
PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
{
declare.time(20);
@ -1028,10 +995,12 @@ PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video
}
}
#endif
//////////////////////////////////////////////////////
// VideoWriter
#ifdef WIN32
#if defined(HAVE_NVCUVID) && defined(WIN32)
PERF_TEST_P(Video, Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"))
{
@ -1089,6 +1058,4 @@ PERF_TEST_P(Video, Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video
SANITY_CHECK(frame);
}
#endif // WIN32
#endif // HAVE_NVCUVID
#endif

@ -1,184 +0,0 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
Mat readImage(const string& fileName, int flags)
{
return imread(perf::TestBase::getDataPath(fileName), flags);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
{
"BGR2BGRA",
"BGRA2BGR",
"BGR2RGBA",
"RGBA2BGR",
"BGR2RGB",
"BGRA2RGBA",
"BGR2GRAY",
"RGB2GRAY",
"GRAY2BGR",
"GRAY2BGRA",
"BGRA2GRAY",
"RGBA2GRAY",
"BGR2BGR565",
"RGB2BGR565",
"BGR5652BGR",
"BGR5652RGB",
"BGRA2BGR565",
"RGBA2BGR565",
"BGR5652BGRA",
"BGR5652RGBA",
"GRAY2BGR565",
"BGR5652GRAY",
"BGR2BGR555",
"RGB2BGR555",
"BGR5552BGR",
"BGR5552RGB",
"BGRA2BGR555",
"RGBA2BGR555",
"BGR5552BGRA",
"BGR5552RGBA",
"GRAY2BGR555",
"BGR5552GRAY",
"BGR2XYZ",
"RGB2XYZ",
"XYZ2BGR",
"XYZ2RGB",
"BGR2YCrCb",
"RGB2YCrCb",
"YCrCb2BGR",
"YCrCb2RGB",
"BGR2HSV",
"RGB2HSV",
"",
"",
"BGR2Lab",
"RGB2Lab",
"BayerBG2BGR",
"BayerGB2BGR",
"BayerRG2BGR",
"BayerGR2BGR",
"BGR2Luv",
"RGB2Luv",
"BGR2HLS",
"RGB2HLS",
"HSV2BGR",
"HSV2RGB",
"Lab2BGR",
"Lab2RGB",
"Luv2BGR",
"Luv2RGB",
"HLS2BGR",
"HLS2RGB",
"BayerBG2BGR_VNG",
"BayerGB2BGR_VNG",
"BayerRG2BGR_VNG",
"BayerGR2BGR_VNG",
"BGR2HSV_FULL",
"RGB2HSV_FULL",
"BGR2HLS_FULL",
"RGB2HLS_FULL",
"HSV2BGR_FULL",
"HSV2RGB_FULL",
"HLS2BGR_FULL",
"HLS2RGB_FULL",
"LBGR2Lab",
"LRGB2Lab",
"LBGR2Luv",
"LRGB2Luv",
"Lab2LBGR",
"Lab2LRGB",
"Luv2LBGR",
"Luv2LRGB",
"BGR2YUV",
"RGB2YUV",
"YUV2BGR",
"YUV2RGB",
"BayerBG2GRAY",
"BayerGB2GRAY",
"BayerRG2GRAY",
"BayerGR2GRAY",
//YUV 4:2:0 formats family
"YUV2RGB_NV12",
"YUV2BGR_NV12",
"YUV2RGB_NV21",
"YUV2BGR_NV21",
"YUV2RGBA_NV12",
"YUV2BGRA_NV12",
"YUV2RGBA_NV21",
"YUV2BGRA_NV21",
"YUV2RGB_YV12",
"YUV2BGR_YV12",
"YUV2RGB_IYUV",
"YUV2BGR_IYUV",
"YUV2RGBA_YV12",
"YUV2BGRA_YV12",
"YUV2RGBA_IYUV",
"YUV2BGRA_IYUV",
"YUV2GRAY_420",
//YUV 4:2:2 formats family
"YUV2RGB_UYVY",
"YUV2BGR_UYVY",
"YUV2RGB_VYUY",
"YUV2BGR_VYUY",
"YUV2RGBA_UYVY",
"YUV2BGRA_UYVY",
"YUV2RGBA_VYUY",
"YUV2BGRA_VYUY",
"YUV2RGB_YUY2",
"YUV2BGR_YUY2",
"YUV2RGB_YVYU",
"YUV2BGR_YVYU",
"YUV2RGBA_YUY2",
"YUV2BGRA_YUY2",
"YUV2RGBA_YVYU",
"YUV2BGRA_YVYU",
"YUV2GRAY_UYVY",
"YUV2GRAY_YUY2",
// alpha premultiplication
"RGBA2mRGBA",
"mRGBA2RGBA",
"COLORCVT_MAX"
};
*os << str[info.code];
}

@ -1,63 +0,0 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/ts/ts_perf.hpp"
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
using perf::MatType;
using perf::MatDepth;
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
#define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
#define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
struct CvtColorInfo
{
int scn;
int dcn;
int code;
CvtColorInfo() {}
explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
DEF_PARAM_TEST_1(Sz, cv::Size);
typedef perf::Size_MatType Sz_Type;
DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth);
DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
#define GPU_SANITY_CHECK(mat, ...) \
do{ \
cv::Mat gpu_##mat(mat); \
SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
} while(0)
#define CPU_SANITY_CHECK(mat, ...) \
do{ \
cv::Mat cpu_##mat(mat); \
SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
} while(0)
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__

@ -8,69 +8,19 @@
#include "opencv2/video.hpp"
#include "opencv2/legacy.hpp"
#include "opencv2/ts.hpp"
static void printOsInfo()
{
#if defined _WIN32
# if defined _WIN64
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout);
# else
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout);
# endif
#elif defined linux
# if defined _LP64
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout);
# else
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout);
# endif
#elif defined __APPLE__
# if defined _LP64
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout);
# else
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout);
# endif
#endif
}
static void printCudaInfo()
{
const int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
printf("[----------]\n"); fflush(stdout);
printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout);
printf("[----------]\n"); fflush(stdout);
for (int i = 0; i < deviceCount; ++i)
{
cv::gpu::DeviceInfo info(i);
printf("[----------]\n"); fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout);
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()); fflush(stdout);
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout);
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)); fflush(stdout);
if (!info.isCompatible())
printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
printf("[----------]\n"); fflush(stdout);
}
}
#include "opencv2/ts/gpu_perf.hpp"
int main(int argc, char* argv[])
{
printOsInfo();
printCudaInfo();
perf::printCudaInfo();
perf::Regression::Init("nv_perf_test");
perf::Regression::Init("gpu_perf4au");
perf::TestBase::Init(argc, argv);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
//////////////////////////////////////////////////////////
// HoughLinesP

@ -318,40 +318,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
{
class LevelsInit
{
public:
Npp32s pLevels[256];
const Npp32s* pLevels3[3];
int nValues3[3];
#if (CUDA_VERSION > 4020)
GpuMat d_pLevels;
#endif
LevelsInit()
{
nValues3[0] = nValues3[1] = nValues3[2] = 256;
for (int i = 0; i < 256; ++i)
pLevels[i] = i;
const int cn = src.channels();
#if (CUDA_VERSION <= 4020)
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
}
};
static LevelsInit lvls;
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
CV_Assert( lut.depth() == CV_8U );
CV_Assert( lut.channels() == 1 || lut.channels() == cn );
CV_Assert( lut.rows * lut.cols == 256 && lut.isContinuous() );
int cn = src.channels();
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous());
dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn));
NppiSize sz;
sz.height = src.rows;
@ -360,19 +334,34 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
Mat nppLut;
lut.convertTo(nppLut, CV_32S);
cudaStream_t stream = StreamAccessor::getStream(s);
int nValues3[] = {256, 256, 256};
Npp32s pLevels[256];
for (int i = 0; i < 256; ++i)
pLevels[i] = i;
const Npp32s* pLevels3[3];
#if (CUDA_VERSION <= 4020)
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
GpuMat d_pLevels;
d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
if (src.type() == CV_8UC1)
{
#if (CUDA_VERSION <= 4020)
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), pLevels, 256) );
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), lvls.d_pLevels.ptr<Npp32s>(), 256) );
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), d_pLevels.ptr<Npp32s>(), 256) );
#endif
}
else
@ -409,7 +398,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) );
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, pLevels3, nValues3) );
}
if (stream == 0)

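For reference, a minimal host-side sketch of calling the rewritten LUT path; the gamma table and the CV_8UC3 input srcBgr are illustrative assumptions, not taken from this patch:
#include <cmath>
#include "opencv2/gpu/gpu.hpp"
// Build an 8-bit gamma table on the host and apply it on the GPU.
cv::Mat lut(1, 256, CV_8UC1);
for (int i = 0; i < 256; ++i)
    lut.at<uchar>(i) = cv::saturate_cast<uchar>(255.0 * std::pow(i / 255.0, 1.0 / 2.2));
cv::gpu::GpuMat d_src(srcBgr), d_dst; // srcBgr: assumed CV_8UC3 host image
cv::gpu::LUT(d_src, lut, d_dst); // dst becomes CV_MAKE_TYPE(lut.depth(), src.channels())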
@ -48,6 +48,7 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }
void cv::gpu::demosaicing(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); }
void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_nogpu(); }
void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_nogpu(); }
@ -62,6 +63,9 @@ namespace cv { namespace gpu {
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template <int cn>
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template <int cn>
void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
}
}}
@ -1620,26 +1624,56 @@ namespace
funcs[src.depth()][dcn - 1](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream));
}
void bayerBG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
{
bayer_to_bgr(src, dst, dcn, false, false, stream);
}
void bayerGB_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
{
bayer_to_bgr(src, dst, dcn, false, true, stream);
}
void bayerRG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
{
bayer_to_bgr(src, dst, dcn, true, false, stream);
}
void bayerGR_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream)
{
bayer_to_bgr(src, dst, dcn, true, true, stream);
}
void bayer_to_gray(const GpuMat& src, GpuMat& dst, bool blue_last, bool start_with_green, Stream& stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
static const func_t funcs[3] =
{
Bayer2BGR_8u_gpu<1>,
0,
Bayer2BGR_16u_gpu<1>,
};
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1);
CV_Assert(src.rows > 2 && src.cols > 2);
dst.create(src.size(), CV_MAKETYPE(src.depth(), 1));
funcs[src.depth()](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream));
}
void bayerBG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
{
bayer_to_gray(src, dst, false, false, stream);
}
void bayerGB_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
{
bayer_to_gray(src, dst, false, true, stream);
}
void bayerRG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
{
bayer_to_gray(src, dst, true, false, stream);
}
void bayerGR_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream)
{
bayer_to_gray(src, dst, true, true, stream);
}
}
void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
@ -1756,10 +1790,10 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
yuv_to_bgr, // CV_YUV2BGR = 84
yuv_to_rgb, // CV_YUV2RGB = 85
0, // CV_BayerBG2GRAY = 86
0, // CV_BayerGB2GRAY = 87
0, // CV_BayerRG2GRAY = 88
0, // CV_BayerGR2GRAY = 89
bayerBG_to_gray, // CV_BayerBG2GRAY = 86
bayerGB_to_gray, // CV_BayerGB2GRAY = 87
bayerRG_to_gray, // CV_BayerRG2GRAY = 88
bayerGR_to_gray, // CV_BayerGR2GRAY = 89
//YUV 4:2:0 formats family
0, // CV_YUV2RGB_NV12 = 90,
@ -1825,6 +1859,74 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
func(src, dst, dcn, stream);
}
void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
{
const int depth = src.depth();
CV_Assert( src.channels() == 1 );
switch (code)
{
case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
bayer_to_gray(src, dst, code == CV_BayerBG2GRAY || code == CV_BayerGB2GRAY, code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY, stream);
break;
case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
bayer_to_bgr(src, dst, dcn, code == CV_BayerBG2BGR || code == CV_BayerGB2BGR, code == CV_BayerGB2BGR || code == CV_BayerGR2BGR, stream);
break;
case COLOR_BayerBG2BGR_MHT: case COLOR_BayerGB2BGR_MHT: case COLOR_BayerRG2BGR_MHT: case COLOR_BayerGR2BGR_MHT:
{
if (dcn <= 0)
dcn = 3;
CV_Assert( depth == CV_8U );
CV_Assert( dcn == 3 || dcn == 4 );
dst.create(src.size(), CV_MAKETYPE(depth, dcn));
dst.setTo(Scalar::all(0));
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1,
code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);
if (dcn == 3)
device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
else
device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
break;
}
case COLOR_BayerBG2GRAY_MHT: case COLOR_BayerGB2GRAY_MHT: case COLOR_BayerRG2GRAY_MHT: case COLOR_BayerGR2GRAY_MHT:
{
CV_Assert( depth == CV_8U );
dst.create(src.size(), CV_MAKETYPE(depth, 1));
dst.setTo(Scalar::all(0));
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step);
const int2 firstRed = make_int2(code == COLOR_BayerRG2GRAY_MHT || code == COLOR_BayerGB2GRAY_MHT ? 0 : 1,
code == COLOR_BayerRG2GRAY_MHT || code == COLOR_BayerGR2GRAY_MHT ? 0 : 1);
device::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
break;
}
default:
CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" );
}
}
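A minimal usage sketch of the new entry point; the CV_8UC1 host mosaic bayer8u is an assumed input:
cv::gpu::GpuMat d_bayer(bayer8u);
cv::gpu::GpuMat d_bgr;
// Malvar-He-Cutler demosaicing; a plain CV_BayerBG2BGR code would take the
// bilinear bayer_to_bgr path above instead.
cv::gpu::demosaicing(d_bayer, d_bgr, cv::gpu::COLOR_BayerBG2BGR_MHT, 3);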
void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
{
CV_Assert(image.type() == CV_8UC4);

@ -42,42 +42,38 @@
#if !defined CUDA_DISABLER
#include <opencv2/gpu/device/common.hpp>
#include <opencv2/gpu/device/vec_traits.hpp>
#include <opencv2/gpu/device/vec_math.hpp>
#include <opencv2/gpu/device/limits.hpp>
namespace cv { namespace gpu {
namespace device
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/color.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T> struct Bayer2BGR;
template <> struct Bayer2BGR<uchar>
{
template <typename D>
__global__ void Bayer2BGR_8u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
if (s_y >= dst.rows || (s_x << 2) >= dst.cols)
return;
s_y = ::min(::max(s_y, 1), dst.rows - 2);
uchar3 res0;
uchar3 res1;
uchar3 res2;
uchar3 res3;
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
{
uchar4 patch[3][3];
patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 3) >> 2) - 1)];
D res0 = VecTraits<D>::all(numeric_limits<uchar>::max());
D res1 = VecTraits<D>::all(numeric_limits<uchar>::max());
D res2 = VecTraits<D>::all(numeric_limits<uchar>::max());
D res3 = VecTraits<D>::all(numeric_limits<uchar>::max());
patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
if ((s_y & 1) ^ start_with_green)
{
@ -181,45 +177,69 @@ namespace cv { namespace gpu {
res3.z = t7;
}
}
}
};
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
{
typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
return f(pix);
}
template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
{
return pix;
}
template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
{
return make_uchar4(pix.x, pix.y, pix.z, 255);
}
dst(d_y, d_x) = res0;
if (d_x + 1 < dst.cols)
dst(d_y, d_x + 1) = res1;
if (d_x + 2 < dst.cols)
dst(d_y, d_x + 2) = res2;
if (d_x + 3 < dst.cols)
dst(d_y, d_x + 3) = res3;
}
template <typename D>
__global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
template <typename D>
__global__ void Bayer2BGR_16u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
if (s_y >= src.rows || (s_x << 2) >= src.cols)
return;
s_y = ::min(::max(s_y, 1), src.rows - 2);
Bayer2BGR<uchar> bayer;
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
if (s_y >= dst.rows || (s_x << 1) >= dst.cols)
return;
dst(d_y, d_x) = toDst<D>(bayer.res0);
if (d_x + 1 < src.cols)
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
if (d_x + 2 < src.cols)
dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
if (d_x + 3 < src.cols)
dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
}
s_y = ::min(::max(s_y, 1), dst.rows - 2);
template <> struct Bayer2BGR<ushort>
{
ushort3 res0;
ushort3 res1;
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
{
ushort2 patch[3][3];
patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((dst.cols + 1) >> 1) - 1)];
D res0 = VecTraits<D>::all(numeric_limits<ushort>::max());
D res1 = VecTraits<D>::all(numeric_limits<ushort>::max());
patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
if ((s_y & 1) ^ start_with_green)
{
@ -279,53 +299,246 @@ namespace cv { namespace gpu {
res1.z = t3;
}
}
}
};
template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
{
typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
return f(pix);
}
template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
{
return pix;
}
template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
{
return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
}
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
template <typename D>
__global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
dst(d_y, d_x) = res0;
if (d_x + 1 < dst.cols)
dst(d_y, d_x + 1) = res1;
}
if (s_y >= src.rows || (s_x << 1) >= src.cols)
return;
template <int cn>
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
s_y = ::min(::max(s_y, 1), src.rows - 2);
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, 4 * block.x), divUp(dst.rows, block.y));
Bayer2BGR<ushort> bayer;
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
dst(d_y, d_x) = toDst<D>(bayer.res0);
if (d_x + 1 < src.cols)
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
template <int cn>
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, 2 * block.x), divUp(dst.rows, block.y));
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}}
#endif /* CUDA_DISABLER */
template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
//////////////////////////////////////////////////////////////
// Bayer Demosaicing (Malvar, He, and Cutler)
//
// by Morgan McGuire, Williams College
// http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders
//
// ported to CUDA
texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp);
template <typename DstType>
__global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed)
{
const float kAx = -1.0f / 8.0f, kAy = -1.5f / 8.0f, kAz = 0.5f / 8.0f /*kAw = -1.0f / 8.0f*/;
const float kBx = 2.0f / 8.0f, /*kBy = 0.0f / 8.0f,*/ /*kBz = 0.0f / 8.0f,*/ kBw = 4.0f / 8.0f ;
const float kCx = 4.0f / 8.0f, kCy = 6.0f / 8.0f, kCz = 5.0f / 8.0f /*kCw = 5.0f / 8.0f*/;
const float /*kDx = 0.0f / 8.0f,*/ kDy = 2.0f / 8.0f, kDz = -1.0f / 8.0f /*kDw = -1.0f / 8.0f*/;
const float kEx = -1.0f / 8.0f, kEy = -1.5f / 8.0f, /*kEz = -1.0f / 8.0f,*/ kEw = 0.5f / 8.0f ;
const float kFx = 2.0f / 8.0f, /*kFy = 0.0f / 8.0f,*/ kFz = 4.0f / 8.0f /*kFw = 0.0f / 8.0f*/;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1)
return;
int2 center;
center.x = x + sourceOffset.x;
center.y = y + sourceOffset.y;
int4 xCoord;
xCoord.x = center.x - 2;
xCoord.y = center.x - 1;
xCoord.z = center.x + 1;
xCoord.w = center.x + 2;
int4 yCoord;
yCoord.x = center.y - 2;
yCoord.y = center.y - 1;
yCoord.z = center.y + 1;
yCoord.w = center.y + 2;
float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0)
float4 Dvec;
Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1)
Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1)
Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1)
Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1)
float4 value;
value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0
value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0
value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0
value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0
// (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1)
value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1
value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1
value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1
value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1
float4 PATTERN;
PATTERN.x = kCx * C;
PATTERN.y = kCy * C;
PATTERN.z = kCz * C;
PATTERN.w = PATTERN.z;
float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w;
// There are five filter patterns (identity, cross, checker,
// theta, phi). Precompute the terms from all of them and then
// use swizzles to assign to color channels.
//
// Channel Matches
// x cross (e.g., EE G)
// y checker (e.g., EE B)
// z theta (e.g., EO R)
// w phi (e.g., EO B)
#define A value.x // A0 + A1
#define B value.y // B0 + B1
#define E value.z // E0 + E1
#define F value.w // F0 + F1
float3 temp;
// PATTERN.yzw += (kD.yz * D).xyy;
temp.x = kDy * D;
temp.y = kDz * D;
PATTERN.y += temp.x;
PATTERN.z += temp.y;
PATTERN.w += temp.y;
// PATTERN += (kA.xyz * A).xyzx;
temp.x = kAx * A;
temp.y = kAy * A;
temp.z = kAz * A;
PATTERN.x += temp.x;
PATTERN.y += temp.y;
PATTERN.z += temp.z;
PATTERN.w += temp.x;
// PATTERN += (kE.xyw * E).xyxz;
temp.x = kEx * E;
temp.y = kEy * E;
temp.z = kEw * E;
PATTERN.x += temp.x;
PATTERN.y += temp.y;
PATTERN.z += temp.x;
PATTERN.w += temp.z;
// PATTERN.xw += kB.xw * B;
PATTERN.x += kBx * B;
PATTERN.w += kBw * B;
// PATTERN.xz += kF.xz * F;
PATTERN.x += kFx * F;
PATTERN.z += kFz * F;
// Determine which of four types of pixels we are on.
int2 alternate;
alternate.x = (x + firstRed.x) % 2;
alternate.y = (y + firstRed.y) % 2;
// assemble the output pixel in BGR channel order
uchar3 pixelColor =
(alternate.y == 0) ?
((alternate.x == 0) ?
make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
((alternate.x == 0) ?
make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));
dst(y, x) = toDst<DstType>(pixelColor);
}
template <int cn>
void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
bindTexture(&sourceTex, src);
MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
}}}
#endif /* CUDA_DISABLER */
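For orientation, a minimal host-side sketch of exercising this kernel through the public API. It assumes the Bayer-to-BGR *_MHT conversion codes that this changeset wires into cv::gpu::cvtColor; the exact enum name and namespace are an assumption here, so check gpu.hpp in your tree:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>
int main()
{
    // 8-bit single-channel Bayer mosaic, e.g. straight from a sensor dump
    cv::Mat raw = cv::imread("mosaic.png", 0);
    cv::gpu::GpuMat d_raw(raw), d_bgr;
    // Malvar-He-Cutler demosaicing; pick the *_MHT code matching the sensor
    // layout (COLOR_BayerBG2BGR_MHT is assumed here)
    cv::gpu::cvtColor(d_raw, d_bgr, cv::gpu::COLOR_BayerBG2BGR_MHT);
    cv::Mat bgr;
    d_bgr.download(bgr);
    cv::imwrite("demosaiced.png", bgr);
    return 0;
}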

@@ -48,6 +48,7 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/simd_functions.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
@@ -154,170 +155,28 @@ namespace arithm
namespace arithm
{
template <typename T, typename D> struct VAdd4;
template <> struct VAdd4<uint, uint> : binary_function<uint, uint, uint>
struct VAdd4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4<uint, uint>& other) {}
};
template <> struct VAdd4<int, uint> : binary_function<int, int, uint>
{
__device__ __forceinline__ uint operator ()(int a, int b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vadd4(a, b);
}
__device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4<int, uint>& other) {}
};
template <> struct VAdd4<uint, int> : binary_function<uint, uint, int>
{
__device__ __forceinline__ int operator ()(uint a, uint b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.s32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4<uint, int>& other) {}
};
template <> struct VAdd4<int, int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.s32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4<int, int>& other) {}
__device__ __forceinline__ VAdd4(const VAdd4& other) {}
};
////////////////////////////////////
template <typename T, typename D> struct VAdd2;
template <> struct VAdd2<uint, uint> : binary_function<uint, uint, uint>
struct VAdd2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2<uint, uint>& other) {}
};
template <> struct VAdd2<uint, int> : binary_function<uint, uint, int>
{
__device__ __forceinline__ int operator ()(uint a, uint b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.s32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2<uint, int>& other) {}
};
template <> struct VAdd2<int, uint> : binary_function<int, int, uint>
{
__device__ __forceinline__ uint operator ()(int a, int b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.u32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vadd2(a, b);
}
__device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2<int, uint>& other) {}
};
template <> struct VAdd2<int, int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vadd.s32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vadd.s32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2<int, int>& other) {}
__device__ __forceinline__ VAdd2(const VAdd2& other) {}
};
////////////////////////////////////
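This collapse is the pattern for the rest of the file: four <T, D> specializations, each carrying its own inline PTX for SM 2.x and 3.x, become one uint functor that defers to simd_functions.hpp, and the same rewrite is applied to VSub, VAbsDiff, VMin and VMax below. Dropping the signed variants is consistent with the host side, which now takes these fast paths only for CV_8U and CV_16U data. As a mental model of what vadd4 computes, a reference loop (a sketch only; the shared header presumably keeps the PTX forms where the hardware provides them):
__device__ __forceinline__ unsigned int vadd4_reference(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
    for (int i = 0; i < 32; i += 8)
    {
        unsigned int s = ((a >> i) & 0xFFu) + ((b >> i) & 0xFFu);
        if (s > 0xFFu) s = 0xFFu; // saturate the byte lane; no carry into the next lane
        r |= s << i;
    }
    return r;
}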
@@ -336,13 +195,13 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
@@ -355,28 +214,16 @@ namespace cv { namespace gpu { namespace device
namespace arithm
{
template <typename T, typename D>
void vadd4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, VAdd4<T, D>(), WithOutMask(), stream);
transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
}
template void vadd4<uint, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vadd4<uint, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vadd4<int, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vadd4<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T, typename D>
void vadd2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, VAdd2<T, D>(), WithOutMask(), stream);
transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
}
template void vadd2<uint, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vadd2<uint, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vadd2<int, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vadd2<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T, typename D>
void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
{
@@ -543,170 +390,28 @@ namespace arithm
namespace arithm
{
template <typename T, typename D> struct VSub4;
template <> struct VSub4<uint, uint> : binary_function<uint, uint, uint>
struct VSub4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4<uint, uint>& other) {}
};
template <> struct VSub4<int, uint> : binary_function<int, int, uint>
{
__device__ __forceinline__ uint operator ()(int a, int b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4<int, uint>& other) {}
};
template <> struct VSub4<uint, int> : binary_function<uint, uint, int>
{
__device__ __forceinline__ int operator ()(uint a, uint b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.s32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vsub4(a, b);
}
__device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4<uint, int>& other) {}
};
template <> struct VSub4<int, int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.s32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4<int, int>& other) {}
__device__ __forceinline__ VSub4(const VSub4& other) {}
};
////////////////////////////////////
template <typename T, typename D> struct VSub2;
template <> struct VSub2<uint, uint> : binary_function<uint, uint, uint>
struct VSub2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2<uint, uint>& other) {}
};
template <> struct VSub2<uint, int> : binary_function<uint, uint, int>
{
__device__ __forceinline__ int operator ()(uint a, uint b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.s32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.s32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2<uint, int>& other) {}
};
template <> struct VSub2<int, uint> : binary_function<int, int, uint>
{
__device__ __forceinline__ uint operator ()(int a, int b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.u32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.u32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2<int, uint>& other) {}
};
template <> struct VSub2<int, int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vsub.s32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vsub.s32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vsub2(a, b);
}
__device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2<int, int>& other) {}
__device__ __forceinline__ VSub2(const VSub2& other) {}
};
////////////////////////////////////
@@ -725,13 +430,13 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D> struct TransformFunctorTraits< arithm::VSub4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< arithm::VSub2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
@@ -744,28 +449,16 @@ namespace cv { namespace gpu { namespace device
namespace arithm
{
template <typename T, typename D>
void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, VSub4<T, D>(), WithOutMask(), stream);
transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
}
template void vsub4<uint, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vsub4<uint, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vsub4<int, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vsub4<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T, typename D>
void vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, VSub2<T, D>(), WithOutMask(), stream);
transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
}
template void vsub2<uint, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vsub2<uint, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vsub2<int, uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vsub2<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T, typename D>
void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
{
@@ -1496,90 +1189,28 @@ namespace arithm
namespace arithm
{
template <typename T, typename D> struct VAbsDiff4;
template <> struct VAbsDiff4<uint, uint> : binary_function<uint, uint, uint>
struct VAbsDiff4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vabsdiff4(a, b);
}
__device__ __forceinline__ VAbsDiff4() {}
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4<uint, uint>& other) {}
};
template <> struct VAbsDiff4<int, int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff4.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.s32.s32.s32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.s32.s32.s32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.s32.s32.s32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.s32.s32.s32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAbsDiff4() {}
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4<int, int>& other) {}
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
};
////////////////////////////////////
template <typename T, typename D> struct VAbsDiff2;
template <> struct VAbsDiff2<uint, uint> : binary_function<uint, uint, uint>
struct VAbsDiff2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VAbsDiff2() {}
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2<uint, uint>& other) {}
};
template <> struct VAbsDiff2<int, int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff2.s32.s32.s32.sat %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.s32.s32.s32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vabsdiff.s32.s32.s32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vabsdiff2(a, b);
}
__device__ __forceinline__ VAbsDiff2() {}
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2<int, int>& other) {}
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
};
////////////////////////////////////
@@ -1611,13 +1242,13 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAbsDiff4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAbsDiff2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
@@ -1630,24 +1261,16 @@ namespace cv { namespace gpu { namespace device
namespace arithm
{
template <typename T>
void vabsDiff4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, VAbsDiff4<T, T>(), WithOutMask(), stream);
transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
}
template void vabsDiff4<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vabsDiff4<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T>
void vabsDiff2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, VAbsDiff2<T, T>(), WithOutMask(), stream);
transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
}
template void vabsDiff2<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vabsDiff2<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T>
void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
{
@@ -1877,6 +1500,49 @@ namespace arithm
namespace arithm
{
struct VCmpEq4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vcmpeq4(a, b);
}
__device__ __forceinline__ VCmpEq4() {}
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vcmpne4(a, b);
}
__device__ __forceinline__ VCmpNe4() {}
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vcmplt4(a, b);
}
__device__ __forceinline__ VCmpLt4() {}
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vcmple4(a, b);
}
__device__ __forceinline__ VCmpLe4() {}
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
};
////////////////////////////////////
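The comparison functors are new rather than rewrites. Each byte lane of the result is expected back as an all-ones or all-zeros mask, which is exactly the 0xFF/0x00 output cv::gpu::compare produces for CV_8U. A sketch of the semantics assumed of vcmpeq4 (not the shared header's implementation):
__device__ __forceinline__ unsigned int vcmpeq4_reference(unsigned int a, unsigned int b)
{
    unsigned int r = 0;
    for (int i = 0; i < 32; i += 8)
        if (((a >> i) & 0xFFu) == ((b >> i) & 0xFFu))
            r |= 0xFFu << i; // equal lanes become 0xFF, unequal lanes stay 0x00
    return r;
}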
template <class Op, typename T>
struct Cmp : binary_function<T, T, uchar>
{
@@ -1890,6 +1556,21 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
////////////////////////////////////
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
{
};
@@ -1897,6 +1578,23 @@ namespace cv { namespace gpu { namespace device
namespace arithm
{
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
}
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
}
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
}
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
}
template <template <typename> class Op, typename T>
void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
{
@@ -2303,44 +2001,11 @@ namespace arithm
namespace arithm
{
template <typename T> struct VMin4;
template <> struct VMin4<uint> : binary_function<uint, uint, uint>
struct VMin4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VMin4() {}
__device__ __forceinline__ VMin4(const VMin4& other) {}
};
template <> struct VMin4<int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin4.s32.s32.s32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmin.s32.s32.s32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.s32.s32.s32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.s32.s32.s32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.s32.s32.s32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vmin4(a, b);
}
__device__ __forceinline__ VMin4() {}
@@ -2349,40 +2014,11 @@ namespace arithm
////////////////////////////////////
template <typename T> struct VMin2;
template <> struct VMin2<uint> : binary_function<uint, uint, uint>
struct VMin2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VMin2() {}
__device__ __forceinline__ VMin2(const VMin2& other) {}
};
template <> struct VMin2<int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin2.s32.s32.s32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmin.s32.s32.s32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmin.s32.s32.s32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vmin2(a, b);
}
__device__ __forceinline__ VMin2() {}
@@ -2392,13 +2028,13 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< arithm::VMin4<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< arithm::VMin2<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <> struct TransformFunctorTraits< arithm::VMin2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
@@ -2415,14 +2051,14 @@ namespace cv { namespace gpu { namespace device
namespace arithm
{
template <typename T> void vmin4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, VMin4<T>(), WithOutMask(), stream);
transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
}
template <typename T> void vmin2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, VMin2<T>(), WithOutMask(), stream);
transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
}
template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
@@ -2430,12 +2066,6 @@ namespace arithm
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
}
template void vmin4<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vmin4<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vmin2<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vmin2<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -2463,44 +2093,11 @@ namespace arithm
namespace arithm
{
template <typename T> struct VMax4;
template <> struct VMax4<uint> : binary_function<uint, uint, uint>
struct VMax4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VMax4() {}
__device__ __forceinline__ VMax4(const VMax4& other) {}
};
template <> struct VMax4<int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax4.s32.s32.s32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmax.s32.s32.s32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.s32.s32.s32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.s32.s32.s32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.s32.s32.s32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vmax4(a, b);
}
__device__ __forceinline__ VMax4() {}
@@ -2509,40 +2106,11 @@ namespace arithm
////////////////////////////////////
template <typename T> struct VMax2;
template <> struct VMax2<uint> : binary_function<uint, uint, uint>
struct VMax2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
uint res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
}
__device__ __forceinline__ VMax2() {}
__device__ __forceinline__ VMax2(const VMax2& other) {}
};
template <> struct VMax2<int> : binary_function<int, int, int>
{
__device__ __forceinline__ int operator ()(int a, int b) const
{
int res = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax2.s32.s32.s32 %0, %1, %2, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#elif __CUDA_ARCH__ >= 200
asm("vmax.s32.s32.s32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
asm("vmax.s32.s32.s32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(res) : "r"(a), "r"(b), "r"(res));
#endif
return res;
return vmax2(a, b);
}
__device__ __forceinline__ VMax2() {}
@@ -2552,13 +2120,13 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< arithm::VMax4<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< arithm::VMax2<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <> struct TransformFunctorTraits< arithm::VMax2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
@@ -2575,14 +2143,14 @@ namespace cv { namespace gpu { namespace device
namespace arithm
{
template <typename T> void vmax4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, VMax4<T>(), WithOutMask(), stream);
transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
}
template <typename T> void vmax2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, VMax2<T>(), WithOutMask(), stream);
transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
}
template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
@@ -2590,12 +2158,6 @@ namespace arithm
transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
}
template void vmax4<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vmax4<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vmax2<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void vmax2<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);

@@ -263,11 +263,8 @@ namespace
namespace arithm
{
template <typename T, typename D>
void vadd4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T, typename D>
void vadd2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
void addMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void addMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
template <typename T, typename D>
void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -345,62 +342,6 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
}
};
typedef void (*vfunc_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
static const vfunc_t vfuncs4[4][4] =
{
{
vadd4<unsigned int, unsigned int>,
vadd4<unsigned int, int>,
0,
0
},
{
vadd4<int, unsigned int>,
vadd4<int, int>,
0,
0
},
{
0,
0,
0,
0
},
{
0,
0,
0,
0
}
};
static const vfunc_t vfuncs2[4][4] =
{
{
0,
0,
0,
0
},
{
0,
0,
0,
0
},
{
0,
0,
vadd2<unsigned int, unsigned int>,
vadd2<unsigned int, int>
},
{
0,
0,
vadd2<int, unsigned int>,
vadd2<int, int>
}
};
if (dtype < 0)
dtype = src1.depth();
@@ -426,7 +367,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (mask.empty() && sdepth < CV_32S && ddepth < CV_32S)
if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
@@ -434,31 +375,27 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (deviceSupports(FEATURE_SET_COMPUTE_20) && isAllAligned)
if (isAllAligned)
{
const vfunc_t vfunc4 = vfuncs4[sdepth][ddepth];
const vfunc_t vfunc2 = vfuncs2[sdepth][ddepth];
if (vfunc4 != 0 && (src1_.cols & 3) == 0)
if (sdepth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
vfunc4(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
addMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
if (vfunc2 != 0 && (src1_.cols & 1) == 0)
else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
vfunc2(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
addMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
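Spelled out, the guards on this fast path: a row of N uchars is reinterpreted as N/4 uints (N/2 ushorts for the v2 case), which is sound only when N divides evenly and all three base pointers keep the 32-byte alignment the vectorized kernels assume. Note also that the depth test tightened from sdepth < CV_32S to exactly CV_8U or CV_16U with ddepth == sdepth, matching the now unsigned-only functors; the identical dispatch is repeated for subtract, absdiff, min and max below. A hypothetical helper mirroring the checks (the name is illustrative, not part of this change):
static bool canUsePackedPath(const unsigned char* p1, const unsigned char* p2,
                             const unsigned char* pd, int cols, int lanes /* 4 or 2 */)
{
    const bool wholeWords = (cols % lanes) == 0;        // e.g. 1920 uchars -> 480 uints exactly
    const intptr_t merged = reinterpret_cast<intptr_t>(p1)
                          | reinterpret_cast<intptr_t>(p2)
                          | reinterpret_cast<intptr_t>(pd);
    return wholeWords && (merged & 31) == 0;            // all three pointers 32-byte aligned
}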
@@ -606,11 +543,8 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
namespace arithm
{
template <typename T, typename D>
void vsub4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T, typename D>
void vsub2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
void subMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void subMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
template <typename T, typename D>
void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -688,62 +622,6 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
}
};
typedef void (*vfunc_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
static const vfunc_t vfuncs4[4][4] =
{
{
vsub4<unsigned int, unsigned int>,
vsub4<unsigned int, int>,
0,
0
},
{
vsub4<int, unsigned int>,
vsub4<int, int>,
0,
0
},
{
0,
0,
0,
0
},
{
0,
0,
0,
0
}
};
static const vfunc_t vfuncs2[4][4] =
{
{
0,
0,
0,
0
},
{
0,
0,
0,
0
},
{
0,
0,
vsub2<unsigned int, unsigned int>,
vsub2<unsigned int, int>
},
{
0,
0,
vsub2<int, unsigned int>,
vsub2<int, int>
}
};
if (dtype < 0)
dtype = src1.depth();
@@ -769,7 +647,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (mask.empty() && sdepth < CV_32S && ddepth < CV_32S)
if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
@@ -777,31 +655,27 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (deviceSupports(FEATURE_SET_COMPUTE_20) && isAllAligned)
if (isAllAligned)
{
const vfunc_t vfunc4 = vfuncs4[sdepth][ddepth];
const vfunc_t vfunc2 = vfuncs2[sdepth][ddepth];
if (vfunc4 != 0 && (src1_.cols & 3) == 0)
if (sdepth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
vfunc4(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
subMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
if (vfunc2 != 0 && (src1_.cols & 1) == 0)
else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
vfunc2(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
subMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
@@ -1585,11 +1459,8 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
namespace arithm
{
template <typename T>
void vabsDiff4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T>
void vabsDiff2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
void absDiffMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void absDiffMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
template <typename T>
void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -1610,20 +1481,6 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
absDiffMat<float>,
absDiffMat<double>
};
static const func_t vfuncs4[] =
{
vabsDiff4<unsigned int>,
vabsDiff4<int>,
0,
0
};
static const func_t vfuncs2[] =
{
0,
0,
vabsDiff2<unsigned int>,
vabsDiff2<int>
};
const int depth = src1.depth();
const int cn = src1.channels();
@@ -1645,7 +1502,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (depth < CV_32S)
if (depth == CV_8U || depth == CV_16U)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
@@ -1653,31 +1510,27 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (deviceSupports(FEATURE_SET_COMPUTE_20) && isAllAligned)
if (isAllAligned)
{
const func_t vfunc4 = vfuncs4[depth];
const func_t vfunc2 = vfuncs2[depth];
if (vfunc4 != 0 && (src1_.cols & 3) == 0)
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
vfunc4(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
absDiffMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
if (vfunc2 != 0 && (src1_.cols & 1) == 0)
else if (depth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
vfunc2(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
absDiffMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
@@ -1940,6 +1793,11 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
namespace arithm
{
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -1962,6 +1820,12 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
{cmpMatEq<double> , cmpMatNe<double> , cmpMatLt<double> , cmpMatLe<double> }
};
typedef void (*func_v4_t)(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream);
static const func_v4_t funcs_v4[] =
{
cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
};
const int depth = src1.depth();
const int cn = src1.channels();
@@ -1997,6 +1861,27 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
PtrStepSzb src2_(src1.rows, src1.cols * cn, psrc2[cmpop]->data, psrc2[cmpop]->step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (isAllAligned)
{
const int vcols = src1_.cols >> 2;
funcs_v4[code](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
}
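// Note: compare gets only a v4 fast path in this change; 16-bit inputs
// stay on the generic per-element kernels selected below.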
const func_t func = funcs[depth][code];
func(src1_, src2_, dst_, stream);
@@ -2532,13 +2417,13 @@ void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& st
namespace arithm
{
template <typename T> void vmin4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void vmin2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
void minMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void minMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void vmax4(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void vmax2(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
void maxMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
void maxMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
}
@@ -2558,20 +2443,6 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
minMat<float>,
minMat<double>
};
static const func_t vfuncs4[] =
{
vmin4<unsigned int>,
vmin4<int>,
0,
0
};
static const func_t vfuncs2[] =
{
0,
0,
vmin2<unsigned int>,
vmin2<int>
};
const int depth = src1.depth();
const int cn = src1.channels();
@@ -2593,7 +2464,7 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (depth < CV_32S)
if (depth == CV_8U || depth == CV_16U)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
@@ -2601,31 +2472,27 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (deviceSupports(FEATURE_SET_COMPUTE_20) && isAllAligned)
if (isAllAligned)
{
const func_t vfunc4 = vfuncs4[depth];
const func_t vfunc2 = vfuncs2[depth];
if (vfunc4 != 0 && (src1_.cols & 3) == 0)
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
vfunc4(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
minMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
if (vfunc2 != 0 && (src1_.cols & 1) == 0)
else if (depth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
vfunc2(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
minMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
@@ -2655,20 +2522,6 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
maxMat<float>,
maxMat<double>
};
static const func_t vfuncs4[] =
{
vmax4<unsigned int>,
vmax4<int>,
0,
0
};
static const func_t vfuncs2[] =
{
0,
0,
vmax2<unsigned int>,
vmax2<int>
};
const int depth = src1.depth();
const int cn = src1.channels();
@@ -2690,7 +2543,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
if (depth < CV_32S)
if (depth == CV_8U || depth == CV_16U)
{
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
@@ -2698,31 +2551,27 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (deviceSupports(FEATURE_SET_COMPUTE_20) && isAllAligned)
if (isAllAligned)
{
const func_t vfunc4 = vfuncs4[depth];
const func_t vfunc2 = vfuncs2[depth];
if (vfunc4 != 0 && (src1_.cols & 3) == 0)
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
const int vcols = src1_.cols >> 2;
vfunc4(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
maxMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}
if (vfunc2 != 0 && (src1_.cols & 1) == 0)
else if (depth == CV_16U && (src1_.cols & 1) == 0)
{
const int vcols = src1_.cols >> 1;
vfunc2(PtrStepSzb(src1_.rows, vcols, src1_.data, src1_.step),
PtrStepSzb(src1_.rows, vcols, src2_.data, src2_.step),
PtrStepSzb(src1_.rows, vcols, dst_.data, dst_.step),
stream);
maxMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
stream);
return;
}

@@ -49,71 +49,6 @@ using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
void printOsInfo()
{
#if defined _WIN32
# if defined _WIN64
cout << "OS: Windows x64 \n" << endl;
# else
cout << "OS: Windows x32 \n" << endl;
# endif
#elif defined linux
# if defined _LP64
cout << "OS: Linux x64 \n" << endl;
# else
cout << "OS: Linux x32 \n" << endl;
# endif
#elif defined __APPLE__
# if defined _LP64
cout << "OS: Apple x64 \n" << endl;
# else
cout << "OS: Apple x32 \n" << endl;
# endif
#endif
}
void printCudaInfo()
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
cout << "OpenCV was built without CUDA support \n" << endl;
#else
int driver;
cudaDriverGetVersion(&driver);
cout << "CUDA Driver version: " << driver << '\n';
cout << "CUDA Runtime version: " << CUDART_VERSION << '\n';
cout << endl;
cout << "GPU module was compiled for the following GPU archs:" << endl;
cout << " BIN: " << CUDA_ARCH_BIN << '\n';
cout << " PTX: " << CUDA_ARCH_PTX << '\n';
cout << endl;
int deviceCount = getCudaEnabledDeviceCount();
cout << "CUDA device count: " << deviceCount << '\n';
cout << endl;
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
cout << "Device [" << i << "] \n";
cout << "\t Name: " << info.name() << '\n';
cout << "\t Compute capability: " << info.majorVersion() << '.' << info.minorVersion()<< '\n';
cout << "\t Multi Processor Count: " << info.multiProcessorCount() << '\n';
cout << "\t Total memory: " << static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0) << " Mb \n";
cout << "\t Free memory: " << static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0) << " Mb \n";
if (!info.isCompatible())
cout << "\t !!! This device is NOT compatible with current GPU module build \n";
cout << endl;
}
#endif
}
int main(int argc, char** argv)
{
try
@ -133,7 +68,6 @@ int main(int argc, char** argv)
return 0;
}
printOsInfo();
printCudaInfo();
if (cmd.has("info"))

@ -43,9 +43,25 @@
#ifdef HAVE_CUDA
using namespace cvtest;
#if defined(HAVE_XINE) || \
defined(HAVE_GSTREAMER) || \
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
#else
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
#endif
//////////////////////////////////////////////////////
// FGDStatModel
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
namespace cv
{
template<> void Ptr<CvBGStatModel>::delete_obj()
@ -130,9 +146,13 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, FGDStatModel, testing::Combine(
testing::Values(std::string("768x576.avi")),
testing::Values(Channels(3), Channels(4))));
#endif
//////////////////////////////////////////////////////
// MOG
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
namespace
{
IMPLEMENT_PARAM_CLASS(UseGray, bool)
@ -204,9 +224,13 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG, testing::Combine(
testing::Values(LearningRate(0.0), LearningRate(0.01)),
WHOLE_SUBMAT));
#endif
//////////////////////////////////////////////////////
// MOG2
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
namespace
{
IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
@ -320,46 +344,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine(
testing::Values(DetectShadow(true), DetectShadow(false)),
WHOLE_SUBMAT));
//////////////////////////////////////////////////////
// VIBE
PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
};
GPU_TEST_P(VIBE, Accuracy)
{
const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const cv::Size size = GET_PARAM(1);
const int type = GET_PARAM(2);
const bool useRoi = GET_PARAM(3);
const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
cv::Mat frame = randomMat(size, type, 0.0, 100);
cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);
cv::gpu::VIBE_GPU vibe;
cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
vibe.initialize(d_frame);
for (int i = 0; i < 20; ++i)
vibe(d_frame, d_fgmask);
frame = randomMat(size, type, 160, 255);
d_frame = loadMat(frame, useRoi);
vibe(d_frame, d_fgmask);
// now fgmask should be entirely foreground
ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
}
INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
WHOLE_SUBMAT));
#endif
//////////////////////////////////////////////////////
// GMG

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
//////////////////////////////////////////////////////////////////////////
// StereoBM

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// cvtColor
@ -2218,12 +2220,245 @@ GPU_TEST_P(CvtColor, BayerGR2BGR4)
EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst3(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 0);
}
GPU_TEST_P(CvtColor, BayerBG2Gray)
{
if ((depth != CV_8U && depth != CV_16U) || useRoi)
return;
cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst;
cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2GRAY);
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2GRAY);
EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);
}
GPU_TEST_P(CvtColor, BayerGB2Gray)
{
if ((depth != CV_8U && depth != CV_16U) || useRoi)
return;
cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst;
cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2GRAY);
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2GRAY);
EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);
}
GPU_TEST_P(CvtColor, BayerRG2Gray)
{
if ((depth != CV_8U && depth != CV_16U) || useRoi)
return;
cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst;
cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2GRAY);
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2GRAY);
EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);
}
GPU_TEST_P(CvtColor, BayerGR2Gray)
{
if ((depth != CV_8U && depth != CV_16U) || useRoi)
return;
cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst;
cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2GRAY);
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2GRAY);
EXPECT_MAT_NEAR(dst_gold(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), dst(cv::Rect(1, 1, dst.cols - 2, dst.rows - 2)), 2);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
WHOLE_SUBMAT));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Demosaicing
struct Demosaicing : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
static void mosaic(const cv::Mat_<cv::Vec3b>& src, cv::Mat_<uchar>& dst, cv::Point firstRed)
{
dst.create(src.size());
for (int y = 0; y < src.rows; ++y)
{
for (int x = 0; x < src.cols; ++x)
{
cv::Vec3b pix = src(y, x);
cv::Point alternate;
alternate.x = (x + firstRed.x) % 2;
alternate.y = (y + firstRed.y) % 2;
if (alternate.y == 0)
{
if (alternate.x == 0)
{
// RG
// GB
dst(y, x) = pix[2];
}
else
{
// GR
// BG
dst(y, x) = pix[1];
}
}
else
{
if (alternate.x == 0)
{
// GB
// RG
dst(y, x) = pix[1];
}
else
{
// BG
// GR
dst(y, x) = pix[0];
}
}
}
}
}
};
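The firstRed argument to mosaic() is simply the (x, y) position of the red sample inside the top-left 2x2 cell of the Bayer pattern, which is why each test below passes a different offset. A hypothetical helper (not part of the test file) restating the mapping the tests use:

// Hypothetical helper, for illustration only: which pixel of the
// top-left 2x2 Bayer cell carries the red sample, per pattern.
static cv::Point firstRedOffset(const std::string& pattern)
{
    if (pattern == "BG") return cv::Point(1, 1); // B G / G R
    if (pattern == "GB") return cv::Point(0, 1); // G B / R G
    if (pattern == "RG") return cv::Point(0, 0); // R G / G B
    return cv::Point(1, 0);                      // G R / B G
}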
GPU_TEST_P(Demosaicing, BayerBG2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 1));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerBG2BGR);
EXPECT_MAT_SIMILAR(img, dst, 2e-2);
}
GPU_TEST_P(Demosaicing, BayerGB2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 1));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerGB2BGR);
EXPECT_MAT_SIMILAR(img, dst, 2e-2);
}
GPU_TEST_P(Demosaicing, BayerRG2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 0));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerRG2BGR);
EXPECT_MAT_SIMILAR(img, dst, 2e-2);
}
GPU_TEST_P(Demosaicing, BayerGR2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 0));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerGR2BGR);
EXPECT_MAT_SIMILAR(img, dst, 2e-2);
}
GPU_TEST_P(Demosaicing, BayerBG2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 1));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerBG2BGR_MHT);
EXPECT_MAT_SIMILAR(img, dst, 5e-3);
}
GPU_TEST_P(Demosaicing, BayerGB2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 1));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerGB2BGR_MHT);
EXPECT_MAT_SIMILAR(img, dst, 5e-3);
}
GPU_TEST_P(Demosaicing, BayerRG2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 0));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerRG2BGR_MHT);
EXPECT_MAT_SIMILAR(img, dst, 5e-3);
}
GPU_TEST_P(Demosaicing, BayerGR2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 0));
cv::gpu::GpuMat dst;
cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerGR2BGR_MHT);
EXPECT_MAT_SIMILAR(img, dst, 5e-3);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Demosaicing, ALL_DEVICES);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// swapChannels

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
namespace
{
IMPLEMENT_PARAM_CLASS(Border, int)

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
////////////////////////////////////////////////////////////////////////////////
// Merge

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
////////////////////////////////////////////////////////
// BilateralFilter

@ -43,306 +43,7 @@
#ifdef HAVE_CUDA
namespace
{
bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
{
const double maxPtDif = 1.0;
const double maxSizeDif = 1.0;
const double maxAngleDif = 2.0;
const double maxResponseDif = 0.1;
double dist = cv::norm(p1.pt - p2.pt);
if (dist < maxPtDif &&
fabs(p1.size - p2.size) < maxSizeDif &&
abs(p1.angle - p2.angle) < maxAngleDif &&
abs(p1.response - p2.response) < maxResponseDif &&
p1.octave == p2.octave &&
p1.class_id == p2.class_id)
{
return true;
}
return false;
}
struct KeyPointLess : std::binary_function<cv::KeyPoint, cv::KeyPoint, bool>
{
bool operator()(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) const
{
return kp1.pt.y < kp2.pt.y || (kp1.pt.y == kp2.pt.y && kp1.pt.x < kp2.pt.x);
}
};
testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char* actual_expr, std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
{
if (gold.size() != actual.size())
{
return testing::AssertionFailure() << "KeyPoints size mismatch\n"
<< "\"" << gold_expr << "\" : " << gold.size() << "\n"
<< "\"" << actual_expr << "\" : " << actual.size();
}
std::sort(actual.begin(), actual.end(), KeyPointLess());
std::sort(gold.begin(), gold.end(), KeyPointLess());
for (size_t i = 0; i < gold.size(); ++i)
{
const cv::KeyPoint& p1 = gold[i];
const cv::KeyPoint& p2 = actual[i];
if (!keyPointsEquals(p1, p2))
{
return testing::AssertionFailure() << "KeyPoints differ at " << i << "\n"
<< "\"" << gold_expr << "\" vs \"" << actual_expr << "\" : \n"
<< "pt : " << testing::PrintToString(p1.pt) << " vs " << testing::PrintToString(p2.pt) << "\n"
<< "size : " << p1.size << " vs " << p2.size << "\n"
<< "angle : " << p1.angle << " vs " << p2.angle << "\n"
<< "response : " << p1.response << " vs " << p2.response << "\n"
<< "octave : " << p1.octave << " vs " << p2.octave << "\n"
<< "class_id : " << p1.class_id << " vs " << p2.class_id;
}
}
return ::testing::AssertionSuccess();
}
#define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual);
int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
{
std::sort(actual.begin(), actual.end(), KeyPointLess());
std::sort(gold.begin(), gold.end(), KeyPointLess());
int validCount = 0;
for (size_t i = 0; i < gold.size(); ++i)
{
const cv::KeyPoint& p1 = gold[i];
const cv::KeyPoint& p2 = actual[i];
if (keyPointsEquals(p1, p2))
++validCount;
}
return validCount;
}
int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
{
int validCount = 0;
for (size_t i = 0; i < matches.size(); ++i)
{
const cv::DMatch& m = matches[i];
const cv::KeyPoint& p1 = keypoints1[m.queryIdx];
const cv::KeyPoint& p2 = keypoints2[m.trainIdx];
if (keyPointsEquals(p1, p2))
++validCount;
}
return validCount;
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// SURF
namespace
{
IMPLEMENT_PARAM_CLASS(SURF_HessianThreshold, double)
IMPLEMENT_PARAM_CLASS(SURF_Octaves, int)
IMPLEMENT_PARAM_CLASS(SURF_OctaveLayers, int)
IMPLEMENT_PARAM_CLASS(SURF_Extended, bool)
IMPLEMENT_PARAM_CLASS(SURF_Upright, bool)
}
PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright)
{
cv::gpu::DeviceInfo devInfo;
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
hessianThreshold = GET_PARAM(1);
nOctaves = GET_PARAM(2);
nOctaveLayers = GET_PARAM(3);
extended = GET_PARAM(4);
upright = GET_PARAM(5);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(SURF, Detector)
{
cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::gpu::SURF_GPU surf;
surf.hessianThreshold = hessianThreshold;
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
}
GPU_TEST_P(SURF, Detector_Masked)
{
cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
cv::gpu::SURF_GPU surf;
surf.hessianThreshold = hessianThreshold;
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, mask, keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
}
GPU_TEST_P(SURF, Descriptor)
{
cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::gpu::SURF_GPU surf;
surf.hessianThreshold = hessianThreshold;
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
EXPECT_GT(matchedRatio, 0.6);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
ALL_DEVICES,
testing::Values(SURF_HessianThreshold(100.0), SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)),
testing::Values(SURF_Octaves(3), SURF_Octaves(4)),
testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)),
testing::Values(SURF_Extended(false), SURF_Extended(true)),
testing::Values(SURF_Upright(false), SURF_Upright(true))));
using namespace cvtest;
/////////////////////////////////////////////////////////////////////////////////////////////////
// FAST

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
namespace
{
IMPLEMENT_PARAM_CLASS(KSize, cv::Size)

@ -44,6 +44,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
////////////////////////////////////////////////////////////////////////////////
// SetTo

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HoughLines

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Integral

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
//#define DUMP
struct HOG : testing::TestWithParam<cv::gpu::DeviceInfo>, cv::gpu::HOGDescriptor

@ -43,6 +43,8 @@
#if defined(HAVE_CUDA) && defined(HAVE_OPENGL)
using namespace cvtest;
/////////////////////////////////////////////
// Buffer

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
//////////////////////////////////////////////////////
// BroxOpticalFlow

@ -76,11 +76,10 @@
#include "opencv2/imgproc.hpp"
#include "opencv2/video.hpp"
#include "opencv2/ts.hpp"
#include "opencv2/ts/gpu_test.hpp"
#include "opencv2/gpu.hpp"
#include "opencv2/nonfree.hpp"
#include "opencv2/legacy.hpp"
#include "utility.hpp"
#include "interpolation.hpp"
#include "main_test_nvidia.h"
#endif

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
////////////////////////////////////////////////////////
// pyrDown

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
///////////////////////////////////////////////////////////////////
// Gold implementation

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
///////////////////////////////////////////////////////////////////
// Gold implementation

@ -44,6 +44,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
#if CUDA_VERSION >= 5000
struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
namespace
{
cv::Mat createTransfomMatrix(cv::Size srcSize, double angle)

@ -43,6 +43,8 @@
#ifdef HAVE_CUDA
using namespace cvtest;
namespace
{
cv::Mat createTransfomMatrix(cv::Size srcSize, double angle)

@ -1,407 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
using namespace std;
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
using namespace testing::internal;
//////////////////////////////////////////////////////////////////////
// random generators
int randomInt(int minVal, int maxVal)
{
RNG& rng = TS::ptr()->get_rng();
return rng.uniform(minVal, maxVal);
}
double randomDouble(double minVal, double maxVal)
{
RNG& rng = TS::ptr()->get_rng();
return rng.uniform(minVal, maxVal);
}
Size randomSize(int minVal, int maxVal)
{
return Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal));
}
Scalar randomScalar(double minVal, double maxVal)
{
return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal));
}
Mat randomMat(Size size, int type, double minVal, double maxVal)
{
return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
}
//////////////////////////////////////////////////////////////////////
// GpuMat create
GpuMat createMat(Size size, int type, bool useRoi)
{
Size size0 = size;
if (useRoi)
{
size0.width += randomInt(5, 15);
size0.height += randomInt(5, 15);
}
GpuMat d_m(size0, type);
if (size0 != size)
d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height));
return d_m;
}
GpuMat loadMat(const Mat& m, bool useRoi)
{
GpuMat d_m = createMat(m.size(), m.type(), useRoi);
d_m.upload(m);
return d_m;
}
//////////////////////////////////////////////////////////////////////
// Image load
Mat readImage(const std::string& fileName, int flags)
{
return imread(TS::ptr()->get_data_path() + fileName, flags);
}
Mat readImageType(const std::string& fname, int type)
{
Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
if (CV_MAT_CN(type) == 4)
{
Mat temp;
cvtColor(src, temp, COLOR_BGR2BGRA);
swap(src, temp);
}
src.convertTo(src, CV_MAT_DEPTH(type), CV_MAT_DEPTH(type) == CV_32F ? 1.0 / 255.0 : 1.0);
return src;
}
//////////////////////////////////////////////////////////////////////
// Gpu devices
bool supportFeature(const DeviceInfo& info, FeatureSet feature)
{
return TargetArchs::builtWith(feature) && info.supports(feature);
}
DeviceManager& DeviceManager::instance()
{
static DeviceManager obj;
return obj;
}
void DeviceManager::load(int i)
{
devices_.clear();
devices_.reserve(1);
std::ostringstream msg;
if (i < 0 || i >= getCudaEnabledDeviceCount())
{
msg << "Incorrect device number - " << i;
throw runtime_error(msg.str());
}
DeviceInfo info(i);
if (!info.isCompatible())
{
msg << "Device " << i << " [" << info.name() << "] is NOT compatible with current GPU module build";
throw runtime_error(msg.str());
}
devices_.push_back(info);
}
void DeviceManager::loadAll()
{
int deviceCount = getCudaEnabledDeviceCount();
devices_.clear();
devices_.reserve(deviceCount);
for (int i = 0; i < deviceCount; ++i)
{
DeviceInfo info(i);
if (info.isCompatible())
{
devices_.push_back(info);
}
}
}
//////////////////////////////////////////////////////////////////////
// Additional assertion
namespace
{
template <typename T, typename OutT> std::string printMatValImpl(const Mat& m, Point p)
{
const int cn = m.channels();
std::ostringstream ostr;
ostr << "(";
p.x /= cn;
ostr << static_cast<OutT>(m.at<T>(p.y, p.x * cn));
for (int c = 1; c < m.channels(); ++c)
{
ostr << ", " << static_cast<OutT>(m.at<T>(p.y, p.x * cn + c));
}
ostr << ")";
return ostr.str();
}
std::string printMatVal(const Mat& m, Point p)
{
typedef std::string (*func_t)(const Mat& m, Point p);
static const func_t funcs[] =
{
printMatValImpl<uchar, int>, printMatValImpl<schar, int>, printMatValImpl<ushort, int>, printMatValImpl<short, int>,
printMatValImpl<int, int>, printMatValImpl<float, float>, printMatValImpl<double, double>
};
return funcs[m.depth()](m, p);
}
}
void minMaxLocGold(const Mat& src, double* minVal_, double* maxVal_, Point* minLoc_, Point* maxLoc_, const Mat& mask)
{
if (src.depth() != CV_8S)
{
minMaxLoc(src, minVal_, maxVal_, minLoc_, maxLoc_, mask);
return;
}
// OpenCV's minMaxLoc doesn't support CV_8S type
double minVal = numeric_limits<double>::max();
Point minLoc(-1, -1);
double maxVal = -numeric_limits<double>::max();
Point maxLoc(-1, -1);
for (int y = 0; y < src.rows; ++y)
{
const schar* src_row = src.ptr<schar>(y);
const uchar* mask_row = mask.empty() ? 0 : mask.ptr<uchar>(y);
for (int x = 0; x < src.cols; ++x)
{
if (!mask_row || mask_row[x])
{
schar val = src_row[x];
if (val < minVal)
{
minVal = val;
minLoc = cv::Point(x, y);
}
if (val > maxVal)
{
maxVal = val;
maxLoc = cv::Point(x, y);
}
}
}
}
if (minVal_) *minVal_ = minVal;
if (maxVal_) *maxVal_ = maxVal;
if (minLoc_) *minLoc_ = minLoc;
if (maxLoc_) *maxLoc_ = maxLoc;
}
Mat getMat(InputArray arr)
{
if (arr.kind() == _InputArray::GPU_MAT)
{
Mat m;
arr.getGpuMat().download(m);
return m;
}
return arr.getMat();
}
AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, InputArray m1_, InputArray m2_, double eps)
{
Mat m1 = getMat(m1_);
Mat m2 = getMat(m2_);
if (m1.size() != m2.size())
{
return AssertionFailure() << "Matrices \"" << expr1 << "\" and \"" << expr2 << "\" have different sizes : \""
<< expr1 << "\" [" << PrintToString(m1.size()) << "] vs \""
<< expr2 << "\" [" << PrintToString(m2.size()) << "]";
}
if (m1.type() != m2.type())
{
return AssertionFailure() << "Matrices \"" << expr1 << "\" and \"" << expr2 << "\" have different types : \""
<< expr1 << "\" [" << PrintToString(MatType(m1.type())) << "] vs \""
<< expr2 << "\" [" << PrintToString(MatType(m2.type())) << "]";
}
Mat diff;
absdiff(m1.reshape(1), m2.reshape(1), diff);
double maxVal = 0.0;
Point maxLoc;
minMaxLocGold(diff, 0, &maxVal, 0, &maxLoc);
if (maxVal > eps)
{
return AssertionFailure() << "The max difference between matrices \"" << expr1 << "\" and \"" << expr2
<< "\" is " << maxVal << " at (" << maxLoc.y << ", " << maxLoc.x / m1.channels() << ")"
<< ", which exceeds \"" << eps_expr << "\", where \""
<< expr1 << "\" at (" << maxLoc.y << ", " << maxLoc.x / m1.channels() << ") evaluates to " << printMatVal(m1, maxLoc) << ", \""
<< expr2 << "\" at (" << maxLoc.y << ", " << maxLoc.x / m1.channels() << ") evaluates to " << printMatVal(m2, maxLoc) << ", \""
<< eps_expr << "\" evaluates to " << eps;
}
return AssertionSuccess();
}
double checkSimilarity(InputArray m1, InputArray m2)
{
Mat diff;
matchTemplate(getMat(m1), getMat(m2), diff, CV_TM_CCORR_NORMED);
return std::abs(diff.at<float>(0, 0) - 1.f);
}
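Since the template and image have the same size here, matchTemplate produces a single correlation value; with CV_TM_CCORR_NORMED that value is the normalized cross-correlation of the two images, so the returned quantity is its distance from a perfect match:

\[ \mathrm{checkSimilarity}(m_1, m_2) = \left| 1 - \frac{\sum_{x,y} m_1(x,y)\, m_2(x,y)}{\sqrt{\sum_{x,y} m_1(x,y)^2 \; \sum_{x,y} m_2(x,y)^2}} \right| \]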
//////////////////////////////////////////////////////////////////////
// Helper structs for value-parameterized tests
vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
{
vector<MatType> v;
v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1));
for (int depth = depth_start; depth <= depth_end; ++depth)
{
for (int cn = cn_start; cn <= cn_end; ++cn)
{
v.push_back(MatType(CV_MAKE_TYPE(depth, cn)));
}
}
return v;
}
const vector<MatType>& all_types()
{
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
return v;
}
void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
{
(*os) << info.name();
}
void PrintTo(const UseRoi& useRoi, std::ostream* os)
{
if (useRoi)
(*os) << "sub matrix";
else
(*os) << "whole matrix";
}
void PrintTo(const Inverse& inverse, std::ostream* os)
{
if (inverse)
(*os) << "inverse";
else
(*os) << "direct";
}
//////////////////////////////////////////////////////////////////////
// Other
void dumpImage(const std::string& fileName, const Mat& image)
{
imwrite(TS::ptr()->get_data_path() + fileName, image);
}
void showDiff(InputArray gold_, InputArray actual_, double eps)
{
Mat gold = getMat(gold_);
Mat actual = getMat(actual_);
Mat diff;
absdiff(gold, actual, diff);
threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY);
namedWindow("gold", WINDOW_NORMAL);
namedWindow("actual", WINDOW_NORMAL);
namedWindow("diff", WINDOW_NORMAL);
imshow("gold", gold);
imshow("actual", actual);
imshow("diff", diff);
waitKey();
}
#endif // HAVE_CUDA

@ -1,331 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TEST_UTILITY_HPP__
#define __OPENCV_GPU_TEST_UTILITY_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/ts.hpp"
//////////////////////////////////////////////////////////////////////
// random generators
int randomInt(int minVal, int maxVal);
double randomDouble(double minVal, double maxVal);
cv::Size randomSize(int minVal, int maxVal);
cv::Scalar randomScalar(double minVal, double maxVal);
cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
//////////////////////////////////////////////////////////////////////
// GpuMat create
cv::gpu::GpuMat createMat(cv::Size size, int type, bool useRoi = false);
cv::gpu::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
//////////////////////////////////////////////////////////////////////
// Image load
//! read image from testdata folder
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
//! read image from testdata folder and convert it to specified type
cv::Mat readImageType(const std::string& fname, int type);
//////////////////////////////////////////////////////////////////////
// Gpu devices
//! return true if the device supports the specified feature and the gpu module was built with support for the feature.
bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
class DeviceManager
{
public:
static DeviceManager& instance();
void load(int i);
void loadAll();
const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; }
private:
std::vector<cv::gpu::DeviceInfo> devices_;
};
#define ALL_DEVICES testing::ValuesIn(DeviceManager::instance().values())
//////////////////////////////////////////////////////////////////////
// Additional assertion
void minMaxLocGold(const cv::Mat& src, double* minVal_, double* maxVal_ = 0, cv::Point* minLoc_ = 0, cv::Point* maxLoc_ = 0, const cv::Mat& mask = cv::Mat());
cv::Mat getMat(cv::InputArray arr);
testing::AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, cv::InputArray m1, cv::InputArray m2, double eps);
#define EXPECT_MAT_NEAR(m1, m2, eps) EXPECT_PRED_FORMAT3(assertMatNear, m1, m2, eps)
#define ASSERT_MAT_NEAR(m1, m2, eps) ASSERT_PRED_FORMAT3(assertMatNear, m1, m2, eps)
#define EXPECT_SCALAR_NEAR(s1, s2, eps) \
{ \
EXPECT_NEAR(s1[0], s2[0], eps); \
EXPECT_NEAR(s1[1], s2[1], eps); \
EXPECT_NEAR(s1[2], s2[2], eps); \
EXPECT_NEAR(s1[3], s2[3], eps); \
}
#define ASSERT_SCALAR_NEAR(s1, s2, eps) \
{ \
ASSERT_NEAR(s1[0], s2[0], eps); \
ASSERT_NEAR(s1[1], s2[1], eps); \
ASSERT_NEAR(s1[2], s2[2], eps); \
ASSERT_NEAR(s1[3], s2[3], eps); \
}
#define EXPECT_POINT2_NEAR(p1, p2, eps) \
{ \
EXPECT_NEAR(p1.x, p2.x, eps); \
EXPECT_NEAR(p1.y, p2.y, eps); \
}
#define ASSERT_POINT2_NEAR(p1, p2, eps) \
{ \
ASSERT_NEAR(p1.x, p2.x, eps); \
ASSERT_NEAR(p1.y, p2.y, eps); \
}
#define EXPECT_POINT3_NEAR(p1, p2, eps) \
{ \
EXPECT_NEAR(p1.x, p2.x, eps); \
EXPECT_NEAR(p1.y, p2.y, eps); \
EXPECT_NEAR(p1.z, p2.z, eps); \
}
#define ASSERT_POINT3_NEAR(p1, p2, eps) \
{ \
ASSERT_NEAR(p1.x, p2.x, eps); \
ASSERT_NEAR(p1.y, p2.y, eps); \
ASSERT_NEAR(p1.z, p2.z, eps); \
}
double checkSimilarity(cv::InputArray m1, cv::InputArray m2);
#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
{ \
ASSERT_EQ(mat1.type(), mat2.type()); \
ASSERT_EQ(mat1.size(), mat2.size()); \
EXPECT_LE(checkSimilarity(mat1, mat2), eps); \
}
#define ASSERT_MAT_SIMILAR(mat1, mat2, eps) \
{ \
ASSERT_EQ(mat1.type(), mat2.type()); \
ASSERT_EQ(mat1.size(), mat2.size()); \
ASSERT_LE(checkSimilarity(mat1, mat2), eps); \
}
//////////////////////////////////////////////////////////////////////
// Helper structs for value-parameterized tests
#define GPU_TEST_P(test_case_name, test_name) \
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
: public test_case_name { \
public: \
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
virtual void TestBody(); \
private: \
void UnsafeTestBody(); \
static int AddToRegistry() { \
::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
GetTestCasePatternHolder<test_case_name>(\
#test_case_name, __FILE__, __LINE__)->AddTestPattern(\
#test_case_name, \
#test_name, \
new ::testing::internal::TestMetaFactory< \
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
return 0; \
} \
static int gtest_registering_dummy_; \
GTEST_DISALLOW_COPY_AND_ASSIGN_(\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
}; \
int GTEST_TEST_CLASS_NAME_(test_case_name, \
test_name)::gtest_registering_dummy_ = \
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() \
{ \
try \
{ \
UnsafeTestBody(); \
} \
catch (...) \
{ \
cv::gpu::resetDevice(); \
throw; \
} \
} \
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::UnsafeTestBody()
#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
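For reference, a minimal value-parameterized test built from these helpers (the Identity case is purely illustrative, not part of the suite); the try/catch in GPU_TEST_P guarantees the CUDA device is reset if the body throws, so one failure cannot poison the context for later tests:

PARAM_TEST_CASE(Identity, cv::gpu::DeviceInfo, cv::Size, UseRoi)
{
};

GPU_TEST_P(Identity, Accuracy)
{
    cv::gpu::setDevice(GET_PARAM(0).deviceID());

    cv::Mat src = randomMat(GET_PARAM(1), CV_8UC1);
    cv::gpu::GpuMat d_src = loadMat(src, GET_PARAM(2)); // UseRoi may pad, then crop to a submatrix

    cv::Mat dst;
    d_src.download(dst); // round-trip through device memory

    EXPECT_MAT_NEAR(src, dst, 0);
}

INSTANTIATE_TEST_CASE_P(GPU_Core, Identity, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    WHOLE_SUBMAT));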
namespace cv { namespace gpu
{
void PrintTo(const DeviceInfo& info, std::ostream* os);
}}
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
// Depth
using perf::MatDepth;
#define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
#define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
\
std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
// Type
using perf::MatType;
//! return vector with types from specified range.
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
const std::vector<MatType>& all_types();
#define ALL_TYPES testing::ValuesIn(all_types())
#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
// ROI
class UseRoi
{
public:
inline UseRoi(bool val = false) : val_(val) {}
inline operator bool() const { return val_; }
private:
bool val_;
};
void PrintTo(const UseRoi& useRoi, std::ostream* os);
#define WHOLE_SUBMAT testing::Values(UseRoi(false), UseRoi(true))
// Direct/Inverse
class Inverse
{
public:
inline Inverse(bool val = false) : val_(val) {}
inline operator bool() const { return val_; }
private:
bool val_;
};
void PrintTo(const Inverse& useRoi, std::ostream* os);
#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
// Param class
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#define ALL_CHANNELS testing::Values(Channels(1), Channels(2), Channels(3), Channels(4))
#define IMAGE_CHANNELS testing::Values(Channels(1), Channels(3), Channels(4))
// Flags and enums
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))
CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
//////////////////////////////////////////////////////////////////////
// Other
void dumpImage(const std::string& fileName, const cv::Mat& image);
void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
#endif // __OPENCV_GPU_TEST_UTILITY_HPP__

@ -7,7 +7,7 @@
// copy or use the software.
//
//
// License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.

@ -1,7 +1,7 @@
#!/usr/bin/env python
import os, sys, re, string, fnmatch
allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "softcascade"]
allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "softcascade", "superres"]
verbose = False
show_warnings = True
show_errors = True
@ -380,7 +380,7 @@ class RstParser(object):
@classmethod
def parse_namespace(cls, func, section_name):
known_namespaces = ["cv", "gpu", "flann"]
known_namespaces = ["cv", "gpu", "flann", "superres"]
l = section_name.strip()
for namespace in known_namespaces:
if l.startswith(namespace + "::"):

@ -15,6 +15,7 @@ import android.content.DialogInterface;
import android.content.res.TypedArray;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Rect;
import android.util.AttributeSet;
import android.util.Log;
import android.view.SurfaceHolder;
@ -44,6 +45,7 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
protected int mFrameHeight;
protected int mMaxHeight;
protected int mMaxWidth;
protected float mScale = 0;
protected int mPreviewFormat = Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA;
protected int mCameraIndex = -1;
protected boolean mEnabled;
@ -389,7 +391,22 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac
Canvas canvas = getHolder().lockCanvas();
if (canvas != null) {
canvas.drawColor(0, android.graphics.PorterDuff.Mode.CLEAR);
canvas.drawBitmap(mCacheBitmap, (canvas.getWidth() - mCacheBitmap.getWidth()) / 2, (canvas.getHeight() - mCacheBitmap.getHeight()) / 2, null);
Log.d(TAG, "mScale value: " + mScale);
if (mScale != 0) {
canvas.drawBitmap(mCacheBitmap, new Rect(0,0,mCacheBitmap.getWidth(), mCacheBitmap.getHeight()),
new Rect((int)((canvas.getWidth() - mScale*mCacheBitmap.getWidth()) / 2),
(int)((canvas.getHeight() - mScale*mCacheBitmap.getHeight()) / 2),
(int)((canvas.getWidth() - mScale*mCacheBitmap.getWidth()) / 2 + mScale*mCacheBitmap.getWidth()),
(int)((canvas.getHeight() - mScale*mCacheBitmap.getHeight()) / 2 + mScale*mCacheBitmap.getHeight())), null);
} else {
canvas.drawBitmap(mCacheBitmap, new Rect(0,0,mCacheBitmap.getWidth(), mCacheBitmap.getHeight()),
new Rect((canvas.getWidth() - mCacheBitmap.getWidth()) / 2,
(canvas.getHeight() - mCacheBitmap.getHeight()) / 2,
(canvas.getWidth() - mCacheBitmap.getWidth()) / 2 + mCacheBitmap.getWidth(),
(canvas.getHeight() - mCacheBitmap.getHeight()) / 2 + mCacheBitmap.getHeight()), null);
}
if (mFpsMeter != null) {
mFpsMeter.measure();
mFpsMeter.draw(canvas, 20, 30);

@ -10,6 +10,7 @@ import android.hardware.Camera.PreviewCallback;
import android.os.Build;
import android.util.AttributeSet;
import android.util.Log;
import android.view.ViewGroup.LayoutParams;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
@ -130,6 +131,11 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb
mFrameWidth = params.getPreviewSize().width;
mFrameHeight = params.getPreviewSize().height;
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
else
mScale = 0;
if (mFpsMeter != null) {
mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
}
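Both camera views now pick the same aspect-preserving scale when the layout is MATCH_PARENT: for a view of size \(W \times H\) and a camera frame of size \(w \times h\),

\[ s = \min\!\left(\frac{H}{h},\ \frac{W}{w}\right), \]

and CameraBridgeViewBase draws the bitmap into a centered \(sw \times sh\) rectangle; \(s = 0\) preserves the old unscaled, centered behaviour.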

@ -8,6 +8,7 @@ import org.opencv.highgui.VideoCapture;
import android.content.Context;
import android.util.AttributeSet;
import android.util.Log;
import android.view.ViewGroup.LayoutParams;
/**
 * This class is an implementation of a bridge between SurfaceView and the native OpenCV camera.
@ -102,6 +103,11 @@ public class NativeCameraView extends CameraBridgeViewBase {
mFrameWidth = (int)frameSize.width;
mFrameHeight = (int)frameSize.height;
if ((getLayoutParams().width == LayoutParams.MATCH_PARENT) && (getLayoutParams().height == LayoutParams.MATCH_PARENT))
mScale = Math.min(((float)height)/mFrameHeight, ((float)width)/mFrameWidth);
else
mScale = 0;
if (mFpsMeter != null) {
mFpsMeter.setResolution(mFrameWidth, mFrameHeight);
}

@ -537,6 +537,8 @@ protected:
virtual void write_params( CvFileStorage* fs ) const;
virtual void read_params( CvFileStorage* fs, CvFileNode* node );
void optimize_linear_svm();
CvSVMParams params;
CvMat* class_labels;
int var_all;

@ -1566,6 +1566,7 @@ bool CvSVM::do_train( int svm_type, int sample_count, int var_count, const float
}
}
optimize_linear_svm();
ok = true;
__END__;
@ -1573,6 +1574,59 @@ bool CvSVM::do_train( int svm_type, int sample_count, int var_count, const float
return ok;
}
void CvSVM::optimize_linear_svm()
{
// we optimize only linear SVM: compress all the support vectors into one.
if( params.kernel_type != LINEAR )
return;
int class_count = class_labels ? class_labels->cols :
params.svm_type == CvSVM::ONE_CLASS ? 1 : 0;
int i, df_count = class_count > 1 ? class_count*(class_count-1)/2 : 1;
CvSVMDecisionFunc* df = decision_func;
for( i = 0; i < df_count; i++ )
{
int sv_count = df[i].sv_count;
if( sv_count != 1 )
break;
}
// if every decision function uses a single support vector,
// it's already compressed, so skip it.
if( i == df_count )
return;
int var_count = get_var_count();
int sample_size = (int)(var_count*sizeof(sv[0][0]));
float** new_sv = (float**)cvMemStorageAlloc(storage, df_count*sizeof(new_sv[0]));
for( i = 0; i < df_count; i++ )
{
new_sv[i] = (float*)cvMemStorageAlloc(storage, sample_size);
float* dst = new_sv[i];
memset(dst, 0, sample_size);
int j, k, sv_count = df[i].sv_count;
for( j = 0; j < sv_count; j++ )
{
const float* src = class_count > 1 ? sv[df[i].sv_index[j]] : sv[j];
double a = df[i].alpha[j];
for( k = 0; k < var_count; k++ )
dst[k] = (float)(dst[k] + src[k]*a);
}
df[i].sv_count = 1;
df[i].alpha[0] = 1.;
if( class_count > 1 )
df[i].sv_index[0] = i;
}
sv = new_sv;
sv_total = df_count;
}
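The compression performed above is exact for the linear kernel because the decision function is affine in the input:

\[ f(x) = \sum_j \alpha_j \langle s_j, x \rangle + b = \Big\langle \sum_j \alpha_j s_j,\; x \Big\rangle + b = \langle w, x \rangle + b, \qquad w = \sum_j \alpha_j s_j, \]

so each decision function can keep the single accumulated vector \(w\) with weight \(\alpha = 1\), which is exactly what the loop builds into new_sv.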
bool CvSVM::train( const CvMat* _train_data, const CvMat* _responses,
const CvMat* _var_idx, const CvMat* _sample_idx, CvSVMParams _params )
{
@ -2565,6 +2619,7 @@ void CvSVM::read( CvFileStorage* fs, CvFileNode* svm_node )
CV_NEXT_SEQ_ELEM( df_node->data.seq->elem_size, reader );
}
optimize_linear_svm();
create_kernel();
__END__;

@ -3,4 +3,28 @@ if(BUILD_ANDROID_PACKAGE)
endif()
set(the_description "Functionality with possible limitations on the use")
ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d)
ocv_add_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_gpu opencv_ocl)
ocv_module_include_directories()
if(HAVE_CUDA AND HAVE_opencv_gpu)
ocv_source_group("Src\\Cuda" GLOB "src/cuda/*.cu")
ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include" ${CUDA_INCLUDE_DIRS})
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
file(GLOB lib_cuda "src/cuda/*.cu")
ocv_cuda_compile(cuda_objs ${lib_cuda})
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
else()
set(lib_cuda "")
set(cuda_objs "")
set(cuda_link_libs "")
endif()
ocv_glob_module_sources(SOURCES ${lib_cuda} ${cuda_objs})
ocv_create_module(${cuda_link_libs})
ocv_add_precompiled_headers(${the_module})
ocv_add_accuracy_tests()
ocv_add_perf_tests()

@ -0,0 +1,79 @@
Background Subtraction
======================
.. highlight:: cpp
gpu::VIBE_GPU
-------------
.. ocv:class:: gpu::VIBE_GPU
Class used for background/foreground segmentation. ::
class VIBE_GPU
{
public:
explicit VIBE_GPU(unsigned long rngSeed = 1234567);
void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
void release();
...
};
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [VIBE2011]_.
gpu::VIBE_GPU::VIBE_GPU
-----------------------
The constructor.
.. ocv:function:: gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed = 1234567)
:param rngSeed: Value used to initialize the random sequence.
The default constructor sets all parameters to default values.
gpu::VIBE_GPU::initialize
-------------------------
Initializes the background model and allocates all inner buffers.
.. ocv:function:: void gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null())
:param firstFrame: First frame from video sequence.
:param stream: Stream for the asynchronous version.
gpu::VIBE_GPU::operator()
-------------------------
Updates the background model and returns the foreground mask.
.. ocv:function:: void gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null())
:param frame: Next video frame.
:param fgmask: The output foreground mask as an 8-bit binary image.
:param stream: Stream for the asynchronous version.
gpu::VIBE_GPU::release
----------------------
Releases all inner buffers' memory.
.. ocv:function:: void gpu::VIBE_GPU::release()
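A minimal usage sketch (``d_frame``, ``d_fgmask`` and ``nextFrame()`` are illustrative names, not part of the API): ::

    cv::gpu::VIBE_GPU vibe;
    cv::gpu::GpuMat d_frame(firstFrame), d_fgmask;

    vibe.initialize(d_frame);

    for (;;)
    {
        d_frame.upload(nextFrame());   // next video frame
        vibe(d_frame, d_fgmask);       // 8-bit binary foreground mask
    }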
.. [VIBE2011] O. Barnich and M. Van Droogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6):1709-1724, June 2011

@ -127,3 +127,204 @@ Detects keypoints and computes SURF descriptors for them.
The function is parallelized with the TBB library.
If you are using the C version, make sure you call ``cv::initModule_nonfree()`` from ``nonfree/nonfree.hpp``.
gpu::SURF_GPU
-------------
.. ocv:class:: gpu::SURF_GPU
Class used for extracting Speeded Up Robust Features (SURF) from an image. ::
class SURF_GPU
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_GPU();
//! the full constructor taking all the necessary parameters
explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f);
//! returns the descriptor size in floats (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
void uploadKeypoints(const vector<KeyPoint>& keypoints,
GpuMat& keypointsGPU);
//! download keypoints from device to host memory
void downloadKeypoints(const GpuMat& keypointsGPU,
vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
void downloadDescriptors(const GpuMat& descriptorsGPU,
vector<float>& descriptors);
void operator()(const GpuMat& img, const GpuMat& mask,
GpuMat& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask,
GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false,
bool calcOrientation = true);
void operator()(const GpuMat& img, const GpuMat& mask,
std::vector<KeyPoint>& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask,
std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false,
bool calcOrientation = true);
void operator()(const GpuMat& img, const GpuMat& mask,
std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors,
bool useProvidedKeypoints = false,
bool calcOrientation = true);
void releaseMemory();
// SURF parameters
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = keypointsRatio * img.size().area()
float keypointsRatio;
GpuMat sum, mask1, maskSum, intBuffer;
GpuMat det, trace;
GpuMat maxPosBuffer;
};
The class ``SURF_GPU`` implements the Speeded Up Robust Features descriptor. A fast multi-scale Hessian keypoint detector can be used to find the keypoints (the default option), but descriptors can also be computed for user-specified keypoints. Only 8-bit grayscale images are supported.
The class ``SURF_GPU`` can store results in GPU and CPU memory. It provides functions to convert results between the CPU and GPU representations ( ``uploadKeypoints``, ``downloadKeypoints``, ``downloadDescriptors`` ). The format of CPU results is the same as ``SURF`` results. GPU results are stored in ``GpuMat``. The ``keypoints`` matrix is a :math:`\texttt{nFeatures} \times 7` matrix of the ``CV_32FC1`` type.
* ``keypoints.ptr<float>(X_ROW)[i]`` contains x coordinate of the i-th feature.
* ``keypoints.ptr<float>(Y_ROW)[i]`` contains y coordinate of the i-th feature.
* ``keypoints.ptr<float>(LAPLACIAN_ROW)[i]`` contains the laplacian sign of the i-th feature.
* ``keypoints.ptr<float>(OCTAVE_ROW)[i]`` contains the octave of the i-th feature.
* ``keypoints.ptr<float>(SIZE_ROW)[i]`` contains the size of the i-th feature.
* ``keypoints.ptr<float>(ANGLE_ROW)[i]`` contains the orientation of the i-th feature.
* ``keypoints.ptr<float>(HESSIAN_ROW)[i]`` contains the response of the i-th feature.
The ``descriptors`` matrix is a :math:`\texttt{nFeatures} \times \texttt{descriptorSize}` matrix of the ``CV_32FC1`` type.
The class ``SURF_GPU`` uses several internal buffers and provides access to them. All buffers can be safely released between function calls.
.. seealso:: :ocv:class:`SURF`
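For orientation, a minimal detect-and-download sketch against the interface above (``img`` is an 8-bit grayscale ``cv::Mat``; the snippet is illustrative, not canonical usage): ::

    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = 400.0;

    cv::gpu::GpuMat d_img(img), d_keypoints, d_descriptors;
    surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);

    std::vector<cv::KeyPoint> keypoints;
    std::vector<float> descriptors;
    surf.downloadKeypoints(d_keypoints, keypoints);
    surf.downloadDescriptors(d_descriptors, descriptors);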
ocl::SURF_OCL
-------------
.. ocv:class:: ocl::SURF_OCL
Class used for extracting Speeded Up Robust Features (SURF) from an image. ::
class SURF_OCL
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_OCL();
//! the full constructor taking all the necessary parameters
explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
//! returns the descriptor size in floats (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
void uploadKeypoints(const vector<KeyPoint>& keypoints,
oclMat& keypointsocl);
//! download keypoints from device to host memory
void downloadKeypoints(const oclMat& keypointsocl,
vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
void downloadDescriptors(const oclMat& descriptorsocl,
vector<float>& descriptors);
void operator()(const oclMat& img, const oclMat& mask,
oclMat& keypoints);
void operator()(const oclMat& img, const oclMat& mask,
oclMat& keypoints, oclMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const oclMat& img, const oclMat& mask,
std::vector<KeyPoint>& keypoints);
void operator()(const oclMat& img, const oclMat& mask,
std::vector<KeyPoint>& keypoints, oclMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const oclMat& img, const oclMat& mask,
std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors,
bool useProvidedKeypoints = false);
void releaseMemory();
// SURF parameters
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
float keypointsRatio;
oclMat sum, mask1, maskSum, intBuffer;
oclMat det, trace;
oclMat maxPosBuffer;
};
The class ``SURF_OCL`` implements the Speeded Up Robust Features descriptor. A fast multi-scale Hessian keypoint detector can be used to find the keypoints (the default option), but descriptors can also be computed for user-specified keypoints. Only 8-bit grayscale images are supported.
The class ``SURF_OCL`` can store results in GPU and CPU memory. It provides functions to convert results between the CPU and GPU representations ( ``uploadKeypoints``, ``downloadKeypoints``, ``downloadDescriptors`` ). The format of CPU results is the same as ``SURF`` results. GPU results are stored in ``oclMat``. The ``keypoints`` matrix is a :math:`\texttt{nFeatures} \times 7` matrix of the ``CV_32FC1`` type.
* ``keypoints.ptr<float>(X_ROW)[i]`` contains x coordinate of the i-th feature.
* ``keypoints.ptr<float>(Y_ROW)[i]`` contains y coordinate of the i-th feature.
* ``keypoints.ptr<float>(LAPLACIAN_ROW)[i]`` contains the laplacian sign of the i-th feature.
* ``keypoints.ptr<float>(OCTAVE_ROW)[i]`` contains the octave of the i-th feature.
* ``keypoints.ptr<float>(SIZE_ROW)[i]`` contains the size of the i-th feature.
* ``keypoints.ptr<float>(ANGLE_ROW)[i]`` contain orientation of the i-th feature.
* ``keypoints.ptr<float>(HESSIAN_ROW)[i]`` contains the response of the i-th feature.
The ``descriptors`` matrix is :math:`\texttt{nFeatures} \times \texttt{descriptorSize}` matrix with the ``CV_32FC1`` type.
The class ``SURF_OCL`` uses some buffers and provides access to it. All buffers can be safely released between function calls.
.. seealso:: :ocv:class:`SURF`
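Descriptors can also be computed for keypoints detected elsewhere, as the accuracy tests later in this diff do when comparing against the CPU ``SURF``. A minimal sketch, assuming ``img`` is a ``CV_8UC1`` ``cv::Mat`` and ``keypoints`` were produced by another detector (hypothetical variable names): ::

    cv::ocl::SURF_OCL surf;
    cv::ocl::oclMat d_img(img);                  // upload the image
    cv::ocl::oclMat d_descriptors;

    // useProvidedKeypoints = true: reuse the given keypoints and
    // compute their descriptors on the device
    surf(d_img, cv::ocl::oclMat(), keypoints, d_descriptors, true);

    std::vector<float> descriptors;
    surf.downloadDescriptors(d_descriptors, descriptors);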

@ -8,3 +8,4 @@ The module contains algorithms that may be patented in some countries or have so
:maxdepth: 2
feature_detection
background_subtraction

@ -0,0 +1,169 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_NONFREE_GPU_HPP__
#define __OPENCV_NONFREE_GPU_HPP__
#include "opencv2/opencv_modules.hpp"
#if defined(HAVE_OPENCV_GPU)
#include "opencv2/gpu.hpp"
namespace cv { namespace gpu {
class CV_EXPORTS SURF_GPU
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_GPU();
//! the full constructor taking all the necessary parameters
explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
//! returns the descriptor size in floats (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
void uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
//! download keypoints from device to host memory
void downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
void downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors);
//! finds the keypoints using fast hessian detector used in SURF
//! supports CV_8UC1 images
//! keypoints will have nFeatures cols and 7 rows (one per KeypointLayout attribute)
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints);
//! finds the keypoints and computes their descriptors.
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
bool useProvidedKeypoints = false);
void releaseMemory();
// SURF parameters
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
float keypointsRatio;
GpuMat sum, mask1, maskSum, intBuffer;
GpuMat det, trace;
GpuMat maxPosBuffer;
};
/*!
* The class implements the following algorithm:
* "ViBe: A universal background subtraction algorithm for video sequences"
* O. Barnich and M. Van Droogenbroeck
* IEEE Transactions on Image Processing, 20(6):1709-1724, June 2011
*/
class CV_EXPORTS VIBE_GPU
{
public:
//! the default constructor
explicit VIBE_GPU(unsigned long rngSeed = 1234567);
//! re-initialization method
void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
//! the update operator
void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
//! releases all inner buffers
void release();
int nbSamples; // number of samples per pixel
int reqMatches; // #_min
int radius; // R
int subsamplingFactor; // amount of random subsampling
private:
Size frameSize_;
unsigned long rngSeed_;
GpuMat randStates_;
GpuMat samples_;
};
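// A minimal usage sketch (hypothetical variable names; frames already uploaded to GpuMat):
//
//   cv::gpu::VIBE_GPU vibe;
//   vibe.initialize(d_firstFrame);   // seed the per-pixel background samples
//   cv::gpu::GpuMat fgmask;
//   vibe(d_frame, fgmask);           // update the model; fgmask is CV_8UC1 with 255 = foreground
//   vibe.release();                  // free the internal buffers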
} // namespace gpu
} // namespace cv
#endif // defined(HAVE_OPENCV_GPU)
#endif // __OPENCV_NONFREE_GPU_HPP__

@ -0,0 +1,124 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_NONFREE_OCL_HPP__
#define __OPENCV_NONFREE_OCL_HPP__
#include "opencv2/ocl.hpp"
namespace cv
{
namespace ocl
{
//! Speeded up robust features, port from GPU module.
////////////////////////////////// SURF //////////////////////////////////////////
class CV_EXPORTS SURF_OCL
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_OCL();
//! the full constructor taking all the necessary parameters
explicit SURF_OCL(double _hessianThreshold, int _nOctaves = 4,
int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false);
//! returns the descriptor size in floats (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
void uploadKeypoints(const std::vector<cv::KeyPoint> &keypoints, oclMat &keypointsocl);
//! download keypoints from device to host memory
void downloadKeypoints(const oclMat &keypointsocl, std::vector<KeyPoint> &keypoints);
//! download descriptors from device to host memory
void downloadDescriptors(const oclMat &descriptorsocl, std::vector<float> &descriptors);
//! finds the keypoints using fast hessian detector used in SURF
//! supports CV_8UC1 images
//! keypoints will have nFeatures cols and 7 rows (one per KeypointLayout attribute)
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints);
//! finds the keypoints and computes their descriptors.
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
bool useProvidedKeypoints = false);
void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints);
void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, oclMat &descriptors,
bool useProvidedKeypoints = false);
void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, std::vector<float> &descriptors,
bool useProvidedKeypoints = false);
void releaseMemory();
// SURF parameters
float hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
float keypointsRatio;
oclMat sum, mask1, maskSum, intBuffer;
oclMat det, trace;
oclMat maxPosBuffer;
};
}
}
#endif //__OPENCV_NONFREE_OCL_HPP__

@ -0,0 +1,138 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
using namespace perf;
#if defined(HAVE_XINE) || \
defined(HAVE_GSTREAMER) || \
defined(HAVE_QUICKTIME) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_FFMPEG) || \
defined(WIN32) /* assume that we have ffmpeg */
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
#else
# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
#endif
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
//////////////////////////////////////////////////////////////////////
// SURF
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, GPU_SURF,
Values<string>("gpu/perf/aloe.png"))
{
declare.time(50.0);
const cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::SURF_GPU d_surf;
const cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
TEST_CYCLE() d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
std::vector<cv::KeyPoint> gpu_keypoints;
d_surf.downloadKeypoints(d_keypoints, gpu_keypoints);
cv::Mat gpu_descriptors(d_descriptors);
sortKeyPoints(gpu_keypoints, gpu_descriptors);
SANITY_CHECK_KEYPOINTS(gpu_keypoints);
SANITY_CHECK(gpu_descriptors, 1e-3);
}
else
{
cv::SURF surf;
std::vector<cv::KeyPoint> cpu_keypoints;
cv::Mat cpu_descriptors;
TEST_CYCLE() surf(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
SANITY_CHECK(cpu_descriptors);
}
}
//////////////////////////////////////////////////////
// VIBE
#if BUILD_WITH_VIDEO_INPUT_SUPPORT
DEF_PARAM_TEST(Video_Cn, string, int);
PERF_TEST_P(Video_Cn, GPU_VIBE,
Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
GPU_CHANNELS_1_3_4))
{
const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
const int cn = GET_PARAM(1);
cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened());
cv::Mat frame;
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_frame(frame);
cv::gpu::VIBE_GPU vibe;
cv::gpu::GpuMat foreground;
vibe(d_frame, foreground);
for (int i = 0; i < 10; ++i)
{
cap >> frame;
ASSERT_FALSE(frame.empty());
if (cn != 3)
{
cv::Mat temp;
if (cn == 1)
cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
else
cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
cv::swap(temp, frame);
}
d_frame.upload(frame);
startTimer(); next();
vibe(d_frame, foreground);
stopTimer();
}
GPU_SANITY_CHECK(foreground);
}
else
{
FAIL_NO_CPU();
}
}
#endif
#endif

@ -1,3 +1,4 @@
#include "perf_precomp.hpp"
#include "opencv2/ts/gpu_perf.hpp"
CV_PERF_TEST_MAIN(nonfree)
CV_PERF_TEST_MAIN(nonfree, perf::printCudaInfo())

@ -13,6 +13,15 @@
#include "opencv2/nonfree.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_OCL
# include "opencv2/nonfree/ocl.hpp"
#endif
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
#include "opencv2/nonfree/gpu.hpp"
#endif
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif

@ -43,61 +43,69 @@
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#include "perf_precomp.hpp"
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCV_OCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#define FILTER_IMAGE "../../../samples/gpu/road.png"
typedef perf::TestBaseWithParam<std::string> OCL_SURF;
TEST(SURF, Performance)
#define SURF_IMAGES \
"cv/detectors_descriptors_evaluation/images_datasets/leuven/img1.png",\
"stitching/a3.png"
PERF_TEST_P(OCL_SURF, DISABLED_with_data_transfer, testing::Values(SURF_IMAGES))
{
cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
string filename = getDataPath(GetParam());
Mat img = imread(filename, IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
ocl::SURF_OCL d_surf;
ocl::oclMat d_keypoints;
ocl::oclMat d_descriptors;
SURF_OCL d_surf;
oclMat d_keypoints;
oclMat d_descriptors;
Mat cpu_kp;
Mat cpu_dp;
double totalgputick = 0;
double totalgputick_kernel = 0;
declare.time(60);
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
TEST_CYCLE()
{
t1 = (double)cvGetTickCount();//gpu start1
ocl::oclMat d_src(img);//upload
t2 = (double)cvGetTickCount(); //kernel
d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
t2 = (double)cvGetTickCount() - t2;//kernel
oclMat d_src(img);
cv::Mat cpu_kp, cpu_dp;
d_keypoints.download (cpu_kp);//download
d_descriptors.download (cpu_dp);//download
d_surf(d_src, oclMat(), d_keypoints, d_descriptors);
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
d_keypoints.download(cpu_kp);
d_descriptors.download(cpu_dp);
}
totalgputick = t1 + totalgputick;
SANITY_CHECK(cpu_kp, 1);
SANITY_CHECK(cpu_dp, 1);
}
totalgputick_kernel = t2 + totalgputick_kernel;
PERF_TEST_P(OCL_SURF, DISABLED_without_data_transfer, testing::Values(SURF_IMAGES))
{
string filename = getDataPath(GetParam());
Mat img = imread(filename, IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
}
SURF_OCL d_surf;
oclMat d_keypoints;
oclMat d_descriptors;
oclMat d_src(img);
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
declare.time(60);
TEST_CYCLE() d_surf(d_src, oclMat(), d_keypoints, d_descriptors);
Mat cpu_kp;
Mat cpu_dp;
d_keypoints.download(cpu_kp);
d_descriptors.download(cpu_dp);
SANITY_CHECK(cpu_kp, 1);
SANITY_CHECK(cpu_dp, 1);
}
#endif //Have opencl
#endif // HAVE_OPENCV_OCL

@ -55,6 +55,33 @@
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/filters.hpp"
namespace cv { namespace gpu { namespace device
{
namespace surf
{
void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
void bindImgTex(PtrStepSzb img);
size_t bindSumTex(PtrStepSz<unsigned int> sum);
size_t bindMaskSumTex(PtrStepSz<unsigned int> maskSum);
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols,
int octave, int nOctaveLayer);
void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4* maxPosBuffer, unsigned int* maxCounter,
int img_rows, int img_cols, int octave, bool use_mask, int nLayers);
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
unsigned int* featureCounter);
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures);
void compute_descriptors_gpu(PtrStepSz<float4> descriptors, const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
}
}}}
namespace cv { namespace gpu { namespace device
{
namespace surf
@ -717,6 +744,9 @@ namespace cv { namespace gpu { namespace device
int height;
};
__device__ void calc_dx_dy(const float* featureX, const float* featureY, const float* featureSize, const float* featureDir,
float& dx, float& dy);
__device__ void calc_dx_dy(const float* featureX, const float* featureY, const float* featureSize, const float* featureDir,
float& dx, float& dy)
{

@ -44,6 +44,18 @@
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device
{
namespace vibe
{
void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
}
}}}
namespace cv { namespace gpu { namespace device
{
namespace vibe
@ -255,4 +267,4 @@ namespace cv { namespace gpu { namespace device
}}}
#endif /* CUDA_DISABLER */
#endif /* CUDA_DISABLER */

@ -78,7 +78,12 @@ uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int col
// dynamically change the precision used for floating type
#if defined DOUBLE_SUPPORT
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#define F double
#else
#define F float
@ -744,13 +749,19 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 16]);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 8 ]);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 4 ]);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 2 ]);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
data[tid] = *partial_reduction = op(partial_reduction, data[tid + 1 ]);
}
#undef op
}
@ -892,9 +903,9 @@ __kernel
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
//kp_dir = 360.0f - kp_dir;
//if (fabs(kp_dir - 360.f) < FLT_EPSILON)
// kp_dir = 0.f;
kp_dir = 360.0f - kp_dir;
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
kp_dir = 0.f;
featureDir[get_group_id(0)] = kp_dir;
}
@ -913,7 +924,7 @@ __kernel
if(get_global_id(0) <= nFeatures)
{
featureDir[get_global_id(0)] = 90.0f;
featureDir[get_global_id(0)] = 270.0f;
}
}
@ -1011,7 +1022,12 @@ void calc_dx_dy(
const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)];
const float size = featureSize[get_group_id(0)];
float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
if(fabs(descriptor_dir - 360.0f) < FLT_EPSILON)
{
descriptor_dir = 0.0f;
}
descriptor_dir *= (float)(CV_PI_F / 180.0f);
/* The sampling intervals and wavelet sizes for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */

@ -53,4 +53,24 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/internal.hpp"
#include "opencv2/opencv_modules.hpp"
#if defined(HAVE_OPENCV_GPU)
#include "opencv2/nonfree/gpu.hpp"
#if defined(HAVE_CUDA)
#include "opencv2/gpu/stream_accessor.hpp"
#include "opencv2/gpu/device/common.hpp"
static inline void throw_nogpu() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#else
static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
#endif
#endif
#ifdef HAVE_OPENCV_OCL
# include "opencv2/nonfree/ocl.hpp"
# include "opencv2/ocl/private/util.hpp"
#endif
#endif

@ -42,9 +42,9 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#include "mcwutil.hpp"
#ifdef HAVE_OPENCV_OCL
using namespace cv;
using namespace cv::ocl;
@ -54,7 +54,7 @@ namespace cv
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *nonfree_surf;
extern const char *surf;
const char* noImage2dOption = "-D DISABLE_IMAGE2D";
@ -74,10 +74,11 @@ namespace cv
}
static inline int divUp(int total, int grain)
static inline size_t divUp(size_t total, size_t grain)
{
return (total + grain - 1) / grain;
}
static inline int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
@ -150,7 +151,7 @@ public:
integral(img, surf_.sum);
if(support_image2d())
{
bindImgTex(img, imgTex);
bindImgTex(img, imgTex);
bindImgTex(surf_.sum, sumTex);
}
@ -158,7 +159,7 @@ public:
if (use_mask)
{
throw std::exception();
CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
//!FIXME
// temp fix for missing min overload
//oclMat temp(mask.size(), mask.type());
@ -508,11 +509,11 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
1
};
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
int octave, bool useMask, int nLayers, int layer_rows, int layer_cols)
{
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;
@ -536,7 +537,7 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxCandidates));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold));
if(use_mask)
if(useMask)
{
if(maskSumTex)
{
@ -554,11 +555,11 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
1
};
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
oclMat &keypoints, oclMat &counters_, int octave, int layer_rows, int max_features)
{
Context *clCxt = det.clCxt;
std::string kernelName = "icvInterpolateKeypoint";
@ -567,19 +568,19 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters.data));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters_.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxFeatures));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&max_features));
size_t localThreads[3] = {3, 3, 3};
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
@ -606,7 +607,7 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
size_t localThreads[3] = {32, 4, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
@ -621,9 +622,9 @@ void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
args.push_back( std::make_pair( sizeof(cl_int), (void *)&nFeatures));
size_t localThreads[3] = {256, 1, 1};
size_t globalThreads[3] = {nFeatures, 1, 1};
size_t globalThreads[3] = {saturate_cast<size_t>(nFeatures), 1, 1};
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
@ -631,7 +632,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
Context *clCxt = descriptors.clCxt;
std::string kernelName = "";
std::string kernelName;
std::vector< std::pair<size_t, const void *> > args;
size_t localThreads[3] = {1, 1, 1};
size_t globalThreads[3] = {1, 1, 1};
@ -663,7 +664,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors64";
@ -677,7 +678,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
@ -706,7 +707,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors128";
@ -720,7 +721,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
}
#endif //HAVE_OPENCV_OCL

@ -42,10 +42,12 @@
#include "precomp.hpp"
#if defined(HAVE_OPENCV_GPU)
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
#if !defined (HAVE_CUDA)
cv::gpu::SURF_GPU::SURF_GPU() { throw_nogpu(); }
cv::gpu::SURF_GPU::SURF_GPU(double, int, int, bool, float, bool) { throw_nogpu(); }
@ -60,7 +62,7 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<Key
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, std::vector<float>&, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::releaseMemory() { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
#else // !defined (HAVE_CUDA)
namespace cv { namespace gpu { namespace device
{
@ -415,4 +417,6 @@ void cv::gpu::SURF_GPU::releaseMemory()
maxPosBuffer.release();
}
#endif /* !defined (HAVE_CUDA) */
#endif // !defined (HAVE_CUDA)
#endif // defined(HAVE_OPENCV_GPU)

@ -42,6 +42,8 @@
#include "precomp.hpp"
#if defined(HAVE_OPENCV_GPU)
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long) { throw_nogpu(); }
@ -135,3 +137,5 @@ void cv::gpu::VIBE_GPU::release()
}
#endif
#endif // defined(HAVE_OPENCV_GPU)

@ -0,0 +1,285 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
using namespace cvtest;
/////////////////////////////////////////////////////////////////////////////////////////////////
// SURF
namespace
{
IMPLEMENT_PARAM_CLASS(SURF_HessianThreshold, double)
IMPLEMENT_PARAM_CLASS(SURF_Octaves, int)
IMPLEMENT_PARAM_CLASS(SURF_OctaveLayers, int)
IMPLEMENT_PARAM_CLASS(SURF_Extended, bool)
IMPLEMENT_PARAM_CLASS(SURF_Upright, bool)
}
PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright)
{
cv::gpu::DeviceInfo devInfo;
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
hessianThreshold = GET_PARAM(1);
nOctaves = GET_PARAM(2);
nOctaveLayers = GET_PARAM(3);
extended = GET_PARAM(4);
upright = GET_PARAM(5);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(SURF, Detector)
{
cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::gpu::SURF_GPU surf;
surf.hessianThreshold = hessianThreshold;
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
}
GPU_TEST_P(SURF, Detector_Masked)
{
cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
cv::gpu::SURF_GPU surf;
surf.hessianThreshold = hessianThreshold;
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, mask, keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
}
GPU_TEST_P(SURF, Descriptor)
{
cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::gpu::SURF_GPU surf;
surf.hessianThreshold = hessianThreshold;
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
EXPECT_GT(matchedRatio, 0.6);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
ALL_DEVICES,
testing::Values(SURF_HessianThreshold(100.0), SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)),
testing::Values(SURF_Octaves(3), SURF_Octaves(4)),
testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)),
testing::Values(SURF_Extended(false), SURF_Extended(true)),
testing::Values(SURF_Upright(false), SURF_Upright(true))));
//////////////////////////////////////////////////////
// VIBE
PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
};
GPU_TEST_P(VIBE, Accuracy)
{
const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
const cv::Size size = GET_PARAM(1);
const int type = GET_PARAM(2);
const bool useRoi = GET_PARAM(3);
const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
cv::Mat frame = randomMat(size, type, 0.0, 100);
cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);
cv::gpu::VIBE_GPU vibe;
cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
vibe.initialize(d_frame);
for (int i = 0; i < 20; ++i)
vibe(d_frame, d_fgmask);
frame = randomMat(size, type, 160, 255);
d_frame = loadMat(frame, useRoi);
vibe(d_frame, d_fgmask);
// now fgmask should be entirely foreground
ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
}
INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
WHOLE_SUBMAT));
#endif

@ -1,3 +1,73 @@
#include "test_precomp.hpp"
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
using namespace cv;
using namespace cv::gpu;
using namespace cvtest;
using namespace testing;
int main(int argc, char **argv)
{
try
{
const char* keys =
"{ h | help ? | false | Print help}"
"{ i | info | false | Print information about system and exit }"
"{ d | device | -1 | Device on which tests will be executed (-1 means all devices) }"
;
CommandLineParser cmd(argc, (const char**)argv, keys);
if (cmd.get<bool>("help"))
{
cmd.printParams();
return 0;
}
printCudaInfo();
if (cmd.get<bool>("info"))
{
return 0;
}
int device = cmd.get<int>("device");
if (device < 0)
{
DeviceManager::instance().loadAll();
std::cout << "Run tests on all supported devices \n" << std::endl;
}
else
{
DeviceManager::instance().load(device);
DeviceInfo info(device);
std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl;
}
TS::ptr()->init("cv");
InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return -1;
}
catch (...)
{
std::cerr << "Unknown error" << std::endl;
return -1;
}
return 0;
}
#else // HAVE_CUDA
CV_TEST_MAIN("cv")
#endif // HAVE_CUDA

@ -15,4 +15,14 @@
#include "opencv2/highgui.hpp"
#include "opencv2/nonfree.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_OCL
# include "opencv2/nonfree/ocl.hpp"
#endif
#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
#include "opencv2/ts/gpu_test.hpp"
#include "opencv2/nonfree/gpu.hpp"
#endif
#endif

@ -0,0 +1,226 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_OPENCV_OCL
using namespace std;
using std::tr1::get;
static bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
{
const double maxPtDif = 0.1;
const double maxSizeDif = 0.1;
const double maxAngleDif = 0.1;
const double maxResponseDif = 0.01;
double dist = cv::norm(p1.pt - p2.pt);
if (dist < maxPtDif &&
fabs(p1.size - p2.size) < maxSizeDif &&
abs(p1.angle - p2.angle) < maxAngleDif &&
abs(p1.response - p2.response) < maxResponseDif &&
p1.octave == p2.octave &&
p1.class_id == p2.class_id)
{
return true;
}
return false;
}
static int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
{
std::sort(actual.begin(), actual.end(), perf::comparators::KeypointGreater());
std::sort(gold.begin(), gold.end(), perf::comparators::KeypointGreater());
int validCount = 0;
for (size_t i = 0; i < gold.size(); ++i)
{
const cv::KeyPoint& p1 = gold[i];
const cv::KeyPoint& p2 = actual[i];
if (keyPointsEquals(p1, p2))
++validCount;
}
return validCount;
}
static int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
{
int validCount = 0;
for (size_t i = 0; i < matches.size(); ++i)
{
const cv::DMatch& m = matches[i];
const cv::KeyPoint& p1 = keypoints1[m.queryIdx];
const cv::KeyPoint& p2 = keypoints2[m.trainIdx];
if (keyPointsEquals(p1, p2))
++validCount;
}
return validCount;
}
#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
#define IMPLEMENT_PARAM_CLASS(name, type) \
namespace { class name { \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) {*os << #name << "=" << testing::PrintToString(static_cast< type >(param));}}
IMPLEMENT_PARAM_CLASS(HessianThreshold, double)
IMPLEMENT_PARAM_CLASS(Octaves, int)
IMPLEMENT_PARAM_CLASS(OctaveLayers, int)
IMPLEMENT_PARAM_CLASS(Extended, bool)
IMPLEMENT_PARAM_CLASS(Upright, bool)
PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright)
{
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
virtual void SetUp()
{
hessianThreshold = get<0>(GetParam());
nOctaves = get<1>(GetParam());
nOctaveLayers = get<2>(GetParam());
extended = get<3>(GetParam());
upright = get<4>(GetParam());
}
};
TEST_P(SURF, DISABLED_Detector)
{
cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::ocl::SURF_OCL surf;
surf.hessianThreshold = static_cast<float>(hessianThreshold);
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
std::vector<cv::KeyPoint> keypoints;
surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.99);
}
TEST_P(SURF, DISABLED_Descriptor)
{
cv::Mat image = cv::imread(string(cvtest::TS::ptr()->get_data_path()) + "shared/fruits.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());
cv::ocl::SURF_OCL surf;
surf.hessianThreshold = static_cast<float>(hessianThreshold);
surf.nOctaves = nOctaves;
surf.nOctaveLayers = nOctaveLayers;
surf.extended = extended;
surf.upright = upright;
surf.keypointsRatio = 0.05f;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
cv::ocl::oclMat descriptors;
surf(cv::ocl::oclMat(image), cv::ocl::oclMat(), keypoints, descriptors, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
EXPECT_GT(matchedRatio, 0.35);
}
INSTANTIATE_TEST_CASE_P(OCL_Features2D, SURF, testing::Combine(
testing::Values(HessianThreshold(500.0), HessianThreshold(1000.0)),
testing::Values(Octaves(3), Octaves(4)),
testing::Values(OctaveLayers(2), OctaveLayers(3)),
testing::Values(Extended(false), Extended(true)),
testing::Values(Upright(false), Upright(true))));
#endif // HAVE_OPENCV_OCL

@ -1,69 +1,7 @@
# Will be modified later
if(NOT HAVE_OPENCL)
ocv_module_disable(ocl)
endif()
set(the_description "OpenCL-accelerated Computer Vision")
ocv_add_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video)
ocv_module_include_directories()
file(GLOB CL_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/*.cl")
set(kernels_cpp "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
set(cl2cpp_script "${CMAKE_CURRENT_SOURCE_DIR}/cl2cpp.cmake")
add_custom_command(
OUTPUT ${kernels_cpp}
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/kernels" -DOUTPUT="${kernels_cpp}" -P ${cl2cpp_script}
DEPENDS ${CL_FILES} ${cl2cpp_script})
file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_srcs "src/*.cpp")
file(GLOB lib_int_hdrs "src/*.h*")
source_group("Include" FILES ${lib_hdrs})
source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs} ${kernels_cpp})
if (HAVE_OPENCL)
set(ocl_link_libs ${OPENCL_LIBRARIES})
if(OPENCL_INCLUDE_DIR)
ocv_include_directories(${OPENCL_INCLUDE_DIR})
endif()
if (HAVE_CLAMDFFT)
set(ocl_link_libs ${ocl_link_libs} ${CLAMDFFT_LIBRARIES})
ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
endif()
if (HAVE_CLAMDBLAS)
set(ocl_link_libs ${ocl_link_libs} ${CLAMDBLAS_LIBRARIES})
ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
endif()
endif()
ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
ocv_set_module_sources(HEADERS ${lib_hdrs} SOURCES ${lib_int_hdrs} ${lib_srcs} ${kernels_cpp})
ocv_create_module(${ocl_link_libs})
install(FILES ${lib_hdrs}
DESTINATION include/opencv2/${name}
COMPONENT main)
ocv_add_precompiled_headers(${the_module})
################################################################################################################
################################ OpenCL Module Tests ##################################################
################################################################################################################
file(GLOB test_srcs "test/*.cpp")
file(GLOB test_hdrs "test/*.hpp" "test/*.h")
ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
FILES "Src" ${test_srcs})
################################################################################################################
################################ OpenCL Module Performance ##################################################
################################################################################################################
file(GLOB perf_srcs "perf/*.cpp")
file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")
ocv_add_perf_tests(FILES "Include" ${perf_hdrs}
FILES "Src" ${perf_srcs})

@ -88,102 +88,3 @@ Computes a proximity map for a raster template and an image where the template i
* ``CV_TM_CCORR``
.. seealso:: :ocv:func:`matchTemplate`
ocl::SURF_OCL
-------------
.. ocv:class:: ocl::SURF_OCL
Class used for extracting Speeded Up Robust Features (SURF) from an image. ::
class SURF_OCL
{
public:
enum KeypointLayout
{
X_ROW = 0,
Y_ROW,
LAPLACIAN_ROW,
OCTAVE_ROW,
SIZE_ROW,
ANGLE_ROW,
HESSIAN_ROW,
ROWS_COUNT
};
//! the default constructor
SURF_OCL();
//! the full constructor taking all the necessary parameters
explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
//! returns the descriptor size in floats (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
void uploadKeypoints(const vector<KeyPoint>& keypoints,
oclMat& keypointsocl);
//! download keypoints from device to host memory
void downloadKeypoints(const oclMat& keypointsocl,
vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
void downloadDescriptors(const oclMat& descriptorsocl,
vector<float>& descriptors);
void operator()(const oclMat& img, const oclMat& mask,
oclMat& keypoints);
void operator()(const oclMat& img, const oclMat& mask,
oclMat& keypoints, oclMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const oclMat& img, const oclMat& mask,
std::vector<KeyPoint>& keypoints);
void operator()(const oclMat& img, const oclMat& mask,
std::vector<KeyPoint>& keypoints, oclMat& descriptors,
bool useProvidedKeypoints = false);
void operator()(const oclMat& img, const oclMat& mask,
std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors,
bool useProvidedKeypoints = false);
void releaseMemory();
// SURF parameters
double hessianThreshold;
int nOctaves;
int nOctaveLayers;
bool extended;
bool upright;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
float keypointsRatio;
oclMat sum, mask1, maskSum, intBuffer;
oclMat det, trace;
oclMat maxPosBuffer;
};
The class ``SURF_OCL`` implements the Speeded Up Robust Features (SURF) descriptor. A fast multi-scale Hessian keypoint detector can be used to find the keypoints (the default option), but descriptors can also be computed for user-specified keypoints. Only 8-bit grayscale images are supported.
The class ``SURF_OCL`` can store results in GPU and CPU memory. It provides functions to convert results between the CPU and GPU representations (``uploadKeypoints``, ``downloadKeypoints``, ``downloadDescriptors``). The CPU results have the same format as ``SURF`` results. GPU results are stored in ``oclMat``. The ``keypoints`` matrix is a :math:`7 \times \texttt{nFeatures}` matrix (one row per ``KeypointLayout`` attribute) with the ``CV_32FC1`` type.
* ``keypoints.ptr<float>(X_ROW)[i]`` contains the x coordinate of the i-th feature.
* ``keypoints.ptr<float>(Y_ROW)[i]`` contains the y coordinate of the i-th feature.
* ``keypoints.ptr<float>(LAPLACIAN_ROW)[i]`` contains the Laplacian sign of the i-th feature.
* ``keypoints.ptr<float>(OCTAVE_ROW)[i]`` contains the octave of the i-th feature.
* ``keypoints.ptr<float>(SIZE_ROW)[i]`` contains the size of the i-th feature.
* ``keypoints.ptr<float>(ANGLE_ROW)[i]`` contains the orientation of the i-th feature.
* ``keypoints.ptr<float>(HESSIAN_ROW)[i]`` contains the response of the i-th feature.
The ``descriptors`` matrix is an :math:`\texttt{nFeatures} \times \texttt{descriptorSize}` matrix with the ``CV_32FC1`` type.
The class ``SURF_OCL`` uses internal buffers and provides access to them. All buffers can be safely released between function calls.
.. seealso:: :ocv:class:`SURF`
