diff --git a/3rdparty/libjasper/CMakeLists.txt b/3rdparty/libjasper/CMakeLists.txt index 7a70a19cf0..4e6aa45a78 100644 --- a/3rdparty/libjasper/CMakeLists.txt +++ b/3rdparty/libjasper/CMakeLists.txt @@ -47,5 +47,5 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS ${JASPER_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(${JASPER_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() diff --git a/3rdparty/libjpeg/CMakeLists.txt b/3rdparty/libjpeg/CMakeLists.txt index ecc57cf7fc..49730edf8e 100644 --- a/3rdparty/libjpeg/CMakeLists.txt +++ b/3rdparty/libjpeg/CMakeLists.txt @@ -46,5 +46,5 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS ${JPEG_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(${JPEG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() diff --git a/3rdparty/libpng/CMakeLists.txt b/3rdparty/libpng/CMakeLists.txt index 141c9d1518..2ecbe3f83b 100644 --- a/3rdparty/libpng/CMakeLists.txt +++ b/3rdparty/libpng/CMakeLists.txt @@ -55,5 +55,5 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS ${PNG_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(${PNG_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() diff --git a/3rdparty/libtiff/CMakeLists.txt b/3rdparty/libtiff/CMakeLists.txt index cea2f906fd..16d23c132b 100644 --- a/3rdparty/libtiff/CMakeLists.txt +++ b/3rdparty/libtiff/CMakeLists.txt @@ -84,7 +84,7 @@ if(WIN32 AND NOT HAVE_WINRT) else() list(APPEND lib_srcs tif_unix.c) endif() - + ocv_warnings_disable(CMAKE_C_FLAGS -Wno-unused-but-set-variable -Wmissing-prototypes -Wmissing-declarations -Wundef -Wunused -Wsign-compare -Wcast-align -Wshadow -Wno-maybe-uninitialized 
-Wno-pointer-to-int-cast -Wno-int-to-pointer-cast) ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang @@ -115,5 +115,5 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS ${TIFF_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(${TIFF_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt index b54819e7b6..a7271796a0 100644 --- a/3rdparty/libwebp/CMakeLists.txt +++ b/3rdparty/libwebp/CMakeLists.txt @@ -54,5 +54,6 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS ${WEBP_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(${WEBP_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() + diff --git a/3rdparty/openexr/CMakeLists.txt b/3rdparty/openexr/CMakeLists.txt index e10b940079..1d48c7c7da 100644 --- a/3rdparty/openexr/CMakeLists.txt +++ b/3rdparty/openexr/CMakeLists.txt @@ -64,7 +64,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS IlmImf ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(IlmImf EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() set(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATHS} PARENT_SCOPE) diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index 378232873c..49ad95e7d7 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -231,8 +231,8 @@ if(ENABLE_SOLUTION_FOLDERS) set_target_properties(tbb PROPERTIES FOLDER "3rdparty") endif() -install(TARGETS tbb - RUNTIME DESTINATION bin COMPONENT main +install(TARGETS tbb EXPORT OpenCVModules + RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main 
ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main ) diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 537690ce90..f1b28fd396 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -95,5 +95,5 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(NOT BUILD_SHARED_LIBS) - install(TARGETS ${ZLIB_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) + ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main) endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 95ce619a21..1ab4656f0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,10 +24,6 @@ if(NOT CMAKE_TOOLCHAIN_FILE) else() set(CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "Installation Directory") endif() - - if(MSVC) - set(CMAKE_USE_RELATIVE_PATHS ON CACHE INTERNAL "" FORCE) - endif() else(NOT CMAKE_TOOLCHAIN_FILE) #Android: set output folder to ${CMAKE_BINARY_DIR} set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_BINARY_DIR} CACHE PATH "root for library output, set this to change where android libs are compiled to" ) @@ -44,6 +40,10 @@ endif() project(OpenCV CXX C) +if(MSVC) + set(CMAKE_USE_RELATIVE_PATHS ON CACHE INTERNAL "" FORCE) +endif() + include(cmake/OpenCVUtils.cmake) # ---------------------------------------------------------------------------- @@ -176,6 +176,7 @@ OCV_OPTION(BUILD_TBB "Download and build TBB from source" ANDROID # OpenCV installation options # =================================================== +OCV_OPTION(INSTALL_CREATE_DISTRIB "Change install rules to build the distribution package" OFF ) OCV_OPTION(INSTALL_C_EXAMPLES "Install C examples" OFF ) OCV_OPTION(INSTALL_PYTHON_EXAMPLES "Install Python examples" OFF ) OCV_OPTION(INSTALL_ANDROID_EXAMPLES "Install Android examples" OFF IF ANDROID ) @@ -224,6 +225,21 @@ else() set(OPENCV_DOC_INSTALL_PATH share/OpenCV/doc) endif() +if(WIN32) + if(DEFINED OpenCV_RUNTIME AND DEFINED OpenCV_ARCH) + 
set(OpenCV_INSTALL_BINARIES_PREFIX "${OpenCV_ARCH}/${OpenCV_RUNTIME}/") + else() + message(STATUS "Can't detect runtime and/or arch") + set(OpenCV_INSTALL_BINARIES_PREFIX "") + endif() +else() + set(OpenCV_INSTALL_BINARIES_PREFIX "") +endif() + +set(OPENCV_SAMPLES_BIN_INSTALL_PATH "${OpenCV_INSTALL_BINARIES_PREFIX}samples") + +set(OPENCV_BIN_INSTALL_PATH "${OpenCV_INSTALL_BINARIES_PREFIX}bin") + if(ANDROID) set(LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/lib/${ANDROID_NDK_ABI_NAME}") set(3P_LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/3rdparty/lib/${ANDROID_NDK_ABI_NAME}") @@ -234,9 +250,18 @@ if(ANDROID) else() set(LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/lib") set(3P_LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/3rdparty/lib${LIB_SUFFIX}") - set(OPENCV_LIB_INSTALL_PATH lib${LIB_SUFFIX}) - set(OPENCV_3P_LIB_INSTALL_PATH share/OpenCV/3rdparty/${OPENCV_LIB_INSTALL_PATH}) - set(OPENCV_INCLUDE_INSTALL_PATH include) + if(WIN32) + if(OpenCV_STATIC) + set(OPENCV_LIB_INSTALL_PATH "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib${LIB_SUFFIX}") + else() + set(OPENCV_LIB_INSTALL_PATH "${OpenCV_INSTALL_BINARIES_PREFIX}lib${LIB_SUFFIX}") + endif() + set(OPENCV_3P_LIB_INSTALL_PATH "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib${LIB_SUFFIX}") + else() + set(OPENCV_LIB_INSTALL_PATH lib${LIB_SUFFIX}) + set(OPENCV_3P_LIB_INSTALL_PATH share/OpenCV/3rdparty/${OPENCV_LIB_INSTALL_PATH}) + endif() + set(OPENCV_INCLUDE_INSTALL_PATH "include") math(EXPR SIZEOF_VOID_P_BITS "8 * ${CMAKE_SIZEOF_VOID_P}") if(LIB_SUFFIX AND NOT SIZEOF_VOID_P_BITS EQUAL LIB_SUFFIX) @@ -812,7 +837,20 @@ if(HAVE_OPENCL) status(" Include path:" ${OPENCL_INCLUDE_DIRS}) endif() if(OPENCL_LIBRARIES) - status(" libraries:" ${OPENCL_LIBRARIES}) + set(__libs "") + foreach(l ${OPENCL_LIBRARIES}) + if(TARGET ${l}) + get_target_property(p ${l} LOCATION) + if(p MATCHES NOTFOUND) + list(APPEND __libs "${l}") + else() + list(APPEND __libs "${p}") + endif() + else() + list(APPEND __libs "${l}") + endif() + endforeach() + status(" libraries:" 
${__libs}) endif() status(" Use AMDFFT:" HAVE_CLAMDFFT THEN YES ELSE NO) status(" Use AMDBLAS:" HAVE_CLAMDBLAS THEN YES ELSE NO) diff --git a/apps/haartraining/CMakeLists.txt b/apps/haartraining/CMakeLists.txt index 7a197db837..92fdf914b1 100644 --- a/apps/haartraining/CMakeLists.txt +++ b/apps/haartraining/CMakeLists.txt @@ -69,9 +69,17 @@ set_target_properties(opencv_performance PROPERTIES # Install part # ----------------------------------------------------------- -install(TARGETS opencv_haartraining RUNTIME DESTINATION bin COMPONENT main) -install(TARGETS opencv_createsamples RUNTIME DESTINATION bin COMPONENT main) -install(TARGETS opencv_performance RUNTIME DESTINATION bin COMPONENT main) +if(INSTALL_CREATE_DISTRIB) + if(BUILD_SHARED_LIBS) + install(TARGETS opencv_haartraining RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT main) + install(TARGETS opencv_createsamples RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT main) + install(TARGETS opencv_performance RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT main) + endif() +else() + install(TARGETS opencv_haartraining RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main) + install(TARGETS opencv_createsamples RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main) + install(TARGETS opencv_performance RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main) +endif() if(ENABLE_SOLUTION_FOLDERS) set_target_properties(opencv_performance PROPERTIES FOLDER "applications") diff --git a/apps/traincascade/CMakeLists.txt b/apps/traincascade/CMakeLists.txt index eb0c83df5a..2d11621756 100644 --- a/apps/traincascade/CMakeLists.txt +++ b/apps/traincascade/CMakeLists.txt @@ -33,4 +33,10 @@ if(ENABLE_SOLUTION_FOLDERS) set_target_properties(${the_target} PROPERTIES FOLDER "applications") endif() -install(TARGETS ${the_target} RUNTIME DESTINATION bin COMPONENT main) +if(INSTALL_CREATE_DISTRIB) + 
if(BUILD_SHARED_LIBS) + install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT main) + endif() +else() + install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main) +endif() diff --git a/cmake/OpenCVConfig.cmake b/cmake/OpenCVConfig.cmake new file mode 100644 index 0000000000..c7839c61b8 --- /dev/null +++ b/cmake/OpenCVConfig.cmake @@ -0,0 +1,166 @@ +# =================================================================================== +# The OpenCV CMake configuration file +# +# ** File generated automatically, do not modify ** +# +# Usage from an external project: +# In your CMakeLists.txt, add these lines: +# +# FIND_PACKAGE(OpenCV REQUIRED) +# TARGET_LINK_LIBRARIES(MY_TARGET_NAME ${OpenCV_LIBS}) +# +# Or you can search for specific OpenCV modules: +# +# FIND_PACKAGE(OpenCV REQUIRED core highgui) +# +# If the module is found then OPENCV__FOUND is set to TRUE. +# +# This file will define the following variables: +# - OpenCV_LIBS : The list of libraries to links against. +# - OpenCV_LIB_DIR : The directory(es) where lib files are. Calling LINK_DIRECTORIES +# with this path is NOT needed. +# - OpenCV_INCLUDE_DIRS : The OpenCV include directories. +# - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability +# - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API +# - OpenCV_VERSION : The version of this OpenCV build. Example: "2.4.0" +# - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION. Example: "2" +# - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION. Example: "4" +# - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION. 
Example: "0" +# +# Advanced variables: +# - OpenCV_SHARED +# - OpenCV_CONFIG_PATH +# - OpenCV_LIB_COMPONENTS +# +# =================================================================================== +# +# Windows pack specific options: +# - OpenCV_STATIC +# - OpenCV_CUDA + +if(CMAKE_VERSION VERSION_GREATER 2.6) + get_property(OpenCV_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) + if(NOT ";${OpenCV_LANGUAGES};" MATCHES ";CXX;") + enable_language(CXX) + endif() +endif() + +if(NOT DEFINED OpenCV_STATIC) + # look for global setting + if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) + set(OpenCV_STATIC OFF) + else() + set(OpenCV_STATIC ON) + endif() +endif() + +if(NOT DEFINED OpenCV_CUDA) + # if user's app uses CUDA, then it probably wants CUDA-enabled OpenCV binaries + if(CUDA_FOUND) + set(OpenCV_CUDA ON) + endif() +endif() + +if(MSVC) + if(CMAKE_CL_64) + set(OpenCV_ARCH x64) + set(OpenCV_TBB_ARCH intel64) + else() + set(OpenCV_ARCH x86) + set(OpenCV_TBB_ARCH ia32) + endif() + if(MSVC_VERSION EQUAL 1400) + set(OpenCV_RUNTIME vc8) + elseif(MSVC_VERSION EQUAL 1500) + set(OpenCV_RUNTIME vc9) + elseif(MSVC_VERSION EQUAL 1600) + set(OpenCV_RUNTIME vc10) + elseif(MSVC_VERSION EQUAL 1700) + set(OpenCV_RUNTIME vc11) + endif() +elseif(MINGW) + set(OpenCV_RUNTIME mingw) + + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine + OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(OPENCV_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64") + set(MINGW64 1) + set(OpenCV_ARCH x64) + else() + set(OpenCV_ARCH x86) + endif() +endif() + +if(CMAKE_VERSION VERSION_GREATER 2.6.2) + unset(OpenCV_CONFIG_PATH CACHE) +endif() + +if(NOT OpenCV_FIND_QUIETLY) + message(STATUS "OpenCV ARCH: ${OpenCV_ARCH}") + message(STATUS "OpenCV RUNTIME: ${OpenCV_RUNTIME}") + message(STATUS "OpenCV STATIC: ${OpenCV_STATIC}") +endif() + +get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE) +if(OpenCV_RUNTIME AND OpenCV_ARCH) + if(OpenCV_STATIC AND EXISTS 
"${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake") + if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake") + set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib") + else() + set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib") + endif() + elseif(EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake") + if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake") + set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib") + else() + set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib") + endif() + endif() +endif() + +if(OpenCV_LIB_PATH AND EXISTS "${OpenCV_LIB_PATH}/OpenCVConfig.cmake") + set(OpenCV_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release OpenCV libraries are located" FORCE) + set(OpenCV_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug OpenCV libraries are located" FORCE) + set(OpenCV_3RDPARTY_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release 3rdpaty OpenCV dependencies are located" FORCE) + set(OpenCV_3RDPARTY_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug 3rdpaty OpenCV dependencies are located" FORCE) + + include("${OpenCV_LIB_PATH}/OpenCVConfig.cmake") + + if(OpenCV_CUDA) + set(_OpenCV_LIBS "") + foreach(_lib ${OpenCV_LIBS}) + string(REPLACE "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}" "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}" _lib2 "${_lib}") + if(NOT EXISTS "${_lib}" AND EXISTS "${_lib2}") + list(APPEND _OpenCV_LIBS "${_lib2}") + else() + list(APPEND _OpenCV_LIBS "${_lib}") + endif() + endforeach() + set(OpenCV_LIBS ${_OpenCV_LIBS}) + endif() + set(OpenCV_FOUND TRUE CACHE BOOL "" FORCE) + set(OPENCV_FOUND TRUE CACHE BOOL "" FORCE) + + if(NOT 
OpenCV_FIND_QUIETLY) + message(STATUS "Found OpenCV ${OpenCV_VERSION} in ${OpenCV_LIB_PATH}") + if(NOT OpenCV_LIB_PATH MATCHES "/staticlib") + get_filename_component(_OpenCV_LIB_PATH "${OpenCV_LIB_PATH}/../bin" ABSOLUTE) + file(TO_NATIVE_PATH "${_OpenCV_LIB_PATH}" _OpenCV_LIB_PATH) + message(STATUS "You might need to add ${_OpenCV_LIB_PATH} to your PATH to be able to run your applications.") + if(OpenCV_LIB_PATH MATCHES "/gpu/") + string(REPLACE "\\gpu" "" _OpenCV_LIB_PATH2 "${_OpenCV_LIB_PATH}") + message(STATUS "GPU support is enabled so you might also need ${_OpenCV_LIB_PATH2} in your PATH (it must go after the ${_OpenCV_LIB_PATH}).") + endif() + endif() + endif() +else() + if(NOT OpenCV_FIND_QUIETLY) + message(WARNING +"Found OpenCV Windows Pack but it has not binaries compatible with your configuration. +You should manually point CMake variable OpenCV_DIR to your build of OpenCV library." + ) + endif() + set(OpenCV_FOUND FALSE CACHE BOOL "" FORCE) + set(OPENCV_FOUND FALSE CACHE BOOL "" FORCE) +endif() diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake index 7efcba7c1e..e2691d8cd3 100644 --- a/cmake/OpenCVDetectCXXCompiler.cmake +++ b/cmake/OpenCVDetectCXXCompiler.cmake @@ -110,3 +110,43 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*|ARM.*") set(ARM 1) endif() + + +# Similar code is existed in OpenCVConfig.cmake +if(NOT DEFINED OpenCV_STATIC) + # look for global setting + if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) + set(OpenCV_STATIC OFF) + else() + set(OpenCV_STATIC ON) + endif() +endif() + +if(MSVC) + if(CMAKE_CL_64) + set(OpenCV_ARCH x64) + else() + set(OpenCV_ARCH x86) + endif() + if(MSVC_VERSION EQUAL 1400) + set(OpenCV_RUNTIME vc8) + elseif(MSVC_VERSION EQUAL 1500) + set(OpenCV_RUNTIME vc9) + elseif(MSVC_VERSION EQUAL 1600) + set(OpenCV_RUNTIME vc10) + elseif(MSVC_VERSION EQUAL 1700) + set(OpenCV_RUNTIME vc11) + endif() 
+elseif(MINGW) + set(OpenCV_RUNTIME mingw) + + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine + OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(OPENCV_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64") + set(MINGW64 1) + set(OpenCV_ARCH x64) + else() + set(OpenCV_ARCH x86) + endif() +endif() diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake index 0ef0be9c99..6f3ce4e903 100644 --- a/cmake/OpenCVDetectPython.cmake +++ b/cmake/OpenCVDetectPython.cmake @@ -20,6 +20,7 @@ if(PYTHONINTERP_FOUND) set(PYTHON_VERSION_MAJOR_MINOR "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}") if(NOT ANDROID AND NOT IOS) + ocv_check_environment_variables(PYTHON_LIBRARY PYTHON_INCLUDE_DIR) find_host_package(PythonLibs "${PYTHON_VERSION_STRING}" EXACT) endif() diff --git a/cmake/OpenCVGenConfig.cmake b/cmake/OpenCVGenConfig.cmake index 7ccf3ef42e..9050e053aa 100644 --- a/cmake/OpenCVGenConfig.cmake +++ b/cmake/OpenCVGenConfig.cmake @@ -57,55 +57,10 @@ if(BUILD_FAT_JAVA_LIB AND HAVE_opencv_java) list(APPEND OPENCV_MODULES_CONFIGCMAKE opencv_java) endif() -macro(ocv_generate_dependencies_map_configcmake suffix configuration) - set(OPENCV_DEPENDENCIES_MAP_${suffix} "") - set(OPENCV_PROCESSED_LIBS "") - set(OPENCV_LIBS_TO_PROCESS ${OPENCV_MODULES_CONFIGCMAKE}) - while(OPENCV_LIBS_TO_PROCESS) - list(GET OPENCV_LIBS_TO_PROCESS 0 __ocv_lib) - get_target_property(__libname ${__ocv_lib} LOCATION_${configuration}) - get_filename_component(__libname "${__libname}" NAME) - - if(WIN32) - string(REGEX REPLACE "${CMAKE_SHARED_LIBRARY_SUFFIX}$" "${OPENCV_LINK_LIBRARY_SUFFIX}" __libname "${__libname}") - endif() - - if (CUDA_FOUND AND WIN32) - if(${__ocv_lib}_EXTRA_DEPS_${suffix}) - list(REMOVE_ITEM ${__ocv_lib}_EXTRA_DEPS_${suffix} ${CUDA_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_npp_LIBRARY} ${CUDA_nvcuvid_LIBRARY} ${CUDA_nvcuvenc_LIBRARY}) - endif() - endif() - - string(REPLACE " " "\\ " __mod_deps 
"${${__ocv_lib}_MODULE_DEPS_${suffix}}") - string(REPLACE " " "\\ " __ext_deps "${${__ocv_lib}_EXTRA_DEPS_${suffix}}") - string(REPLACE "\"" "\\\"" __mod_deps "${__mod_deps}") - string(REPLACE "\"" "\\\"" __ext_deps "${__ext_deps}") - - - set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_LIBNAME_${suffix} \"${__libname}\")\n") - set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_DEPS_${suffix} ${__mod_deps})\n") - set(OPENCV_DEPENDENCIES_MAP_${suffix} "${OPENCV_DEPENDENCIES_MAP_${suffix}}set(OpenCV_${__ocv_lib}_EXTRA_DEPS_${suffix} \"${__ext_deps}\")\n") - - list(APPEND OPENCV_PROCESSED_LIBS ${__ocv_lib}) - list(APPEND OPENCV_LIBS_TO_PROCESS ${${__ocv_lib}_MODULE_DEPS_${suffix}}) - list(REMOVE_ITEM OPENCV_LIBS_TO_PROCESS ${OPENCV_PROCESSED_LIBS}) - endwhile() - unset(OPENCV_PROCESSED_LIBS) - unset(OPENCV_LIBS_TO_PROCESS) - unset(__ocv_lib) - unset(__libname) -endmacro() - -ocv_generate_dependencies_map_configcmake(OPT Release) -ocv_generate_dependencies_map_configcmake(DBG Debug) - - # ------------------------------------------------------------------------------------------- # Part 1/3: ${BIN_DIR}/OpenCVConfig.cmake -> For use *without* "make install" # ------------------------------------------------------------------------------------------- set(OpenCV_INCLUDE_DIRS_CONFIGCMAKE "\"${OPENCV_CONFIG_FILE_INCLUDE_DIR}\" \"${OpenCV_SOURCE_DIR}/include\" \"${OpenCV_SOURCE_DIR}/include/opencv\"") -set(OpenCV_LIB_DIRS_CONFIGCMAKE "\"${LIBRARY_OUTPUT_PATH}\"") -set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"${3P_LIBRARY_OUTPUT_PATH}\"") set(OpenCV2_INCLUDE_DIRS_CONFIGCMAKE "") foreach(m ${OPENCV_MODULES_BUILD}) @@ -130,13 +85,6 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig-version.cmake. 
set(OpenCV_INCLUDE_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_INCLUDE_INSTALL_PATH}/opencv" "\${OpenCV_INSTALL_PATH}/${OPENCV_INCLUDE_INSTALL_PATH}\"") set(OpenCV2_INCLUDE_DIRS_CONFIGCMAKE "\"\"") -if(ANDROID) - set(OpenCV_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/sdk/native/libs/\${ANDROID_NDK_ABI_NAME}\"") - set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/sdk/native/3rdparty/libs/\${ANDROID_NDK_ABI_NAME}\"") -else() - set(OpenCV_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_LIB_INSTALL_PATH}\"") - set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_3P_LIB_INSTALL_PATH}\"") -endif() if(INSTALL_TO_MANGLED_PATHS) string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "${OPENCV_3P_LIB_INSTALL_PATH}") set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE}\"") @@ -155,9 +103,11 @@ if(UNIX) if(INSTALL_TO_MANGLED_PATHS) install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/) install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig-version.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/) + install(EXPORT OpenCVModules DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/) else() install(FILES "${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/) install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig-version.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/) + install(EXPORT OpenCVModules DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/) endif() endif() @@ -171,12 +121,17 @@ endif() if(WIN32) set(OpenCV_INCLUDE_DIRS_CONFIGCMAKE "\"\${OpenCV_CONFIG_PATH}/include\" \"\${OpenCV_CONFIG_PATH}/include/opencv\"") set(OpenCV2_INCLUDE_DIRS_CONFIGCMAKE "\"\"") - set(OpenCV_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_CONFIG_PATH}/${OPENCV_LIB_INSTALL_PATH}\"") - 
set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_CONFIG_PATH}/${OPENCV_3P_LIB_INSTALL_PATH}\"") exec_program(mkdir ARGS "-p \"${CMAKE_BINARY_DIR}/win-install/\"" OUTPUT_VARIABLE RET_VAL) configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig.cmake.in" "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" IMMEDIATE @ONLY) configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig-version.cmake.in" "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig-version.cmake" IMMEDIATE @ONLY) - install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${CMAKE_INSTALL_PREFIX}/") - install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig-version.cmake" DESTINATION "${CMAKE_INSTALL_PREFIX}/") + if(BUILD_SHARED_LIBS) + install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}lib") + install(EXPORT OpenCVModules DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}lib") + else() + install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig.cmake" DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib") + install(EXPORT OpenCVModules DESTINATION "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib") + endif() + install(FILES "${CMAKE_BINARY_DIR}/win-install/OpenCVConfig-version.cmake" DESTINATION "${CMAKE_INSTALL_PREFIX}") + install(FILES "${OpenCV_SOURCE_DIR}/cmake/OpenCVConfig.cmake" DESTINATION "${CMAKE_INSTALL_PREFIX}/") endif() diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 25fa0aa47e..5dbe957ddd 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -11,9 +11,11 @@ # OPENCV_MODULE_${the_module}_HEADERS # OPENCV_MODULE_${the_module}_SOURCES # OPENCV_MODULE_${the_module}_DEPS - final flattened set of module dependencies -# OPENCV_MODULE_${the_module}_DEPS_EXT +# OPENCV_MODULE_${the_module}_DEPS_EXT - non-module dependencies # OPENCV_MODULE_${the_module}_REQ_DEPS # OPENCV_MODULE_${the_module}_OPT_DEPS +# OPENCV_MODULE_${the_module}_PRIVATE_REQ_DEPS +# 
OPENCV_MODULE_${the_module}_PRIVATE_OPT_DEPS # HAVE_${the_module} - for fast check of module availability # To control the setup of the module you could also set: @@ -48,6 +50,8 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD endif() unset(OPENCV_MODULE_${mod}_REQ_DEPS CACHE) unset(OPENCV_MODULE_${mod}_OPT_DEPS CACHE) + unset(OPENCV_MODULE_${mod}_PRIVATE_REQ_DEPS CACHE) + unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE) endforeach() # clean modules info which needs to be recalculated @@ -69,6 +73,10 @@ macro(ocv_add_dependencies full_modname) set(__depsvar OPENCV_MODULE_${full_modname}_REQ_DEPS) elseif(d STREQUAL "OPTIONAL") set(__depsvar OPENCV_MODULE_${full_modname}_OPT_DEPS) + elseif(d STREQUAL "PRIVATE_REQUIRED") + set(__depsvar OPENCV_MODULE_${full_modname}_PRIVATE_REQ_DEPS) + elseif(d STREQUAL "PRIVATE_OPTIONAL") + set(__depsvar OPENCV_MODULE_${full_modname}_PRIVATE_OPT_DEPS) else() list(APPEND ${__depsvar} "${d}") endif() @@ -77,9 +85,17 @@ macro(ocv_add_dependencies full_modname) ocv_list_unique(OPENCV_MODULE_${full_modname}_REQ_DEPS) ocv_list_unique(OPENCV_MODULE_${full_modname}_OPT_DEPS) - - set(OPENCV_MODULE_${full_modname}_REQ_DEPS ${OPENCV_MODULE_${full_modname}_REQ_DEPS} CACHE INTERNAL "Required dependencies of ${full_modname} module") - set(OPENCV_MODULE_${full_modname}_OPT_DEPS ${OPENCV_MODULE_${full_modname}_OPT_DEPS} CACHE INTERNAL "Optional dependencies of ${full_modname} module") + ocv_list_unique(OPENCV_MODULE_${full_modname}_PRIVATE_REQ_DEPS) + ocv_list_unique(OPENCV_MODULE_${full_modname}_PRIVATE_OPT_DEPS) + + set(OPENCV_MODULE_${full_modname}_REQ_DEPS ${OPENCV_MODULE_${full_modname}_REQ_DEPS} + CACHE INTERNAL "Required dependencies of ${full_modname} module") + set(OPENCV_MODULE_${full_modname}_OPT_DEPS ${OPENCV_MODULE_${full_modname}_OPT_DEPS} + CACHE INTERNAL "Optional dependencies of ${full_modname} module") + set(OPENCV_MODULE_${full_modname}_PRIVATE_REQ_DEPS 
${OPENCV_MODULE_${full_modname}_PRIVATE_REQ_DEPS} + CACHE INTERNAL "Required private dependencies of ${full_modname} module") + set(OPENCV_MODULE_${full_modname}_PRIVATE_OPT_DEPS ${OPENCV_MODULE_${full_modname}_PRIVATE_OPT_DEPS} + CACHE INTERNAL "Optional private dependencies of ${full_modname} module") endmacro() # declare new OpenCV module in current folder @@ -173,126 +189,6 @@ macro(ocv_module_disable module) endmacro() -# Internal macro; partly disables OpenCV module -macro(__ocv_module_turn_off the_module) - list(REMOVE_ITEM OPENCV_MODULES_DISABLED_AUTO "${the_module}") - list(APPEND OPENCV_MODULES_DISABLED_AUTO "${the_module}") - list(REMOVE_ITEM OPENCV_MODULES_BUILD "${the_module}") - list(REMOVE_ITEM OPENCV_MODULES_PUBLIC "${the_module}") - set(HAVE_${the_module} OFF CACHE INTERNAL "Module ${the_module} can not be built in current configuration") -endmacro() - -# Internal macro for dependencies tracking -macro(__ocv_flatten_module_required_dependencies the_module) - set(__flattened_deps "") - set(__resolved_deps "") - set(__req_depends ${OPENCV_MODULE_${the_module}_REQ_DEPS}) - - while(__req_depends) - ocv_list_pop_front(__req_depends __dep) - if(__dep STREQUAL the_module) - __ocv_module_turn_off(${the_module}) # TODO: think how to deal with cyclic dependency - break() - elseif(";${OPENCV_MODULES_DISABLED_USER};${OPENCV_MODULES_DISABLED_AUTO};" MATCHES ";${__dep};") - __ocv_module_turn_off(${the_module}) # depends on disabled module - list(APPEND __flattened_deps "${__dep}") - elseif(";${OPENCV_MODULES_BUILD};" MATCHES ";${__dep};") - if(";${__resolved_deps};" MATCHES ";${__dep};") - list(APPEND __flattened_deps "${__dep}") # all dependencies of this module are already resolved - else() - # put all required subdependencies before this dependency and mark it as resolved - list(APPEND __resolved_deps "${__dep}") - list(INSERT __req_depends 0 ${OPENCV_MODULE_${__dep}_REQ_DEPS} ${__dep}) - endif() - elseif(__dep MATCHES "^opencv_") - 
__ocv_module_turn_off(${the_module}) # depends on missing module - message(WARNING "Unknown \"${__dep}\" module is listened in the dependencies of \"${the_module}\" module") - break() - else() - # skip non-modules - endif() - endwhile() - - if(__flattened_deps) - list(REMOVE_DUPLICATES __flattened_deps) - set(OPENCV_MODULE_${the_module}_DEPS ${__flattened_deps}) - else() - set(OPENCV_MODULE_${the_module}_DEPS "") - endif() - - ocv_clear_vars(__resolved_deps __flattened_deps __req_depends __dep) -endmacro() - -# Internal macro for dependencies tracking -macro(__ocv_flatten_module_optional_dependencies the_module) - set(__flattened_deps "") - set(__resolved_deps "") - set(__opt_depends ${OPENCV_MODULE_${the_module}_REQ_DEPS} ${OPENCV_MODULE_${the_module}_OPT_DEPS}) - - while(__opt_depends) - ocv_list_pop_front(__opt_depends __dep) - if(__dep STREQUAL the_module) - __ocv_module_turn_off(${the_module}) # TODO: think how to deal with cyclic dependency - break() - elseif(";${OPENCV_MODULES_BUILD};" MATCHES ";${__dep};") - if(";${__resolved_deps};" MATCHES ";${__dep};") - list(APPEND __flattened_deps "${__dep}") # all dependencies of this module are already resolved - else() - # put all subdependencies before this dependency and mark it as resolved - list(APPEND __resolved_deps "${__dep}") - list(INSERT __opt_depends 0 ${OPENCV_MODULE_${__dep}_REQ_DEPS} ${OPENCV_MODULE_${__dep}_OPT_DEPS} ${__dep}) - endif() - else() - # skip non-modules or missing modules - endif() - endwhile() - - if(__flattened_deps) - list(REMOVE_DUPLICATES __flattened_deps) - set(OPENCV_MODULE_${the_module}_DEPS ${__flattened_deps}) - else() - set(OPENCV_MODULE_${the_module}_DEPS "") - endif() - - ocv_clear_vars(__resolved_deps __flattened_deps __opt_depends __dep) -endmacro() - -macro(__ocv_flatten_module_dependencies) - foreach(m ${OPENCV_MODULES_DISABLED_USER}) - set(HAVE_${m} OFF CACHE INTERNAL "Module ${m} will not be built in current configuration") - endforeach() - foreach(m 
${OPENCV_MODULES_BUILD}) - set(HAVE_${m} ON CACHE INTERNAL "Module ${m} will be built in current configuration") - __ocv_flatten_module_required_dependencies(${m}) - set(OPENCV_MODULE_${m}_DEPS ${OPENCV_MODULE_${m}_DEPS} CACHE INTERNAL "Flattened required dependencies of ${m} module") - endforeach() - - foreach(m ${OPENCV_MODULES_BUILD}) - __ocv_flatten_module_optional_dependencies(${m}) - - # save dependencies from other modules - set(OPENCV_MODULE_${m}_DEPS ${OPENCV_MODULE_${m}_DEPS} CACHE INTERNAL "Flattened dependencies of ${m} module") - # save extra dependencies - set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_REQ_DEPS} ${OPENCV_MODULE_${m}_OPT_DEPS}) - if(OPENCV_MODULE_${m}_DEPS_EXT AND OPENCV_MODULE_${m}_DEPS) - list(REMOVE_ITEM OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS}) - endif() - ocv_list_filterout(OPENCV_MODULE_${m}_DEPS_EXT "^opencv_[^ ]+$") - set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS_EXT} CACHE INTERNAL "Extra dependencies of ${m} module") - endforeach() - - # order modules by dependencies - set(OPENCV_MODULES_BUILD_ "") - foreach(m ${OPENCV_MODULES_BUILD}) - list(APPEND OPENCV_MODULES_BUILD_ ${OPENCV_MODULE_${m}_DEPS} ${m}) - endforeach() - ocv_list_unique(OPENCV_MODULES_BUILD_) - - set(OPENCV_MODULES_PUBLIC ${OPENCV_MODULES_PUBLIC} CACHE INTERNAL "List of OpenCV modules marked for export") - set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD_} CACHE INTERNAL "List of OpenCV modules included into the build") - set(OPENCV_MODULES_DISABLED_AUTO ${OPENCV_MODULES_DISABLED_AUTO} CACHE INTERNAL "List of OpenCV modules implicitly disabled due to dependencies") -endmacro() - # collect modules from specified directories # NB: must be called only once! 
macro(ocv_glob_modules) @@ -342,7 +238,7 @@ macro(ocv_glob_modules) ocv_clear_vars(__ocvmodules __directories_observed __path __modpath __pathIdx) # resolve dependencies - __ocv_flatten_module_dependencies() + __ocv_resolve_dependencies() # create modules set(OPENCV_INITIAL_PASS OFF PARENT_SCOPE) @@ -351,11 +247,167 @@ macro(ocv_glob_modules) if(m MATCHES "^opencv_") string(REGEX REPLACE "^opencv_" "" __shortname "${m}") add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${__shortname}") + else() + message(WARNING "Check module name: ${m}") + add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${m}") endif() endforeach() unset(__shortname) endmacro() + +# disables OpenCV module with missing dependencies +function(__ocv_module_turn_off the_module) + list(REMOVE_ITEM OPENCV_MODULES_DISABLED_AUTO "${the_module}") + list(APPEND OPENCV_MODULES_DISABLED_AUTO "${the_module}") + list(REMOVE_ITEM OPENCV_MODULES_BUILD "${the_module}") + list(REMOVE_ITEM OPENCV_MODULES_PUBLIC "${the_module}") + set(HAVE_${the_module} OFF CACHE INTERNAL "Module ${the_module} can not be built in current configuration") + + set(OPENCV_MODULES_DISABLED_AUTO "${OPENCV_MODULES_DISABLED_AUTO}" CACHE INTERNAL "") + set(OPENCV_MODULES_BUILD "${OPENCV_MODULES_BUILD}" CACHE INTERNAL "") + set(OPENCV_MODULES_PUBLIC "${OPENCV_MODULES_PUBLIC}" CACHE INTERNAL "") +endfunction() + +# sort modules by dependencies +function(__ocv_sort_modules_by_deps __lst) + ocv_list_sort(${__lst}) + set(${__lst}_ORDERED ${${__lst}} CACHE INTERNAL "") + set(__result "") + foreach (m ${${__lst}}) + list(LENGTH __result __lastindex) + set(__index ${__lastindex}) + foreach (__d ${__result}) + set(__deps "${OPENCV_MODULE_${__d}_DEPS}") + if(";${__deps};" MATCHES ";${m};") + list(FIND __result "${__d}" __i) + if(__i LESS "${__index}") + set(__index "${__i}") + endif() + endif() + endforeach() + if(__index STREQUAL __lastindex) + list(APPEND __result "${m}") + else() + 
list(INSERT __result ${__index} "${m}") + endif() + endforeach() + set(${__lst} "${__result}" PARENT_SCOPE) +endfunction() + +# resolve dependencies +function(__ocv_resolve_dependencies) + foreach(m ${OPENCV_MODULES_DISABLED_USER}) + set(HAVE_${m} OFF CACHE INTERNAL "Module ${m} will not be built in current configuration") + endforeach() + foreach(m ${OPENCV_MODULES_BUILD}) + set(HAVE_${m} ON CACHE INTERNAL "Module ${m} will be built in current configuration") + endforeach() + + # disable MODULES with unresolved dependencies + set(has_changes ON) + while(has_changes) + set(has_changes OFF) + foreach(m ${OPENCV_MODULES_BUILD}) + set(__deps ${OPENCV_MODULE_${m}_REQ_DEPS} ${OPENCV_MODULE_${m}_PRIVATE_REQ_DEPS}) + while(__deps) + ocv_list_pop_front(__deps d) + string(TOUPPER "${d}" upper_d) + if(NOT (HAVE_${d} OR HAVE_${upper_d} OR TARGET ${d} OR EXISTS ${d})) + if(d MATCHES "^opencv_") # TODO Remove this condition in the future and use HAVE_ variables only + message(STATUS "Module ${m} disabled because ${d} dependency can't be resolved!") + __ocv_module_turn_off(${m}) + set(has_changes ON) + break() + else() + message(STATUS "Assume that non-module dependency is available: ${d} (for module ${m})") + endif() + endif() + endwhile() + endforeach() + endwhile() + +# message(STATUS "List of active modules: ${OPENCV_MODULES_BUILD}") + + foreach(m ${OPENCV_MODULES_BUILD}) + set(deps_${m} ${OPENCV_MODULE_${m}_REQ_DEPS}) + foreach(d ${OPENCV_MODULE_${m}_OPT_DEPS}) + if(NOT (";${deps_${m}};" MATCHES ";${d};")) + if(HAVE_${d} OR TARGET ${d}) + list(APPEND deps_${m} ${d}) + endif() + endif() + endforeach() +# message(STATUS "Initial deps of ${m} (w/o private deps): ${deps_${m}}") + endforeach() + + # propagate dependencies + set(has_changes ON) + while(has_changes) + set(has_changes OFF) + foreach(m2 ${OPENCV_MODULES_BUILD}) # transfer deps of m2 to m + foreach(m ${OPENCV_MODULES_BUILD}) + if((NOT m STREQUAL m2) AND ";${deps_${m}};" MATCHES ";${m2};") + foreach(d ${deps_${m2}}) +
if(NOT (";${deps_${m}};" MATCHES ";${d};")) +# message(STATUS " Transfer dependency ${d} from ${m2} to ${m}") + list(APPEND deps_${m} ${d}) + set(has_changes ON) + endif() + endforeach() + endif() + endforeach() + endforeach() + endwhile() + + # process private deps + foreach(m ${OPENCV_MODULES_BUILD}) + foreach(d ${OPENCV_MODULE_${m}_PRIVATE_REQ_DEPS}) + if(NOT (";${deps_${m}};" MATCHES ";${d};")) + list(APPEND deps_${m} ${d}) + endif() + endforeach() + foreach(d ${OPENCV_MODULE_${m}_PRIVATE_OPT_DEPS}) + if(NOT (";${deps_${m}};" MATCHES ";${d};")) + if(HAVE_${d} OR TARGET ${d}) + list(APPEND deps_${m} ${d}) + endif() + endif() + endforeach() + endforeach() + + ocv_list_sort(OPENCV_MODULES_BUILD) + + foreach(m ${OPENCV_MODULES_BUILD}) +# message(STATUS "FULL deps of ${m}: ${deps_${m}}") + set(OPENCV_MODULE_${m}_DEPS ${deps_${m}}) + set(OPENCV_MODULE_${m}_DEPS_EXT ${deps_${m}}) + ocv_list_filterout(OPENCV_MODULE_${m}_DEPS_EXT "^opencv_[^ ]+$") + if(OPENCV_MODULE_${m}_DEPS_EXT AND OPENCV_MODULE_${m}_DEPS) + list(REMOVE_ITEM OPENCV_MODULE_${m}_DEPS ${OPENCV_MODULE_${m}_DEPS_EXT}) + endif() + endforeach() + + # reorder dependencies + foreach(m ${OPENCV_MODULES_BUILD}) + __ocv_sort_modules_by_deps(OPENCV_MODULE_${m}_DEPS) + ocv_list_sort(OPENCV_MODULE_${m}_DEPS_EXT) + + set(OPENCV_MODULE_${m}_DEPS ${OPENCV_MODULE_${m}_DEPS} CACHE INTERNAL "Flattened dependencies of ${m} module") + set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS_EXT} CACHE INTERNAL "Extra dependencies of ${m} module") + +# message(STATUS " module deps: ${OPENCV_MODULE_${m}_DEPS}") +# message(STATUS " extra deps: ${OPENCV_MODULE_${m}_DEPS_EXT}") + endforeach() + + __ocv_sort_modules_by_deps(OPENCV_MODULES_BUILD) + + set(OPENCV_MODULES_PUBLIC ${OPENCV_MODULES_PUBLIC} CACHE INTERNAL "List of OpenCV modules marked for export") + set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD} CACHE INTERNAL "List of OpenCV modules included into the build") + set(OPENCV_MODULES_DISABLED_AUTO 
${OPENCV_MODULES_DISABLED_AUTO} CACHE INTERNAL "List of OpenCV modules implicitly disabled due to dependencies") +endfunction() + + # setup include paths for the list of passed modules macro(ocv_include_modules) foreach(d ${ARGN}) @@ -377,7 +429,7 @@ macro(ocv_include_modules_recurse) ocv_include_directories("${OPENCV_MODULE_${d}_LOCATION}/include") endif() if(OPENCV_MODULE_${d}_DEPS) - ocv_include_modules_recurse(${OPENCV_MODULE_${d}_DEPS}) + ocv_include_modules(${OPENCV_MODULE_${d}_DEPS}) endif() elseif(EXISTS "${d}") ocv_include_directories("${d}") @@ -436,7 +488,6 @@ macro(ocv_glob_module_sources) file(GLOB lib_cuda_srcs "src/cuda/*.cu") set(cuda_objs "") set(lib_cuda_hdrs "") - if(HAVE_CUDA AND lib_cuda_srcs) ocv_include_directories(${CUDA_INCLUDE_DIRS}) file(GLOB lib_cuda_hdrs "src/cuda/*.hpp") @@ -448,7 +499,6 @@ macro(ocv_glob_module_sources) source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) file(GLOB cl_kernels "src/opencl/*.cl") - if(HAVE_OPENCL AND cl_kernels) ocv_include_directories(${OPENCL_INCLUDE_DIRS}) add_custom_command( @@ -487,12 +537,10 @@ macro(ocv_create_module) endif() if(NOT "${ARGN}" STREQUAL "SKIP_LINK") - target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN}) + target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS}) + target_link_libraries(${the_module} LINK_PRIVATE ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN}) if (HAVE_CUDA) - target_link_libraries(${the_module} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) - endif() - if(HAVE_OPENCL AND OPENCL_LIBRARIES) - target_link_libraries(${the_module} ${OPENCL_LIBRARIES}) + target_link_libraries(${the_module} LINK_PRIVATE ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) endif() endif() @@ -533,8 +581,8 @@ macro(ocv_create_module) set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:libc /DEBUG") endif() - install(TARGETS ${the_module} - 
RUNTIME DESTINATION bin COMPONENT main + ocv_install_target(${the_module} EXPORT OpenCVModules + RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main ) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index ddf0290673..2fb8335884 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -11,6 +11,17 @@ if(NOT COMMAND find_host_program) endmacro() endif() +macro(ocv_check_environment_variables) + foreach(_var ${ARGN}) + if(NOT DEFINED ${_var} AND DEFINED ENV{${_var}}) + set(__value "$ENV{${_var}}") + file(TO_CMAKE_PATH "${__value}" __value) # Assume that we receive paths + set(${_var} "${__value}") + message(STATUS "Update variable ${_var} from environment: ${${_var}}") + endif() + endforeach() +endmacro() + # adds include directories in such way that directories from the OpenCV source tree go first function(ocv_include_directories) set(__add_before "") @@ -425,6 +436,48 @@ macro(ocv_convert_to_full_paths VAR) endmacro() +# add install command +function(ocv_install_target) + install(TARGETS ${ARGN}) + + if(INSTALL_CREATE_DISTRIB) + if(MSVC AND NOT BUILD_SHARED_LIBS) + set(__target "${ARGV0}") + + set(isArchive 0) + set(isDst 0) + foreach(e ${ARGN}) + if(isDst EQUAL 1) + set(DST "${e}") + break() + endif() + if(isArchive EQUAL 1 AND e STREQUAL "DESTINATION") + set(isDst 1) + endif() + if(e STREQUAL "ARCHIVE") + set(isArchive 1) + else() + set(isArchive 0) + endif() + endforeach() + +# message(STATUS "Process ${__target} dst=${DST}...") + if(NOT DEFINED DST) + set(DST "${OPENCV_LIB_INSTALL_PATH}") + endif() + + get_target_property(fname ${__target} LOCATION_DEBUG) + string(REPLACE ".lib" ".pdb" fname "${fname}") + install(FILES ${fname} DESTINATION ${DST} CONFIGURATIONS Debug) + + get_target_property(fname ${__target} LOCATION_RELEASE) + string(REPLACE ".lib" ".pdb" fname "${fname}") + install(FILES ${fname}
DESTINATION ${DST} CONFIGURATIONS Release) + endif() + endif() +endfunction() + + # read set of version defines from the header file macro(ocv_parse_header FILENAME FILE_VAR) set(vars_regex "") diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 825172b73c..1916c3ee5b 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -6,6 +6,7 @@ get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME) set(STR_CPP "// This file is auto-generated. Do not edit! +#include \"precomp.hpp\" #include \"${OUTPUT_HPP_NAME}\" namespace cv @@ -16,6 +17,8 @@ namespace ocl set(STR_HPP "// This file is auto-generated. Do not edit! +#include \"opencv2/ocl/private/util.hpp\" + namespace cv { namespace ocl diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in index 3dfac30500..78c03c5c6d 100644 --- a/cmake/templates/OpenCVConfig.cmake.in +++ b/cmake/templates/OpenCVConfig.cmake.in @@ -16,9 +16,7 @@ # If the module is found then OPENCV__FOUND is set to TRUE. # # This file will define the following variables: -# - OpenCV_LIBS : The list of libraries to links against. -# - OpenCV_LIB_DIR : The directory(es) where lib files are. Calling LINK_DIRECTORIES -# with this path is NOT needed. +# - OpenCV_LIBS : The list of all imported targets for OpenCV modules. # - OpenCV_INCLUDE_DIRS : The OpenCV include directories. # - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability # - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API @@ -39,6 +37,10 @@ # # =================================================================================== +include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules.cmake) + +# TODO All things below should be reviewed. 
What is about of moving this code into related modules (special vars/hooks/files) + # Version Compute Capability from which OpenCV has been compiled is remembered set(OpenCV_COMPUTE_CAPABILITIES @OpenCV_CUDA_CC_CONFIGCMAKE@) @@ -105,15 +107,11 @@ SET(OpenCV_VERSION_TWEAK @OPENCV_VERSION_TWEAK@) SET(OpenCV_VERSION_STATUS "@OPENCV_VERSION_STATUS@") # ==================================================================== -# Link libraries: e.g. libopencv_core.so, opencv_imgproc220d.lib, etc... +# Link libraries: e.g. opencv_core;opencv_imgproc; etc... # ==================================================================== SET(OpenCV_LIB_COMPONENTS @OPENCV_MODULES_CONFIGCMAKE@) -@OPENCV_DEPENDENCIES_MAP_OPT@ - -@OPENCV_DEPENDENCIES_MAP_DBG@ - # ============================================================== # Extra include directories, needed by OpenCV 2 new structure # ============================================================== @@ -193,34 +191,11 @@ else() set(OpenCV_LIB_SUFFIX "") endif() +SET(OpenCV_LIBS "${OpenCV_LIB_COMPONENTS}") + foreach(__opttype OPT DBG) - SET(OpenCV_LIBS_${__opttype} "") + SET(OpenCV_LIBS_${__opttype} "${OpenCV_LIBS}") SET(OpenCV_EXTRA_LIBS_${__opttype} "") - foreach(__cvlib ${OpenCV_FIND_COMPONENTS}) - foreach(__cvdep ${OpenCV_${__cvlib}_DEPS_${__opttype}}) - if(__cvdep MATCHES "^opencv_") - list(APPEND OpenCV_LIBS_${__opttype} "${OpenCV_LIB_DIR_${__opttype}}/${OpenCV_${__cvdep}_LIBNAME_${__opttype}}${OpenCV_LIB_SUFFIX}") - #indicate that this module is also found - string(TOUPPER "${__cvdep}" __cvdep) - set(${__cvdep}_FOUND 1) - elseif(EXISTS "${OpenCV_3RDPARTY_LIB_DIR_${__opttype}}/${OpenCV_${__cvdep}_LIBNAME_${__opttype}}") - list(APPEND OpenCV_LIBS_${__opttype} "${OpenCV_3RDPARTY_LIB_DIR_${__opttype}}/${OpenCV_${__cvdep}_LIBNAME_${__opttype}}") - endif() - endforeach() - list(APPEND OpenCV_LIBS_${__opttype} "${OpenCV_LIB_DIR_${__opttype}}/${OpenCV_${__cvlib}_LIBNAME_${__opttype}}${OpenCV_LIB_SUFFIX}") - list(APPEND 
OpenCV_EXTRA_LIBS_${__opttype} ${OpenCV_${__cvlib}_EXTRA_DEPS_${__opttype}}) - endforeach() - - if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_GREATER 2.4) - if(OpenCV_LIBS_${__opttype}) - list(REMOVE_DUPLICATES OpenCV_LIBS_${__opttype}) - endif() - if(OpenCV_EXTRA_LIBS_${__opttype}) - list(REMOVE_DUPLICATES OpenCV_EXTRA_LIBS_${__opttype}) - endif() - else() - #TODO: duplicates are annoying but they should not be the problem - endif() # CUDA if(OpenCV_CUDA_VERSION AND (CMAKE_CROSSCOMPILING OR (WIN32 AND NOT OpenCV_SHARED))) @@ -261,33 +236,6 @@ foreach(__opttype OPT DBG) endif() endforeach() -if(OpenCV_LIBS_DBG) - list(REVERSE OpenCV_LIBS_DBG) -endif() - -if(OpenCV_LIBS_OPT) - list(REVERSE OpenCV_LIBS_OPT) -endif() - -# CMake>=2.6 supports the notation "debug XXd optimized XX" -if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_GREATER 2.4) - # Modern CMake: - SET(OpenCV_LIBS "") - foreach(__cvlib ${OpenCV_LIBS_DBG} ${OpenCV_EXTRA_LIBS_DBG}) - list(APPEND OpenCV_LIBS debug "${__cvlib}") - endforeach() - foreach(__cvlib ${OpenCV_LIBS_OPT} ${OpenCV_EXTRA_LIBS_OPT}) - list(APPEND OpenCV_LIBS optimized "${__cvlib}") - endforeach() -else() - # Old CMake: - if(CMAKE_BUILD_TYPE MATCHES "Debug") - SET(OpenCV_LIBS ${OpenCV_LIBS_DBG} ${OpenCV_EXTRA_LIBS_DBG}) - else() - SET(OpenCV_LIBS ${OpenCV_LIBS_OPT} ${OpenCV_EXTRA_LIBS_OPT}) - endif() -endif() - # ============================================================== # Android camera helper macro # ============================================================== @@ -323,3 +271,45 @@ if(CMAKE_CROSSCOMPILING AND OpenCV_SHARED AND (CMAKE_SYSTEM_NAME MATCHES "Linux" set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-rpath-link,${dir}") endforeach() endif() + + + +# +# Some macros for samples +# +macro(ocv_check_dependencies) + set(OCV_DEPENDENCIES_FOUND TRUE) + foreach(d ${ARGN}) + if(NOT TARGET ${d}) + set(OCV_DEPENDENCIES_FOUND FALSE) + break() + endif() + endforeach() +endmacro() + +# adds include
directories in such way that directories from the OpenCV source tree go first +function(ocv_include_directories) + set(__add_before "") + foreach(dir ${ARGN}) + get_filename_component(__abs_dir "${dir}" ABSOLUTE) + if("${__abs_dir}" MATCHES "^${OpenCV_DIR}") + list(APPEND __add_before "${dir}") + else() + include_directories(AFTER SYSTEM "${dir}") + endif() + endforeach() + include_directories(BEFORE ${__add_before}) +endfunction() + +macro(ocv_include_modules) + include_directories(BEFORE "${OpenCV_INCLUDE_DIRS}") +endmacro() + +# remove all matching elements from the list +macro(ocv_list_filterout lst regex) + foreach(item ${${lst}}) + if(item MATCHES "${regex}") + list(REMOVE_ITEM ${lst} "${item}") + endif() + endforeach() +endmacro() diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 0ea04d0765..467e877071 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -9,10 +9,16 @@ if(BUILD_DOCS AND HAVE_SPHINX) project(opencv_docs) - set(DOC_LIST "${OpenCV_SOURCE_DIR}/doc/opencv-logo.png" "${OpenCV_SOURCE_DIR}/doc/opencv-logo2.png" - "${OpenCV_SOURCE_DIR}/doc/opencv-logo-white.png" "${OpenCV_SOURCE_DIR}/doc/opencv.ico" - "${OpenCV_SOURCE_DIR}/doc/haartraining.htm" "${OpenCV_SOURCE_DIR}/doc/license.txt" - "${OpenCV_SOURCE_DIR}/doc/pattern.png" "${OpenCV_SOURCE_DIR}/doc/acircles_pattern.png") + set(DOC_LIST + "${OpenCV_SOURCE_DIR}/doc/opencv-logo.png" + "${OpenCV_SOURCE_DIR}/doc/opencv-logo2.png" + "${OpenCV_SOURCE_DIR}/doc/opencv-logo-white.png" + "${OpenCV_SOURCE_DIR}/doc/opencv.ico" + "${OpenCV_SOURCE_DIR}/doc/pattern.png" + "${OpenCV_SOURCE_DIR}/doc/acircles_pattern.png") + if(NOT INSTALL_CREATE_DISTRIB) + list(APPEND DOC_LIST "${OpenCV_SOURCE_DIR}/doc/haartraining.htm") + endif() set(OPTIONAL_DOC_LIST "") diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index b83a591490..f0603acd6b 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -1,8 +1,8 @@ set(the_description "The Core Functionality") 
-ocv_add_module(core ${ZLIB_LIBRARIES} OPTIONAL opencv_cudev) +ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES} OPTIONAL opencv_cudev) ocv_module_include_directories(${ZLIB_INCLUDE_DIRS}) -if (HAVE_WINRT) +if(HAVE_WINRT) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"") endif() diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst index 93e7ca479c..1be3e4d2a2 100644 --- a/modules/core/doc/basic_structures.rst +++ b/modules/core/doc/basic_structures.rst @@ -1429,7 +1429,7 @@ Various Mat constructors :param sizes: Array of integers specifying an n-dimensional array shape. - :param type: Array type. Use ``CV_8UC1, ..., CV_64FC4`` to create 1-4 channel matrices, or ``CV_8UC(n), ..., CV_64FC(n)`` to create multi-channel (up to ``CV_MAX_CN`` channels) matrices. + :param type: Array type. Use ``CV_8UC1, ..., CV_64FC4`` to create 1-4 channel matrices, or ``CV_8UC(n), ..., CV_64FC(n)`` to create multi-channel (up to ``CV_CN_MAX`` channels) matrices. :param s: An optional value to initialize each matrix element with. To set all the matrix elements to the particular value after the construction, use the assignment operator ``Mat::operator=(const Scalar& value)`` . diff --git a/modules/core/doc/drawing_functions.rst b/modules/core/doc/drawing_functions.rst index 258cfe6ef3..6968d580e7 100644 --- a/modules/core/doc/drawing_functions.rst +++ b/modules/core/doc/drawing_functions.rst @@ -99,7 +99,7 @@ Draws a simple or thick elliptic arc or fills an ellipse sector. :param center: Center of the ellipse. - :param axes: Length of the ellipse axes. + :param axes: Half of the size of the ellipse main axes. :param angle: Ellipse rotation angle in degrees. @@ -137,7 +137,7 @@ Approximates an elliptic arc with a polyline. :param center: Center of the arc. - :param axes: Half-sizes of the arc. See the :ocv:func:`ellipse` for details. 
+ :param axes: Half of the size of the ellipse main axes. See the :ocv:func:`ellipse` for details. :param angle: Rotation angle of the ellipse in degrees. See the :ocv:func:`ellipse` for details. @@ -153,7 +153,6 @@ The function ``ellipse2Poly`` computes the vertices of a polyline that approxima :ocv:func:`ellipse` . - fillConvexPoly ------------------ Fills a convex polygon. diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 8073f0eba1..ce0518015e 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -110,7 +110,10 @@ enum { GpuApiCallError= -217, OpenGlNotSupported= -218, OpenGlApiCallError= -219, - OpenCLApiCallError= -220 + OpenCLApiCallError= -220, + OpenCLDoubleNotSupported= -221, + OpenCLInitError= -222, + OpenCLNoAMDBlasFft= -223 }; } //Error diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h index 4555efe6b1..38b0f340a8 100644 --- a/modules/core/include/opencv2/core/types_c.h +++ b/modules/core/include/opencv2/core/types_c.h @@ -177,7 +177,11 @@ enum { CV_GpuNotSupported= -216, CV_GpuApiCallError= -217, CV_OpenGlNotSupported= -218, - CV_OpenGlApiCallError= -219 + CV_OpenGlApiCallError= -219, + CV_OpenCLApiCallError= -220, + CV_OpenCLDoubleNotSupported= -221, + CV_OpenCLInitError= -222, + CV_OpenCLNoAMDBlasFft= -223 }; /****************************************************************************************\ diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index c9768bbeca..da27b518eb 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -885,7 +885,7 @@ static void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size sz, void* ) { - IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); + IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2; ippiNot_8u_C1R(src1, (int)step1, dst, 
(int)step, (IppiSize&)sz), (vBinOp, IF_SIMD(VNot)>(src1, step1, src2, step2, dst, step, sz))); } diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index f24579ca25..bb2e1f4932 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -480,7 +480,7 @@ cv::Scalar cv::sum( InputArray _src ) if( ippFunc ) { Ipp64f res[4]; - if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 ) + if( ippFunc(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) >= 0 ) { Scalar sc; for( int i = 0; i < cn; i++ ) @@ -585,7 +585,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask ) if( ippFuncC1 ) { Ipp64f res; - if( ippFuncC1(src.data, src.step[0], mask.data, mask.step[0], sz, &res) >= 0 ) + if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &res) >= 0 ) { return Scalar(res); } @@ -599,9 +599,9 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask ) if( ippFuncC3 ) { Ipp64f res1, res2, res3; - if( ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 1, &res1) >= 0 && - ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 2, &res2) >= 0 && - ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 3, &res3) >= 0 ) + if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &res1) >= 0 && + ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &res2) >= 0 && + ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &res3) >= 0 ) { return Scalar(res1, res2, res3); } @@ -627,7 +627,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask ) if( ippFunc ) { Ipp64f res[4]; - if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 ) + if( ippFunc(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) >= 0 ) { Scalar sc; for( int i = 0; i < cn; i++ ) diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 02ede4d2f1..07001d6398 100644 --- a/modules/highgui/CMakeLists.txt +++ 
b/modules/highgui/CMakeLists.txt @@ -309,7 +309,7 @@ if(WIN32 AND WITH_FFMPEG) COMMENT "Copying ${ffmpeg_path} to the output directory") endif() - install(FILES "${ffmpeg_path}" DESTINATION bin COMPONENT main RENAME "${ffmpeg_bare_name_ver}") + install(FILES "${ffmpeg_path}" DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT main RENAME "${ffmpeg_bare_name_ver}") endif() ocv_add_accuracy_tests() diff --git a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst index f7cb69cf41..3e019d4f98 100644 --- a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst +++ b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst @@ -293,8 +293,6 @@ Calculates the up-right bounding rectangle of a point set. The function calculates and returns the minimal up-right bounding rectangle for the specified point set. - - contourArea --------------- Calculates a contour area. @@ -417,6 +415,7 @@ Fits an ellipse around a set of 2D points. * Nx2 numpy array (Python interface) The function calculates the ellipse that fits (in a least-squares sense) a set of 2D points best of all. It returns the rotated rectangle in which the ellipse is inscribed. The algorithm [Fitzgibbon95]_ is used. +Developer should keep in mind that it is possible that the returned ellipse/rotatedRect data contains negative indices, due to the data points being close to the border of the containing Mat element. .. note:: @@ -539,7 +538,7 @@ Finds a rotated rectangle of the minimum area enclosing the input 2D point set. * Nx2 numpy array (Python interface) The function calculates and returns the minimum-area bounding rectangle (possibly rotated) for a specified point set. See the OpenCV sample ``minarea.cpp`` . - +Developer should keep in mind that the returned rotatedRect can contain negative indices when data is close to the containing Mat element boundary.
boxPoints diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index fb6afaf2b3..dfa7953b10 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -115,7 +115,7 @@ void cv::Canny( InputArray _src, OutputArray _dst, #ifdef USE_IPP_CANNY if( aperture_size == 3 && !L2gradient && - ippCanny(src, dst, low_thresh, high_thresh) >= 0 ) + ippCanny(src, dst, (float)low_thresh, (float)high_thresh) ) return; #endif diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 14703cce28..49312ba09b 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -218,7 +218,7 @@ public: { const void *yS = src.ptr(range.start); void *yD = dst.ptr(range.start); - if( cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) < 0 ) + if( !cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) ) *ok = false; } @@ -730,7 +730,7 @@ template<> struct RGB2Gray { typedef uchar channel_type; - RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn) + RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn) { const int coeffs0[] = { R2Y, G2Y, B2Y }; if(!coeffs) coeffs = coeffs0; @@ -761,7 +761,7 @@ template<> struct RGB2Gray { typedef ushort channel_type; - RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn) + RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn) { static const int coeffs0[] = { R2Y, G2Y, B2Y }; memcpy(coeffs, _coeffs ? 
_coeffs : coeffs0, 3*sizeof(coeffs[0])); diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp index 4383c12fb7..eca0db3fde 100644 --- a/modules/imgproc/src/deriv.cpp +++ b/modules/imgproc/src/deriv.cpp @@ -212,8 +212,8 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize); buffer.allocate(bufSize); - ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, src.step, - (Ipp16s*)dst.data, dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer); + ippiFilterScharrVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer); return true; } @@ -223,8 +223,8 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize); buffer.allocate(bufSize); - ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, src.step, - (Ipp16s*)dst.data, dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer); + ippiFilterScharrHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer); return true; } @@ -245,12 +245,12 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize); buffer.allocate(bufSize); - ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, src.step, - (Ipp32f*)dst.data, dst.step, ippiSize(src.cols, src.rows), + ippiFilterScharrVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); if(scale != 1) /* IPP is fast, so MulC produce very little perf degradation */ - ippiMulC_32f_C1IR((Ipp32f)scale,(Ipp32f*)dst.data,dst.step,ippiSize(dst.cols*dst.channels(),dst.rows)); + 
ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f*)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } @@ -260,11 +260,11 @@ static bool IPPDerivScharr(const Mat& src, Mat& dst, int ddepth, int dx, int dy, ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize); buffer.allocate(bufSize); - ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, src.step, - (Ipp32f*)dst.data, dst.step, ippiSize(src.cols, src.rows), + ippiFilterScharrHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale,(Ipp32f *)dst.data,dst.step,ippiSize(dst.cols*dst.channels(),dst.rows)); + ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } @@ -297,8 +297,8 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, src.step, - (Ipp16s*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelNegVertBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); return true; } @@ -308,8 +308,8 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, src.step, - (Ipp16s*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + 
ippiFilterSobelHorizBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); return true; @@ -320,8 +320,8 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, src.step, - (Ipp16s*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelVertSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); return true; @@ -332,8 +332,8 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, src.step, - (Ipp16s*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelHorizSecondBorder_8u16s_C1R((const Ipp8u*)src.data, (int)src.step, + (Ipp16s*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); return true; @@ -344,14 +344,14 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k { if((dx == 1) && (dy == 0)) { - ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); + ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize); buffer.allocate(bufSize); - ippiFilterSobelNegVertBorder_32f_C1R((const 
Ipp32f*)src.data, src.step, - (Ipp32f*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelNegVertBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale,(Ipp32f *)dst.data,dst.step,ippiSize(dst.cols*dst.channels(),dst.rows)); + ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } @@ -361,11 +361,11 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, src.step, - (Ipp32f*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelHorizBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale,(Ipp32f *)dst.data,dst.step,ippiSize(dst.cols*dst.channels(),dst.rows)); + ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } @@ -375,11 +375,11 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, src.step, - (Ipp32f*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelVertSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, 
(int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale,(Ipp32f *)dst.data,dst.step,ippiSize(dst.cols*dst.channels(),dst.rows)); + ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } @@ -389,11 +389,11 @@ static bool IPPDeriv(const Mat& src, Mat& dst, int ddepth, int dx, int dy, int k ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize); buffer.allocate(bufSize); - ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, src.step, - (Ipp32f*)dst.data, dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), + ippiFilterSobelHorizSecondBorder_32f_C1R((const Ipp32f*)src.data, (int)src.step, + (Ipp32f*)dst.data, (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), ippBorderRepl, 0, (Ipp8u*)(char*)buffer); if(scale != 1) - ippiMulC_32f_C1IR((Ipp32f)scale,(Ipp32f *)dst.data,dst.step,ippiSize(dst.cols*dst.channels(),dst.rows)); + ippiMulC_32f_C1IR((Ipp32f)scale, (Ipp32f *)dst.data, (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); return true; } diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.cpp index 229bbcb8c6..219f28d62e 100644 --- a/modules/imgproc/src/sumpixels.cpp +++ b/modules/imgproc/src/sumpixels.cpp @@ -252,11 +252,11 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output { _sqsum.create( isize, CV_MAKETYPE( CV_64F, cn ) ); sqsum = _sqsum.getMat(); - ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, src.step, (Ipp32f*)sum.data, sum.step, (Ipp64f*)sqsum.data, sqsum.step, srcRoiSize, 0, 0 ); + ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); } else { - ippiIntegral_8u32f_C1R( 
(const Ipp8u*)src.data, src.step, (Ipp32f*)sum.data, sum.step, srcRoiSize, 0 ); + ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 ); } return; } @@ -272,11 +272,11 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output { _sqsum.create( isize, CV_MAKETYPE( CV_64F, cn ) ); sqsum = _sqsum.getMat(); - ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, src.step, (Ipp32s*)sum.data, sum.step, (Ipp64f*)sqsum.data, sqsum.step, srcRoiSize, 0, 0 ); + ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 ); } else { - ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, src.step, (Ipp32s*)sum.data, sum.step, srcRoiSize, 0 ); + ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 ); } return; } diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp index 0a1428f0ac..cb9944fd63 100644 --- a/modules/imgproc/test/test_convhull.cpp +++ b/modules/imgproc/test/test_convhull.cpp @@ -1445,14 +1445,14 @@ void CV_FitLineTest::generate_point_set( void* pointsSet ) t = (float)((cvtest::randReal(rng)-0.5)*low_high_range*2); for( k = 0; k < n; k++ ) + { p[k] = (float)((cvtest::randReal(rng)-0.5)*max_noise*2 + t*line0[k] + line0[k+n]); - if( point_type == CV_32S ) - for( k = 0; k < n; k++ ) + if( point_type == CV_32S ) pi[k] = cvRound(p[k]); - else - for( k = 0; k < n; k++ ) + else pf[k] = p[k]; + } } } diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt index 3dfae12381..2cb58a692f 100644 --- a/modules/java/CMakeLists.txt +++ b/modules/java/CMakeLists.txt @@ -334,9 +334,15 @@ if(ANDROID) LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main) else() - install(TARGETS ${the_module} - RUNTIME DESTINATION ${JAR_INSTALL_DIR} 
COMPONENT main - LIBRARY DESTINATION ${JAR_INSTALL_DIR} COMPONENT main) + if(NOT INSTALL_CREATE_DISTRIB) + install(TARGETS ${the_module} + RUNTIME DESTINATION ${JAR_INSTALL_DIR} COMPONENT main + LIBRARY DESTINATION ${JAR_INSTALL_DIR} COMPONENT main) + else() + install(TARGETS ${the_module} + RUNTIME DESTINATION ${JAR_INSTALL_DIR}/${OpenCV_ARCH} COMPONENT main + LIBRARY DESTINATION ${JAR_INSTALL_DIR}/${OpenCV_ARCH} COMPONENT main) + endif() endif() ###################################################################################################################################### diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 50c618e5d9..9f531ac4e7 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -1392,6 +1392,8 @@ bool CvSVM::do_train( int svm_type, int sample_count, int var_count, const float for( i = 0; i < sample_count; i++ ) sv_count += fabs(alpha[i]) > 0; + CV_Assert(sv_count != 0); + sv_total = df->sv_count = sv_count; CV_CALL( df->alpha = (double*)cvMemStorageAlloc( storage, sv_count*sizeof(df->alpha[0])) ); CV_CALL( sv = (float**)cvMemStorageAlloc( storage, sv_count*sizeof(sv[0]))); diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 2728b2f1eb..6aa71d8ea8 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -55,20 +55,11 @@ namespace cv { namespace ocl { - static const char noImage2dOption[] = "-D DISABLE_IMAGE2D"; - - static bool use_image2d = false; - static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth) { char optBuf [100] = {0}; char * optBufPtr = optBuf; - if( !use_image2d ) - { - strcat(optBufPtr, noImage2dOption); - optBufPtr += strlen(noImage2dOption); - } cl_kernel kernel; kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr); size_t wave_size = 
queryWaveFrontSize(kernel); @@ -149,13 +140,10 @@ public: counters.setTo(Scalar::all(0)); integral(img, surf_.sum); - use_image2d = support_image2d(); - if(use_image2d) - { - bindImgTex(img, imgTex); - bindImgTex(surf_.sum, sumTex); - finish(); - } + + bindImgTex(img, imgTex); + bindImgTex(surf_.sum, sumTex); + finish(); maskSumTex = 0; diff --git a/modules/ocl/CMakeLists.txt b/modules/ocl/CMakeLists.txt index 69d9df52d8..21e0b30858 100644 --- a/modules/ocl/CMakeLists.txt +++ b/modules/ocl/CMakeLists.txt @@ -1,7 +1,8 @@ if(NOT HAVE_OPENCL) ocv_module_disable(ocl) + return() endif() set(the_description "OpenCL-accelerated Computer Vision") -ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml) +ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml "${OPENCL_LIBRARIES}") ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) diff --git a/modules/ocl/doc/matrix_reductions.rst b/modules/ocl/doc/matrix_reductions.rst index 4bedb944f9..41161d8aa7 100644 --- a/modules/ocl/doc/matrix_reductions.rst +++ b/modules/ocl/doc/matrix_reductions.rst @@ -23,6 +23,32 @@ Returns the number of non-zero elements in src Counts non-zero array elements. Supports all data types. +ocl::min +------------------ + +.. ocv:function:: void ocl::min(const oclMat &src1, const oclMat &src2, oclMat &dst) + + :param src1: the first input array. + + :param src2: the second input array, must be the same size and same type as ``src1``. + + :param dst: the destination array, it will have the same size and same type as ``src1``. + +Computes element-wise minima of two arrays. Supports all data types. + +ocl::max +------------------ + +.. ocv:function:: void ocl::max(const oclMat &src1, const oclMat &src2, oclMat &dst) + + :param src1: the first input array. + + :param src2: the second input array, must be the same size and same type as ``src1``. 
+ + :param dst: the destination array, it will have the same size and same type as ``src1``. + +Computes element-wise maxima of two arrays. Supports all data types. + ocl::minMax ------------------ Returns void diff --git a/modules/ocl/doc/operations_on_matrices.rst b/modules/ocl/doc/operations_on_matrices.rst index 24a4ea1dc5..7efd719672 100644 --- a/modules/ocl/doc/operations_on_matrices.rst +++ b/modules/ocl/doc/operations_on_matrices.rst @@ -3,6 +3,18 @@ Operations on Matrics .. highlight:: cpp +ocl::abs +------------------ +Returns void + +.. ocv:function:: void ocl::abs(const oclMat& src, oclMat& dst) + + :param src: input array. + + :param dst: destination array, it will have the same size and same type as ``src``. + +Computes per-element absolute values of the input array. Supports all data types. + ocl::absdiff ------------------ Returns void diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp index f99e99a6c0..1c0dc976b6 100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@ -82,15 +82,6 @@ namespace cv DEVICE_MEM_PM //persistent memory }; - //Get the global device memory and read/write type - //return 1 if unified memory system supported, otherwise return 0 - CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type); - - //Set the global device memory and read/write type, - //the newly generated oclMat will all use this type - //return -1 if the target type is unsupported, otherwise return 0 - CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT); - // these classes contain OpenCL runtime information struct PlatformInfo; @@ -113,6 +104,7 @@ namespace cv std::vector maxWorkItemSizes; int maxComputeUnits; size_t localMemorySize; + size_t maxMemAllocSize; int deviceVersionMajor; int deviceVersionMinor; @@ -126,7 +118,6 @@ namespace cv DeviceInfo(); }; - //////////////////////////////// Initialization & Info 
//////////////////////// struct PlatformInfo { @@ -193,32 +184,55 @@ namespace cv return Context::getContext()->getOpenCLCommandQueuePtr(); } - bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType); + CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType); - void CV_EXPORTS finish(); + CV_EXPORTS void finish(); + enum BINARY_CACHE_MODE + { + CACHE_NONE = 0, // do not cache OpenCL binary + CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode + CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode + CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // cache opencl binary + }; //! Enable or disable OpenCL program binary caching onto local disk // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the // compiled OpenCL program to be cached to the path automatically as "path/*.clb" // binary file, which will be reused when the OpenCV executable is started again. // - // Caching mode is controlled by the following enums - // Notes - // 1. the feature is by default enabled when OpenCV is built in release mode. - // 2. the CACHE_DEBUG / CACHE_RELEASE flags only effectively work with MSVC compiler; - // for GNU compilers, the function always treats the build as release mode (enabled by default). - enum - { - CACHE_NONE = 0, // do not cache OpenCL binary - CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC) - CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC) - CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary - }; + // This feature is enabled by default. CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); //! set where binary cache to be saved to CV_EXPORTS void setBinaryPath(const char *path); + struct ProgramSource + { + const char* name; + const char* programStr; + const char* programHash; + + // Cache in memory by name (should be unique). 
Caching on disk disabled. + inline ProgramSource(const char* _name, const char* _programStr) + : name(_name), programStr(_programStr), programHash(NULL) + { + } + + // Cache in memory by name (should be unique). Caching on disk uses programHash mark. + inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash) + : name(_name), programStr(_programStr), programHash(_programHash) + { + } + }; + + //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. + //! Deprecated, will be replaced + CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramSource& source, String kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options); + class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat @@ -311,9 +325,9 @@ namespace cv //! allocates new oclMatrix with specified device memory type. void createEx(int rows, int cols, int type, - DevMemRW rw_type, DevMemType mem_type, void* hptr = 0); + DevMemRW rw_type, DevMemType mem_type); void createEx(Size size, int type, DevMemRW rw_type, - DevMemType mem_type, void* hptr = 0); + DevMemType mem_type); //! decreases reference counter; // deallocate the data when reference counter reaches 0. @@ -457,6 +471,14 @@ namespace cv // supports all data types CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst); + //! computes element-wise minimum of the two arrays (dst = min(src1, src2)) + // supports all data types + CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst); + + //! computes element-wise maximum of the two arrays (dst = max(src1, src2)) + // supports all data types + CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst); + //! 
compares elements of two arrays (dst = src1 src2) // supports all data types CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop); @@ -465,6 +487,10 @@ namespace cv // supports all data types CV_EXPORTS void transpose(const oclMat &src, oclMat &dst); + //! computes element-wise absolute values of an array (dst = abs(src)) + // supports all data types + CV_EXPORTS void abs(const oclMat &src, oclMat &dst); + //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2)) // supports all data types CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst); @@ -1812,7 +1838,7 @@ namespace cv // output - // keys = {1, 2, 3} (CV_8UC1) // values = {6,2, 10,5, 4,3} (CV_8UC2) - void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false); + CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false); /*!Base class for MOG and MOG2!*/ class CV_EXPORTS BackgroundSubtractor { @@ -2011,6 +2037,7 @@ namespace cv private: oclMat samples_ocl; }; + /*!*************** SVM *************!*/ class CV_EXPORTS CvSVM_OCL : public CvSVM { @@ -2030,6 +2057,7 @@ namespace cv void create_kernel(); void create_solver(); }; + /*!*************** END *************!*/ } } diff --git a/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp new file mode 100644 index 0000000000..beb3d27525 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/private/opencl_dumpinfo.hpp @@ -0,0 +1,135 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined(DUMP_INFO_STDOUT) && !defined(DUMP_INFO_XML) +#error Invalid usage +#endif + +#if !defined(DUMP_INFO_STDOUT) +#define DUMP_INFO_STDOUT(...) +#endif + +#if !defined(DUMP_INFO_XML) +#define DUMP_INFO_XML(...)
+#endif + +#include <sstream> + +static std::string bytesToStringRepr(size_t value) +{ + size_t b = value % 1024; + value /= 1024; + + size_t kb = value % 1024; + value /= 1024; + + size_t mb = value % 1024; + value /= 1024; + + size_t gb = value; + + std::ostringstream stream; + + if (gb > 0) + stream << gb << " GB "; + if (mb > 0) + stream << mb << " MB "; + if (kb > 0) + stream << kb << " kB "; + if (b > 0) + stream << b << " B"; + + return stream.str(); +} + +static void dumpOpenCLDevice() +{ + using namespace cv::ocl; + try + { + const cv::ocl::DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); + + const char* deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU + ? "CPU" : + (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown"); + DUMP_INFO_STDOUT("Device type", deviceTypeStr); + DUMP_INFO_XML("cv_ocl_deviceType", deviceTypeStr); + + DUMP_INFO_STDOUT("Platform name", deviceInfo.platform->platformName); + DUMP_INFO_XML("cv_ocl_platformName", deviceInfo.platform->platformName); + + DUMP_INFO_STDOUT("Device name", deviceInfo.deviceName); + DUMP_INFO_XML("cv_ocl_deviceName", deviceInfo.deviceName); + + DUMP_INFO_STDOUT("Device version", deviceInfo.deviceVersion); + DUMP_INFO_XML("cv_ocl_deviceVersion", deviceInfo.deviceVersion); + + DUMP_INFO_STDOUT("Compute units", deviceInfo.maxComputeUnits); + DUMP_INFO_XML("cv_ocl_maxComputeUnits", deviceInfo.maxComputeUnits); + + DUMP_INFO_STDOUT("Max work group size", deviceInfo.maxWorkGroupSize); + DUMP_INFO_XML("cv_ocl_maxWorkGroupSize", deviceInfo.maxWorkGroupSize); + + std::string localMemorySizeStr = bytesToStringRepr(deviceInfo.localMemorySize); + DUMP_INFO_STDOUT("Local memory size", localMemorySizeStr.c_str()); + DUMP_INFO_XML("cv_ocl_localMemorySize", deviceInfo.localMemorySize); + + std::string maxMemAllocSizeStr = bytesToStringRepr(deviceInfo.maxMemAllocSize); + DUMP_INFO_STDOUT("Max memory allocation size", maxMemAllocSizeStr.c_str()); + DUMP_INFO_XML("cv_ocl_maxMemAllocSize",
deviceInfo.maxMemAllocSize); + + const char* doubleSupportStr = deviceInfo.haveDoubleSupport ? "Yes" : "No"; + DUMP_INFO_STDOUT("Double support", doubleSupportStr); + DUMP_INFO_XML("cv_ocl_haveDoubleSupport", deviceInfo.haveDoubleSupport); + + const char* isUnifiedMemoryStr = deviceInfo.isUnifiedMemory ? "Yes" : "No"; + DUMP_INFO_STDOUT("Unified memory", isUnifiedMemoryStr); + DUMP_INFO_XML("cv_ocl_isUnifiedMemory", deviceInfo.isUnifiedMemory); + } + catch (...) + { + DUMP_INFO_STDOUT("OpenCL device", "not available"); + DUMP_INFO_XML("cv_ocl", "not available"); + } +} + +#undef DUMP_INFO_STDOUT +#undef DUMP_INFO_XML diff --git a/modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp b/modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp new file mode 100644 index 0000000000..70c45d3dde --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/private/opencl_utils.hpp @@ -0,0 +1,115 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + +#ifndef __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__ +#define __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__ + +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" +#include <vector> +#include <string> + +namespace cl_utils { + +inline cl_int getPlatforms(std::vector<cl_platform_id>& platforms) +{ + cl_uint n = 0; + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) + return err; + + platforms.clear(); platforms.resize(n); + err = ::clGetPlatformIDs(n, &platforms[0], NULL); + if (err != CL_SUCCESS) + return err; + + return CL_SUCCESS; +} + +inline cl_int getDevices(cl_platform_id platform, cl_device_type type, std::vector<cl_device_id>& devices) +{ + cl_uint n = 0; + + cl_int err = ::clGetDeviceIDs(platform, type, 0, NULL, &n); + if (err != CL_SUCCESS) + return err; + + devices.clear(); devices.resize(n); + err = ::clGetDeviceIDs(platform, type, n, &devices[0], NULL); + if (err != CL_SUCCESS) + return err; + + return CL_SUCCESS; +} + + + + +template <typename Functor, typename ObjectType, typename T> +inline cl_int getScalarInfo(Functor f, ObjectType obj, cl_uint name, T& param) +{ + return f(obj, name, sizeof(T), &param, NULL); +} + +template <typename Functor, typename ObjectType> +inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param) +{ + ::size_t required; + cl_int err = f(obj, name, 0, NULL, &required); + if (err != CL_SUCCESS) + return err; + + param.clear(); + if (required > 0) + { + std::vector<char> buf(required + 1, char(0)); + err = f(obj, name, required, &buf[0], NULL); + if (err != CL_SUCCESS) + return err; + param = &buf[0]; + } + + return CL_SUCCESS; +}; + +} // namespace cl_utils + +#endif // __OPENCV_OCL_PRIVATE_OPENCL_UTILS_HPP__ diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 611e75564c..670b03c2ef 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -77,6 +77,8 @@ inline cl_command_queue getClCommandQueue(const Context *ctx) return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr()); } +CV_EXPORTS
cv::Mutex& getInitializationMutex(); + enum openCLMemcpyKind { clMemcpyHostToDevice = 0, @@ -84,39 +86,39 @@ enum openCLMemcpyKind clMemcpyDeviceToDevice }; ///////////////////////////OpenCL call wrappers//////////////////////////// -void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, +CV_EXPORTS void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, size_t widthInBytes, size_t height); -void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, +CV_EXPORTS void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); -void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, +CV_EXPORTS void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); -void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, +CV_EXPORTS void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, const void *src, size_t spitch, size_t width, size_t height, int src_offset); -void CV_EXPORTS openCLFree(void *devPtr); -cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); -void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); -cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, +CV_EXPORTS void openCLFree(void *devPtr); +CV_EXPORTS cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); +CV_EXPORTS void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); +CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName); -cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, +CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt, const 
cv::ocl::ProgramEntry* source, String kernelName, const char *build_options); -void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, String kernelName, std::vector< std::pair > &args, +CV_EXPORTS void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); +CV_EXPORTS void openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, String kernelName, std::vector< std::pair > &args, int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); -void CV_EXPORTS openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, +CV_EXPORTS void openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, const char *build_options); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], +CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], +CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, const char *build_options); -cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value, +CV_EXPORTS cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, const size_t size); -cl_mem 
CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); +CV_EXPORTS cl_mem openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); enum FLUSH_MODE { @@ -125,9 +127,9 @@ enum FLUSH_MODE DISABLE }; -void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], +CV_EXPORTS void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); -void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], +CV_EXPORTS void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE); @@ -135,8 +137,8 @@ void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry // note: // 1. there is no memory management. User need to explicitly release the resource // 2. 
for faster clamping, there is no buffer padding for the constructed texture -cl_mem CV_EXPORTS bindTexture(const oclMat &mat); -void CV_EXPORTS releaseTexture(cl_mem& texture); +CV_EXPORTS cl_mem bindTexture(const oclMat &mat); +CV_EXPORTS void releaseTexture(cl_mem& texture); //Represents an image texture object class CV_EXPORTS TextureCL @@ -163,15 +165,11 @@ private: // bind oclMat to OpenCL image textures and retunrs an TextureCL object // note: // for faster clamping, there is no buffer padding for the constructed texture -Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); - -// returns whether the current context supports image2d_t format or not -bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); - -bool CV_EXPORTS isCpuDevice(); +CV_EXPORTS Ptr bindTexturePtr(const oclMat &mat); -size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel); +CV_EXPORTS bool isCpuDevice(); +CV_EXPORTS size_t queryWaveFrontSize(cl_kernel kernel); inline size_t divUp(size_t total, size_t grain) @@ -189,24 +187,6 @@ inline size_t roundUp(size_t sz, size_t n) return result; } -//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. -CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, - const cv::ocl::ProgramEntry* source, String kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - -//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. 
-CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, - const cv::ocl::ProgramEntry* source, const int numFiles, String kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - }//namespace ocl }//namespace cv diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index 8328694d19..78ebc1a268 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -13,7 +13,7 @@ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. - +// // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -42,7 +42,20 @@ #include "perf_precomp.hpp" -const char * impls[] = +#define DUMP_INFO_STDOUT(propertyDisplayName, propertyValue) \ + do { \ + std::cout << (propertyDisplayName) << ": " << (propertyValue) << std::endl; \ + } while (false) + +#define DUMP_INFO_XML(propertyXMLName, propertyValue) \ + do { \ + std::stringstream ss; ss << propertyValue; \ + ::testing::Test::RecordProperty((propertyXMLName), ss.str()); \ + } while (false) + +#include "opencv2/ocl/private/opencl_dumpinfo.hpp" + +static const char * impls[] = { IMPL_OCL, IMPL_PLAIN, @@ -51,59 +64,10 @@ const char * impls[] = #endif }; -using namespace cv::ocl; int main(int argc, char ** argv) { - const char * keys = - "{ h help | false | print help message }" - "{ t type | gpu | set device type:cpu or gpu}" - "{ p platform | -1 | set platform id }" - "{ d device | 0 | set device id }"; - - if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates - { - CommandLineParser cmd(argc, argv, keys); - if (cmd.has("help")) - { - cout << "Available 
options besides google test option:" << endl; - cmd.printMessage(); - return 0; - } - - string type = cmd.get("type"); - int pid = cmd.get("platform"); - int device = cmd.get("device"); - - int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU : - cv::ocl::CVCL_DEVICE_TYPE_GPU; - - cv::ocl::PlatformsInfo platformsInfo; - cv::ocl::getOpenCLPlatforms(platformsInfo); - if (pid >= (int)platformsInfo.size()) - { - std::cout << "platform is invalid\n"; - return 1; - } - - cv::ocl::DevicesInfo devicesInfo; - int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); - if (device < 0 || device >= devnums) - { - std::cout << "device/platform invalid\n"; - return 1; - } - - cv::ocl::setDevice(devicesInfo[device]); - } - - const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); - - cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ? - "CPU" : - (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown")) << endl - << "Platform name: " << deviceInfo.platform->platformName << endl - << "Device name: " << deviceInfo.deviceName << endl; + ::perf::TestBase::setPerformanceStrategy(::perf::PERF_STRATEGY_SIMPLE); - CV_PERF_TEST_MAIN_INTERNALS(ocl, impls) + CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, dumpOpenCLDevice()) } diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index d718ed5519..12dcde950f 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -877,3 +877,108 @@ PERF_TEST_P(AddWeightedFixture, AddWeighted, else OCL_PERF_ELSE } + +///////////// Min //////////////////////// + +typedef Size_MatType MinFixture; + +PERF_TEST_P(MinFixture, Min, + ::testing::Combine(OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_32FC1))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + + Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type); + 
declare.in(src1, src2, WARMUP_RNG).out(dst); + + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); + + OCL_TEST_CYCLE() cv::ocl::min(oclSrc1, oclSrc2, oclDst); + + oclDst.download(dst); + + SANITY_CHECK(dst); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() dst = cv::min(src1, src2); + + SANITY_CHECK(dst); + } + else + OCL_PERF_ELSE +} + +///////////// Max //////////////////////// + +typedef Size_MatType MaxFixture; + +PERF_TEST_P(MaxFixture, Max, + ::testing::Combine(OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_32FC1))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + + Mat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type); + declare.in(src1, src2, WARMUP_RNG).out(dst); + + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); + + OCL_TEST_CYCLE() cv::ocl::max(oclSrc1, oclSrc2, oclDst); + + oclDst.download(dst); + + SANITY_CHECK(dst); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() dst = cv::max(src1, src2); + + SANITY_CHECK(dst); + } + else + OCL_PERF_ELSE +} + +///////////// Max //////////////////////// + +typedef Size_MatType AbsFixture; + +PERF_TEST_P(AbsFixture, Abs, + ::testing::Combine(OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_32FC1))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + + Mat src(srcSize, type), dst(srcSize, type); + declare.in(src, WARMUP_RNG).out(dst); + + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc(src), oclDst(srcSize, type); + + OCL_TEST_CYCLE() cv::ocl::abs(oclSrc, oclDst); + + oclDst.download(dst); + + SANITY_CHECK(dst); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() dst = cv::abs(src); + + SANITY_CHECK(dst); + } + else + OCL_PERF_ELSE +} diff --git a/modules/ocl/perf/perf_kalman.cpp b/modules/ocl/perf/perf_kalman.cpp index b5f713be95..017a8a70dc 100644 --- 
a/modules/ocl/perf/perf_kalman.cpp +++ b/modules/ocl/perf/perf_kalman.cpp @@ -43,20 +43,25 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #include "perf_precomp.hpp" + +#ifdef HAVE_CLAMDBLAS + using namespace perf; using namespace std; using namespace cv::ocl; using namespace cv; using std::tr1::tuple; using std::tr1::get; + ///////////// Kalman Filter //////////////////////// typedef tuple KalmanFilterType; typedef TestBaseWithParam KalmanFilterFixture; PERF_TEST_P(KalmanFilterFixture, KalmanFilter, - ::testing::Values(1000, 1500)) + ::testing::Values(1000, 1500)) { KalmanFilterType params = GetParam(); const int dim = get<0>(params); @@ -66,7 +71,7 @@ PERF_TEST_P(KalmanFilterFixture, KalmanFilter, cv::Mat statePre_; - if(RUN_PLAIN_IMPL) + if (RUN_PLAIN_IMPL) { cv::KalmanFilter kalman; TEST_CYCLE() @@ -76,7 +81,8 @@ PERF_TEST_P(KalmanFilterFixture, KalmanFilter, kalman.predict(); } statePre_ = kalman.statePre; - }else if(RUN_OCL_IMPL) + } + else if(RUN_OCL_IMPL) { cv::ocl::oclMat dsample(sample); cv::ocl::KalmanFilter kalman_ocl; @@ -87,7 +93,11 @@ PERF_TEST_P(KalmanFilterFixture, KalmanFilter, kalman_ocl.predict(); } kalman_ocl.statePre.download(statePre_); - }else + } + else OCL_PERF_ELSE + SANITY_CHECK(statePre_); -} \ No newline at end of file +} + +#endif // HAVE_CLAMDBLAS diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 4b26231dcc..cff0875743 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -56,11 +56,28 @@ using namespace cv; using namespace cv::ocl; +static std::vector scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn) +{ + CV_Assert(ocn == cn || (ocn == 4 && cn == 3)); + + static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort), + sizeof(short), sizeof(int), sizeof(float), sizeof(double) }; + + int elemSize1 = sizeMap[depth]; + int bufSize = elemSize1 * ocn; + std::vector _buf(bufSize); + uchar * buf = &_buf[0]; + 
scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn)); + memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1); + + return _buf; +} + ////////////////////////////////////////////////////////////////////////////// -/////////////////////// add subtract multiply divide ///////////////////////// +/////////////// add subtract multiply divide min max ///////////////////////// ////////////////////////////////////////////////////////////////////////////// -enum { ADD = 0, SUB, MUL, DIV, ABS_DIFF }; +enum { ADD = 0, SUB, MUL, DIV, ABS, ABS_DIFF, MIN, MAX }; static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const Scalar & scalar, const oclMat & mask, oclMat &dst, int op_type, bool use_scalar = false) @@ -69,13 +86,13 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F)) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } CV_Assert(src2.empty() || (!src2.empty() && src1.type() == src2.type() && src1.size() == src2.size())); CV_Assert(mask.empty() || (!mask.empty() && mask.type() == CV_8UC1 && mask.size() == src1.size())); - CV_Assert(op_type >= ADD && op_type <= ABS_DIFF); + CV_Assert(op_type >= ADD && op_type <= MAX); dst.create(src1.size(), src1.type()); @@ -84,7 +101,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize(); int maskstep1 = mask.step, maskoffset1 = mask.offset / mask.elemSize(); int dststep1 = dst.step / dst.elemSize(), dstoffset1 = dst.offset / dst.elemSize(); - oclMat m; + std::vector m; size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; @@ -93,7 +110,7 @@ static void 
arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char * const WTypeMap[] = { "short", "short", "int", "int", "int", "float", "double" }; - const char * const funcMap[] = { "FUNC_ADD", "FUNC_SUB", "FUNC_MUL", "FUNC_DIV", "FUNC_ABS_DIFF" }; + const char * const funcMap[] = { "FUNC_ADD", "FUNC_SUB", "FUNC_MUL", "FUNC_DIV", "FUNC_ABS", "FUNC_ABS_DIFF", "FUNC_MIN", "FUNC_MAX" }; const char * const channelMap[] = { "", "", "2", "4", "4" }; bool haveScalar = use_scalar || src2.empty(); @@ -132,10 +149,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const if (haveScalar) { const int WDepthMap[] = { CV_16S, CV_16S, CV_32S, CV_32S, CV_32S, CV_32F, CV_64F }; - m.create(1, 1, CV_MAKE_TYPE(WDepthMap[WDepth], oclChannels)); - m.setTo(scalar); + m = scalarToVector(scalar, WDepthMap[WDepth], oclChannels, src1.channels()); - args.push_back( std::make_pair( sizeof(cl_mem), (void *)&m.data )); + args.push_back( std::make_pair( m.size(), (void *)&m[0])); kernelName += "_scalar"; } @@ -205,10 +221,26 @@ void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst) arithmetic_run_generic(src, oclMat(), Scalar::all(scalar), oclMat(), dst, DIV); } +void cv::ocl::min(const oclMat &src1, const oclMat &src2, oclMat &dst) +{ + arithmetic_run_generic(src1, src2, Scalar::all(0), oclMat(), dst, MIN); +} + +void cv::ocl::max(const oclMat &src1, const oclMat &src2, oclMat &dst) +{ + arithmetic_run_generic(src1, src2, Scalar::all(0), oclMat(), dst, MAX); +} + ////////////////////////////////////////////////////////////////////////////// -///////////////////////////////// Absdiff //////////////////////////////////// +/////////////////////////////Abs, Absdiff //////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// +void cv::ocl::abs(const oclMat &src, oclMat &dst) +{ + // explicitly 
uses use_scalar (even if zero) so that the correct kernel is used + arithmetic_run_generic(src, oclMat(), Scalar(), oclMat(), dst, ABS, true); +} + void cv::ocl::absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst) { arithmetic_run_generic(src1, src2, Scalar(), oclMat(), dst, ABS_DIFF); @@ -226,9 +258,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst) static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp, String kernelName, const cv::ocl::ProgramEntry* source) { - CV_Assert(src1.type() == src2.type()); dst.create(src1.size(), CV_8UC1); - Context *clCxt = src1.clCxt; int depth = src1.depth(); size_t localThreads[3] = { 64, 4, 1 }; @@ -255,7 +285,7 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows )); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, + openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } @@ -263,11 +293,11 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int { if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { - std::cout << "Selected device do not support double" << std::endl; + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } - CV_Assert(src1.channels() == 1 && src2.channels() == 1); + CV_Assert(src1.type() == src2.type() && src1.channels() == 1); CV_Assert(cmpOp >= CMP_EQ && cmpOp <= CMP_NE); compare_run(src1, src2, dst, cmpOp, "arithm_compare", &arithm_compare); @@ -347,7 +377,8 @@ Scalar cv::ocl::sum(const oclMat &src) { if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double"); + 
CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return Scalar::all(0); } static sumFunc functab[3] = { @@ -356,11 +387,7 @@ Scalar cv::ocl::sum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = std::max(src.depth(), CV_32S); - if (!hasDouble && ddepth == CV_64F) - ddepth = CV_32F; - sumFunc func = functab[ddepth - CV_32S]; return func(src, SUM, ddepth); } @@ -369,8 +396,10 @@ Scalar cv::ocl::absSum(const oclMat &src) { if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return cv::Scalar::all(0); } + static sumFunc functab[3] = { arithmetic_sum, @@ -378,11 +407,7 @@ Scalar cv::ocl::absSum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = std::max(src.depth(), CV_32S); - if (!hasDouble && ddepth == CV_64F) - ddepth = CV_32F; - sumFunc func = functab[ddepth - CV_32S]; return func(src, ABS_SUM, ddepth); } @@ -391,18 +416,17 @@ Scalar cv::ocl::sqrSum(const oclMat &src) { if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return cv::Scalar::all(0); } static sumFunc functab[3] = { arithmetic_sum, - arithmetic_sum, + arithmetic_sum, arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); - int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? 
CV_64F : CV_32F); - + int ddepth = std::max(src.depth(), CV_32S); sumFunc func = functab[ddepth - CV_32S]; return func(src, SQR_SUM, ddepth); } @@ -413,6 +437,12 @@ Scalar cv::ocl::sqrSum(const oclMat &src) void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) { + if (src.depth() == CV_64F && !src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + double total = 1.0 / src.size().area(); mean = sum(src); @@ -445,8 +475,9 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem std::ostringstream stream; stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()]; stream << " -D MAX_VAL=" << (WT)std::numeric_limits::max(); - stream << " -D MIN_VAL=" << (WT)std::numeric_limits::min(); - String buildOptions = stream.str(); + stream << " -D MIN_VAL=" << (std::numeric_limits::is_integer ? + (WT)std::numeric_limits::min() : -(WT)(std::numeric_limits::max())); + std::string buildOptions = stream.str(); std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); @@ -522,7 +553,8 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; } static minMaxFunc functab[] = @@ -553,13 +585,22 @@ double cv::ocl::norm(const oclMat &src1, int normType) return norm(src1, oclMat(), normType); } -static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff) +static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff, int ntype) { - CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0)); Context *clCxt = 
src1.clCxt; + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0)); + + int ddepth = std::max(src1.depth(), CV_32S); + if (ntype == NORM_L2) + ddepth = std::max(CV_32F, ddepth); - int ddepth = CV_64F; diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels())); + CV_Assert(diff.step % diff.elemSize() == 0); int oclChannels = src1.oclchannels(), sdepth = src1.depth(); int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize(); @@ -606,13 +647,12 @@ static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & s double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) { - CV_Assert(!src1.empty()); - CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return -1; } + CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); bool isRelative = (normType & NORM_RELATIVE) != 0; normType &= NORM_TYPE_MASK; @@ -622,7 +662,8 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) int cn = src1.channels(); double r = 0; oclMat diff; - arithm_absdiff_nonsaturate_run(src1, src2, diff); + + arithm_absdiff_nonsaturate_run(src1, src2, diff, normType); switch (normType) { @@ -654,17 +695,6 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, String kernelName) { - if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) - { - 
CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); - return; - } - - CV_Assert(src.cols == dst.cols && src.rows == dst.rows); - - CV_Assert(src.type() == dst.type()); - - Context *clCxt = src.clCxt; int channels = dst.oclchannels(); int depth = dst.depth(); @@ -696,21 +726,11 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, String kern args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); - openCLExecuteKernel(clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src.clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth); } static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, String kernelName, bool isVertical) { - if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); - return; - } - - CV_Assert(src.cols == dst.cols && src.rows == dst.rows); - CV_Assert(src.type() == dst.type()); - - Context *clCxt = src.clCxt; int channels = dst.oclchannels(); int depth = dst.depth(); @@ -749,16 +769,21 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, String kern const cv::ocl::ProgramEntry* source = isVertical ? 
&arithm_flip_rc : &arithm_flip; - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); + openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); } void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) { + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + dst.create(src.size(), src.type()); + if (flipCode == 0) - { arithmetic_flip_rows_run(src, dst, "arithm_flip_rows"); - } else if (flipCode > 0) arithmetic_flip_cols_run(src, dst, "arithm_flip_cols", false); else @@ -771,7 +796,6 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) static void arithmetic_lut_run(const oclMat &src, const oclMat &lut, oclMat &dst, String kernelName) { - Context *clCxt = src.clCxt; int sdepth = src.depth(); int src_step1 = src.step1(), dst_step1 = dst.step1(); int src_offset1 = src.offset / src.elemSize1(), dst_offset1 = dst.offset / dst.elemSize1(); @@ -796,19 +820,26 @@ static void arithmetic_lut_run(const oclMat &src, const oclMat &lut, oclMat &dst args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step1 )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); - openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, + openCLExecuteKernel(src.clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, lut.oclchannels(), -1, buildOptions.c_str()); } void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) { + if (!lut.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && lut.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + int cn = src.channels(), depth = src.depth(); + CV_Assert(depth == CV_8U || depth == CV_8S); CV_Assert(lut.channels() == 1 || lut.channels() == src.channels()); 
CV_Assert(lut.rows == 1 && lut.cols == 256); + dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn)); - String kernelName = "LUT"; - arithmetic_lut_run(src, lut, dst, kernelName); + arithmetic_lut_run(src, lut, dst, "LUT"); } ////////////////////////////////////////////////////////////////////////////// @@ -820,7 +851,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernel Context *clCxt = src.clCxt; if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } @@ -868,13 +899,6 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName) { - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); - return; - } - - Context *clCxt = src1.clCxt; int channels = dst.oclchannels(); int depth = dst.depth(); @@ -898,11 +922,17 @@ static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); - openCLExecuteKernel(clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) { + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + CV_Assert(src1.type() == src2.type() && src1.size() == src2.size() && (src1.depth() == CV_32F || src1.depth() == CV_64F)); @@ 
-912,13 +942,6 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source) { - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); - return; - } - - Context *clCxt = src1.clCxt; int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels(); int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1(); int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1(); @@ -940,11 +963,17 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1 )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows )); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) { + if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0); @@ -959,13 +988,6 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleI static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, String kernelName, bool angleInDegrees) { - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support 
double\r\n"); - return; - } - - Context *clCxt = src1.clCxt; int channels = src1.oclchannels(); int depth = src1.depth(); @@ -992,11 +1014,17 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tmp )); - openCLExecuteKernel(clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees) { + if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); mag.create(x.size(), x.type()); @@ -1012,13 +1040,6 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, String kernelName) { - if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); - return; - } - - Context *clCxt = src2.clCxt; int channels = src2.oclchannels(); int depth = src2.depth(); @@ -1049,21 +1070,25 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tmp )); - openCLExecuteKernel(clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth); } void 
cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees) { + if (!magnitude.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && magnitude.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F); + CV_Assert(magnitude.size() == angle.size() && magnitude.type() == angle.type()); x.create(angle.size(), angle.type()); y.create(angle.size(), angle.type()); if ( magnitude.data ) - { - CV_Assert( magnitude.size() == angle.size() && magnitude.type() == angle.type() ); arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart_mag"); - } else arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart"); } @@ -1195,7 +1220,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, { if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } @@ -1253,7 +1278,8 @@ int cv::ocl::countNonZero(const oclMat &src) Context *clCxt = src.clCxt; if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "selected device doesn't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "selected device doesn't support double"); + return -1; } size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; @@ -1286,8 +1312,6 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, String kernelName { dst.create(src1.size(), src1.type()); - - Context *clCxt = src1.clCxt; int channels = dst.oclchannels(); int depth = dst.depth(); @@ -1316,7 +1340,7 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, String kernelName args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols )); 
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 )); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } enum { AND = 0, OR, XOR }; @@ -1324,29 +1348,25 @@ enum { AND = 0, OR, XOR }; static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Scalar& src3, const oclMat &mask, oclMat &dst, int operationType) { - Context *clCxt = src1.clCxt; - if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) - { - std::cout << "Selected device does not support double" << std::endl; - return; - } - CV_Assert(operationType >= AND && operationType <= XOR); CV_Assert(src2.empty() || (!src2.empty() && src1.type() == src2.type() && src1.size() == src2.size())); CV_Assert(mask.empty() || (!mask.empty() && mask.type() == CV_8UC1 && mask.size() == src1.size())); dst.create(src1.size(), src1.type()); - - int elemSize = dst.elemSize(); - int cols1 = dst.cols * elemSize; oclMat m; const char operationMap[] = { '&', '|', '^' }; std::string kernelName("arithm_bitwise_binary"); - std::string buildOptions = format("-D Operation=%c", operationMap[operationType]); + + int vlen = std::min(8, src1.elemSize1() * src1.oclchannels()); + std::string vlenstr = vlen > 1 ? 
format("%d", vlen) : ""; + std::string buildOptions = format("-D Operation=%c -D vloadn=vload%s -D vstoren=vstore%s -D elemSize=%d -D vlen=%d" + " -D ucharv=uchar%s", + operationMap[operationType], vlenstr.c_str(), vlenstr.c_str(), + (int)src1.elemSize(), vlen, vlenstr.c_str()); size_t localThreads[3] = { 16, 16, 1 }; - size_t globalThreads[3] = { cols1, dst.rows, 1 }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data )); @@ -1359,7 +1379,6 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca m.setTo(src3); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&m.data )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemSize ) ); kernelName += "_scalar"; } @@ -1376,9 +1395,6 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.offset )); - if (!src2.empty()) - args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemSize )); - kernelName += "_mask"; } @@ -1386,10 +1402,10 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset )); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols1 )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows )); - openCLExecuteKernel(clCxt, mask.empty() ? (!src2.empty() ? &arithm_bitwise_binary : &arithm_bitwise_binary_scalar) : + openCLExecuteKernel(src1.clCxt, mask.empty() ? (!src2.empty() ? &arithm_bitwise_binary : &arithm_bitwise_binary_scalar) : (!src2.empty() ? 
&arithm_bitwise_binary_mask : &arithm_bitwise_binary_scalar_mask), kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); @@ -1397,15 +1413,14 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) { - if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - std::cout << "Selected device does not support double" << std::endl; + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } dst.create(src.size(), src.type()); - String kernelName = "arithm_bitwise_not"; - bitwise_unary_run(src, dst, kernelName, &arithm_bitwise_not); + bitwise_unary_run(src, dst, "arithm_bitwise_not", &arithm_bitwise_not); } void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) @@ -1525,13 +1540,6 @@ oclMatExpr::operator oclMat() const static void transpose_run(const oclMat &src, oclMat &dst, String kernelName, bool inplace = false) { - Context *clCxt = src.clCxt; - if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) - { - CV_Error(Error::GpuNotSupported, "Selected device doesn't support double\r\n"); - return; - } - const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char channelsString[] = { ' ', ' ', '2', '4', '4' }; std::string buildOptions = format("-D T=%s%c", typeMap[src.depth()], @@ -1553,13 +1561,17 @@ static void transpose_run(const oclMat &src, oclMat &dst, String kernelName, boo args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcoffset1 )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstoffset1 )); - openCLExecuteKernel(clCxt, &arithm_transpose, kernelName, globalThreads, localThreads, + openCLExecuteKernel(src.clCxt, &arithm_transpose, kernelName, globalThreads, localThreads, args, -1, -1, 
buildOptions.c_str()); } void cv::ocl::transpose(const oclMat &src, oclMat &dst) { - CV_Assert(src.depth() <= CV_64F && src.channels() <= 4); + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } if ( src.data == dst.data && src.cols == src.rows && dst.offset == src.offset && dst.size() == src.size()) @@ -1581,7 +1593,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); if (!hasDouble && src1.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } @@ -1645,10 +1657,6 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source) { - CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows); - CV_Assert(src1.type() == dst.type()); - - Context *clCxt = src1.clCxt; int channels = dst.oclchannels(); int depth = dst.depth(); @@ -1678,22 +1686,21 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, String else args.push_back( std::make_pair( sizeof(cl_double), (void *)&p )); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::pow(const oclMat &x, double p, oclMat &y) { - if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.type() == CV_64F) + if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.depth() == CV_64F) { - std::cout << "Selected device do not support double" << std::endl; + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } 
CV_Assert(x.depth() == CV_32F || x.depth() == CV_64F); y.create(x.size(), x.type()); - String kernelName = "arithm_pow"; - arithmetic_pow_run(x, p, y, kernelName, &arithm_pow); + arithmetic_pow_run(x, p, y, "arithm_pow", &arithm_pow); } ////////////////////////////////////////////////////////////////////////////// @@ -1702,10 +1709,9 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar) { - Context *clCxt = Context::getContext(); - if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } @@ -1729,6 +1735,6 @@ void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar) oclMat sc(1, 1, src.type(), scalar); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sc.data )); - openCLExecuteKernel(clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads, + openCLExecuteKernel(src.clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads, args, -1, -1, buildOptions.c_str()); } diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index 01785eaa26..72ffce485f 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -50,24 +50,26 @@ #include #include "cl_programcache.hpp" -// workaround for OpenCL C++ bindings -#if defined(HAVE_OPENCL12) -#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" -#elif defined(HAVE_OPENCL11) -#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" -#else -#error Invalid OpenCL configuration -#endif - -#if defined _MSC_VER && _MSC_VER >= 1200 -#pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) -#endif -#undef __CL_ENABLE_EXCEPTIONS -#include +#include "opencv2/ocl/private/opencl_utils.hpp" 
namespace cv { namespace ocl { +struct __Module +{ + __Module(); + ~__Module(); + cv::Mutex initializationMutex; + cv::Mutex currentContextMutex; +}; +static __Module __module; + +cv::Mutex& getInitializationMutex() +{ + return __module.initializationMutex; +} + + struct PlatformInfoImpl { cl_platform_id platform_id; @@ -325,21 +327,22 @@ not_found: return false; } -static cv::Mutex __initializedMutex; static bool __initialized = false; static int initializeOpenCLDevices() { + using namespace cl_utils; + assert(!__initialized); __initialized = true; assert(global_devices.size() == 0); - std::vector platforms; + std::vector platforms; try { - openCLSafeCall(cl::Platform::get(&platforms)); + openCLSafeCall(getPlatforms(platforms)); } - catch (cv::Exception& e) + catch (cv::Exception&) { return 0; // OpenCL not found } @@ -351,20 +354,20 @@ static int initializeOpenCLDevices() PlatformInfoImpl& platformInfo = global_platforms[i]; platformInfo.info._id = i; - cl::Platform& platform = platforms[i]; + cl_platform_id platform = platforms[i]; - platformInfo.platform_id = platform(); - openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile)); - openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion)); - openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName)); - openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor)); - openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons)); + platformInfo.platform_id = platform; + openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE, platformInfo.info.platformProfile)); + openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VERSION, platformInfo.info.platformVersion)); + openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_NAME, platformInfo.info.platformName)); + openCLSafeCall(getStringInfo(clGetPlatformInfo, 
platform, CL_PLATFORM_VENDOR, platformInfo.info.platformVendor)); + openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_EXTENSIONS, platformInfo.info.platformExtensons)); parseOpenCLVersion(platformInfo.info.platformVersion, platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor); - std::vector devices; - cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); + std::vector devices; + cl_int status = getDevices(platform, CL_DEVICE_TYPE_ALL, devices); if(status != CL_DEVICE_NOT_FOUND) openCLVerifyCall(status); @@ -377,60 +380,63 @@ static int initializeOpenCLDevices() for(size_t j = 0; j < devices.size(); ++j) { - cl::Device& device = devices[j]; + cl_device_id device = devices[j]; DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j]; deviceInfo.info._id = baseIndx + j; - deviceInfo.platform_id = platform(); - deviceInfo.device_id = device(); + deviceInfo.platform_id = platform; + deviceInfo.device_id = device; deviceInfo.info.platform = &platformInfo.info; platformInfo.deviceIDs[j] = deviceInfo.info._id; cl_device_type type = cl_device_type(-1); - openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_TYPE, type)); deviceInfo.info.deviceType = DeviceType(type); - openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile)); - openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); - openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); - openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); + openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_PROFILE, deviceInfo.info.deviceProfile)); + openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VERSION, deviceInfo.info.deviceVersion)); + openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_NAME, deviceInfo.info.deviceName)); + 
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR, deviceInfo.info.deviceVendor)); cl_uint vendorID = 0; - openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR_ID, vendorID)); deviceInfo.info.deviceVendorId = vendorID; - openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); - openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); + openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DRIVER_VERSION, deviceInfo.info.deviceDriverVersion)); + openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, deviceInfo.info.deviceExtensions)); parseOpenCLVersion(deviceInfo.info.deviceVersion, deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor); size_t maxWorkGroupSize = 0; - openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize)); deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize; cl_uint maxDimensions = 0; - openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxDimensions)); std::vector maxWorkItemSizes(maxDimensions); - openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, + openCLSafeCall(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, (void *)&maxWorkItemSizes[0], 0)); deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes; cl_uint maxComputeUnits = 0; - openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits)); deviceInfo.info.maxComputeUnits = maxComputeUnits; cl_ulong localMemorySize = 0; - 
openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_LOCAL_MEM_SIZE, localMemorySize)); deviceInfo.info.localMemorySize = (size_t)localMemorySize; + cl_ulong maxMemAllocSize = 0; + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize)); + deviceInfo.info.maxMemAllocSize = (size_t)maxMemAllocSize; cl_bool unifiedMemory = false; - openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory)); + openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_HOST_UNIFIED_MEMORY, unifiedMemory)); deviceInfo.info.isUnifiedMemory = unifiedMemory != 0; //initialize extra options for compilation. Currently only fp64 is included. //Assume 4KB is enough to store all possible extensions. - openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); + openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, deviceInfo.info.deviceExtensions)); size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64"); if(fp64_khr != std::string::npos) @@ -463,7 +469,7 @@ static int initializeOpenCLDevices() DeviceInfo::DeviceInfo() : _id(-1), deviceType(DeviceType(0)), deviceVendorId(-1), - maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), + maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), maxMemAllocSize(0), deviceVersionMajor(0), deviceVersionMinor(0), haveDoubleSupport(false), isUnifiedMemory(false), platform(NULL) @@ -501,9 +507,12 @@ public: bool supportsFeature(FEATURE_TYPE featureType) const; static void cleanupContext(void); + +private: + ContextImpl(const ContextImpl&); // disabled + ContextImpl& operator=(const ContextImpl&); // disabled }; -static cv::Mutex currentContextMutex; static ContextImpl* currentContext = NULL; Context* Context::getContext() @@ -512,19 +521,19 @@ Context* Context::getContext() { if (!__initialized || !__deviceSelected) { - 
cv::AutoLock lock(__initializedMutex); + cv::AutoLock lock(getInitializationMutex()); if (!__initialized) { if (initializeOpenCLDevices() == 0) { - CV_Error(CV_GpuNotSupported, "OpenCL not available"); + CV_Error(Error::OpenCLInitError, "OpenCL not available"); } } if (!__deviceSelected) { if (!selectOpenCLDevice()) { - CV_Error(CV_GpuNotSupported, "Can't select OpenCL device"); + CV_Error(Error::OpenCLInitError, "Can't select OpenCL device"); } } } @@ -608,7 +617,7 @@ void ContextImpl::cleanupContext(void) fft_teardown(); clBlasTeardown(); - cv::AutoLock lock(currentContextMutex); + cv::AutoLock lock(__module.currentContextMutex); if (currentContext) delete currentContext; currentContext = NULL; @@ -619,7 +628,7 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); { - cv::AutoLock lock(currentContextMutex); + cv::AutoLock lock(__module.currentContextMutex); if (currentContext) { if (currentContext->deviceInfo._id == deviceInfo->_id) @@ -644,7 +653,7 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) ContextImpl* old = NULL; { - cv::AutoLock lock(currentContextMutex); + cv::AutoLock lock(__module.currentContextMutex); old = currentContext; currentContext = ctx; } @@ -728,13 +737,19 @@ bool supportsFeature(FEATURE_TYPE featureType) return Context::getContext()->supportsFeature(featureType); } -struct __Module +__Module::__Module() { - __Module() { /* moved to Context::getContext(): initializeOpenCLDevices(); */ } - ~__Module() { ContextImpl::cleanupContext(); } -}; -static __Module __module; + /* moved to Context::getContext(): initializeOpenCLDevices(); */ +} +__Module::~__Module() +{ +#if defined(WIN32) && defined(CVAPI_EXPORTS) + // nothing, see DllMain +#else + ContextImpl::cleanupContext(); +#endif +} } // namespace ocl } // namespace cv @@ -749,6 +764,7 @@ BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved) { if (lpReserved != NULL) // 
called after ExitProcess() call cv::ocl::__termination = true; + cv::ocl::ContextImpl::cleanupContext(); } return TRUE; } diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp index 01bc2c1a9a..5f04561e94 100644 --- a/modules/ocl/src/cl_operations.cpp +++ b/modules/ocl/src/cl_operations.cpp @@ -212,13 +212,35 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea static double total_execute_time = 0; static double total_kernel_time = 0; #endif + +static std::string removeDuplicatedWhiteSpaces(const char * buildOptions) +{ + if (buildOptions == NULL) + return ""; + + size_t length = strlen(buildOptions), didx = 0, sidx = 0; + while (sidx < length && buildOptions[sidx] == 0) + ++sidx; + + std::string opt; + opt.resize(length); + + for ( ; sidx < length; ++sidx) + if (buildOptions[sidx] != ' ') + opt[didx++] = buildOptions[sidx]; + else if ( !(didx > 0 && opt[didx - 1] == ' ') ) + opt[didx++] = buildOptions[sidx]; + + return opt; +} + void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, const char *build_options) { //construct kernel name //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) + //for example split_C2_D3, represent the split kernel with channels = 2 and dataType Depth = 3(Data type is short) std::stringstream idxStr; if(channels != -1) idxStr << "_C" << channels; @@ -227,7 +249,8 @@ void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, Str kernelName = kernelName + idxStr.str(); cl_kernel kernel; - kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); + std::string fixedOptions = removeDuplicatedWhiteSpaces(build_options); + kernel = 
openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str()); if ( localThreads != NULL) { @@ -302,28 +325,27 @@ void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, Stri total_kernel_time = 0; cout << "-------------------------------------" << endl; - cout << setiosflags(ios::left) << setw(15) << "excute time"; - cout << setiosflags(ios::left) << setw(15) << "lauch time"; + cout << setiosflags(ios::left) << setw(15) << "execute time"; + cout << setiosflags(ios::left) << setw(15) << "launch time"; cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl; int i = 0; for(i = 0; i < RUN_TIMES; i++) openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, build_options); - cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; + cout << "average kernel execute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; #endif } -double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, +void openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramSource& source, String kernelName, size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, int channels, int depth, const char *build_options, - bool finish, bool measureKernelTime, bool cleanUp) + std::vector< std::pair > &args, int channels, int depth, const char *build_options) { //construct kernel name //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) + //for example split_C2_D2, represent the split kernel with channels = 2 and dataType Depth = 2 (Data type is char) std::stringstream idxStr; if(channels != -1) idxStr << "_C" << channels; 
@@ -331,63 +353,27 @@ double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* sou idxStr << "_D" << depth; kernelName = kernelName + idxStr.str(); - cl_kernel kernel; - kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); - - double kernelTime = 0.0; + std::string name = std::string("custom_") + source.name; + ProgramEntry program = { name.c_str(), source.programStr, source.programHash }; + cl_kernel kernel = openCLGetKernelFromSource(ctx, &program, kernelName, build_options); - if( globalThreads != NULL) + CV_Assert(globalThreads != NULL); + if ( localThreads != NULL) { - if ( localThreads != NULL) - { - globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; - globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; - globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; - - //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; - cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); - } - for(size_t i = 0; i < args.size(); i ++) - openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - - if(measureKernelTime == false) - { - openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); - } - else - { - cl_event event = NULL; - openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); - - cl_ulong end_time, queue_time; - - openCLSafeCall(clWaitForEvents(1, &event)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 0)); - - kernelTime = (double)(end_time - queue_time) / (1000 * 1000); - - clReleaseEvent(event); - } - } + globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); + globalThreads[1] = 
roundUp(globalThreads[1], localThreads[1]); + globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); - if(finish) - { - clFinish(getClCommandQueue(ctx)); + cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - if(cleanUp) - { - openCLSafeCall(clReleaseKernel(kernel)); - } + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); - return kernelTime; + clFinish(getClCommandQueue(ctx)); + openCLSafeCall(clReleaseKernel(kernel)); } cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp index 99a6db1415..ba279b794d 100644 --- a/modules/ocl/src/cl_programcache.cpp +++ b/modules/ocl/src/cl_programcache.cpp @@ -50,24 +50,8 @@ #include #include "cl_programcache.hpp" -// workaround for OpenCL C++ bindings -#if defined(HAVE_OPENCL12) -#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" -#elif defined(HAVE_OPENCL11) -#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" -#else -#error Invalid OpenCL configuration -#endif - -#if defined _MSC_VER && _MSC_VER >= 1200 -# pragma warning( disable: 4100 4244 4267 4510 4512 4610) -#endif -#undef __CL_ENABLE_EXCEPTIONS -#include - namespace cv { namespace ocl { -#define MAX_PROG_CACHE_SIZE 1024 /* * The binary caching system to eliminate redundant program source compilation. * Strictly, this is not a cache because we do not implement evictions right now. 
@@ -126,17 +110,12 @@ void ProgramCache::releaseProgram() cacheSize = 0; } -static int enable_disk_cache = true || -#ifdef _DEBUG - false; -#else - true; -#endif +static bool enable_disk_cache = true; static String binpath = ""; void setBinaryDiskCache(int mode, String path) { - enable_disk_cache = 0; + enable_disk_cache = false; binpath = ""; if(mode == CACHE_NONE) @@ -144,7 +123,7 @@ void setBinaryDiskCache(int mode, String path) return; } enable_disk_cache = -#ifdef _DEBUG +#if defined(_DEBUG) || defined(DEBUG) (mode & CACHE_DEBUG) == CACHE_DEBUG; #else (mode & CACHE_RELEASE) == CACHE_RELEASE; @@ -291,7 +270,7 @@ struct ProgramFileCache bool writeConfigurationToFile(const String& options, std::vector& buf) { if (hash_ == NULL) - return true; // don't save dynamic kernels + return true; // don't save programs without hash if (!f.is_open()) { @@ -469,26 +448,30 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn { std::stringstream src_sign; - src_sign << (int64)(source->programStr); - src_sign << getClContext(ctx); - if (NULL != build_options) + if (source->name) { - src_sign << "_" << build_options; - } + src_sign << source->name; + src_sign << getClContext(ctx); + if (NULL != build_options) + { + src_sign << "_" << build_options; + } - { - cv::AutoLock lockCache(mutexCache); - cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); - if (!!program) { - clRetainProgram(program); - return program; + cv::AutoLock lockCache(mutexCache); + cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); + if (!!program) + { + clRetainProgram(program); + return program; + } } } cv::AutoLock lockCache(mutexFiles); // second check + if (source->name) { cv::AutoLock lockCache(mutexCache); cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); @@ -514,15 +497,11 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn cl_program program = 
programFileCache.getOrBuildProgram(ctx, source, all_build_options); //Cache the binary for future use if build_options is null - if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) + if (source->name) { cv::AutoLock lockCache(mutexCache); this->addProgram(src_sign.str(), program); } - else - { - std::cout << "Warning: code cache has been full.\n"; - } return program; } diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index db668d4213..9b91802d85 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -60,111 +60,144 @@ using namespace cv::ocl; namespace { + void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) { - std::vector > args; int channels = src.oclchannels(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d", src.depth()); - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); + + String build_options = format("-D DEPTH_%d", src.depth()); + + std::vector > args; args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); + size_t gt[3] = {src.cols, 
src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options.c_str()); } + void Gray2RGB_caller(const oclMat &src, oclMat &dst) { + String build_options = format("-D DEPTH_%d", src.depth()); + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); + std::vector > args; - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d", src.depth()); - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options.c_str()); } + void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx) { - std::vector > args; int channels = src.oclchannels(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d", src.depth()); - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + String build_options = format("-D DEPTH_%d", 
src.depth()); + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); + + std::vector > args; args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options.c_str()); } + void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx) { - std::vector > args; int channels = src.oclchannels(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d", src.depth()); - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); + + String buildOptions = format("-D DEPTH_%d", src.depth()); + + std::vector > args; args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( 
sizeof(cl_int) , (void *)&src.step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); + size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, buildOptions.c_str()); } + void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx) { + String build_options = format("-D DEPTH_%d", src.depth()); + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); + std::vector > args; - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d", src.depth()); - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols)); args.push_back( std::make_pair( 
sizeof(cl_int) , (void *)&dst.rows)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); + size_t gt[3] = {dst.cols / 2, dst.rows / 2, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options.c_str()); } + void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx) { - std::vector > args; int channels = src.oclchannels(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d", src.depth()); - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); + String build_options = format("-D DEPTH_%d", src.depth()); + int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); + int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); + + std::vector > args; args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step)); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&channels)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); + size_t gt[3] = 
{src.cols, src.rows, 1}, lt[3] = {16, 16, 1}; - openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options.c_str()); } + void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn) { Size sz = src.size(); diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index 695b5b8a03..0fbcecc5c3 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -50,7 +50,7 @@ using namespace cv::ocl; #if !defined HAVE_CLAMDFFT void cv::ocl::dft(const oclMat&, oclMat&, Size, int) { - CV_Error(Error::StsNotImplemented, "OpenCL DFT is not implemented"); + CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented"); } namespace cv { namespace ocl { void fft_teardown(); @@ -90,8 +90,7 @@ namespace cv protected: PlanCache(); ~PlanCache(); - friend class std::auto_ptr; - static std::auto_ptr planCache; + static PlanCache* planCache; bool started; std::vector planStore; @@ -102,9 +101,9 @@ namespace cv static PlanCache* getPlanCache() { - if( NULL == planCache.get()) - planCache.reset(new PlanCache()); - return planCache.get(); + if (NULL == planCache) + planCache = new PlanCache(); + return planCache; } // return a baked plan-> // if there is one matched plan, return it @@ -118,7 +117,7 @@ namespace cv }; } } -std::auto_ptr PlanCache::planCache; +PlanCache* PlanCache::planCache = NULL; void cv::ocl::fft_setup() { @@ -134,17 +133,22 @@ void cv::ocl::fft_setup() void cv::ocl::fft_teardown() { PlanCache& pCache = *PlanCache::getPlanCache(); + if(!pCache.started) - { return; - } - delete pCache.setupData; + for(size_t i = 0; i < pCache.planStore.size(); i ++) - { delete pCache.planStore[i]; - } pCache.planStore.clear(); - openCLSafeCall( clAmdFftTeardown( ) ); + + try + { + openCLSafeCall( clAmdFftTeardown( ) ); + } + catch (const std::bad_alloc &) + { } + + delete pCache.setupData; pCache.setupData = NULL; pCache.started = 
false; } diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 8f238921da..4247cd886d 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -619,7 +619,7 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, const oclMat &mat_kernel sprintf(btype, "BORDER_REFLECT"); break; case 3: - CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!"); + CV_Error(Error::StsUnsupportedFormat, "BORDER_WRAP is not supported!"); return; case 4: sprintf(btype, "BORDER_REFLECT_101"); @@ -1418,7 +1418,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d { if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } @@ -1557,7 +1557,7 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize sprintf(btype, "BORDER_REFLECT_101"); break; default: - CV_Error(CV_StsBadArg, "This border type is not supported"); + CV_Error(Error::StsBadArg, "This border type is not supported"); break; } diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 2fceb6a0c3..504b697a76 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -58,12 +58,12 @@ void clBlasTeardown(); void cv::ocl::gemm(const oclMat&, const oclMat&, double, const oclMat&, double, oclMat&, int) { - CV_Error(Error::StsNotImplemented, "OpenCL BLAS is not implemented"); + CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented"); } void cv::ocl::clBlasSetup() { - CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); + CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented"); } void cv::ocl::clBlasTeardown() @@ -76,13 +76,12 @@ void cv::ocl::clBlasTeardown() using namespace cv; static bool clBlasInitialized = false; -static Mutex cs; void 
cv::ocl::clBlasSetup() { if(!clBlasInitialized) { - AutoLock al(cs); + AutoLock lock(getInitializationMutex()); if(!clBlasInitialized) { openCLSafeCall(clAmdBlasSetup()); @@ -93,7 +92,7 @@ void cv::ocl::clBlasSetup() void cv::ocl::clBlasTeardown() { - AutoLock al(cs); + AutoLock lock(getInitializationMutex()); if(clBlasInitialized) { clAmdBlasTeardown(); diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index 980f3575b3..28af01dc2f 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -202,8 +202,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); - CV_DbgAssert(support_image2d()); - ensureSizeIsEnough(image.size(), CV_32F, eig_); if (useHarrisDetector) diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index fca449ae35..15dcbf9b6a 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -624,37 +624,21 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade) cascade->p3 = equRect.width ; for( i = 0; i < _cascade->count; i++ ) { - int j, k, l; + int j, l; for( j = 0; j < stage_classifier[i].count; j++ ) { for( l = 0; l < stage_classifier[i].classifier[j].count; l++ ) { - CvHaarFeature *feature = + const CvHaarFeature *feature = &_cascade->stage_classifier[i].classifier[j].haar_feature[l]; GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l]; - CvRect r[3]; - - int nr; - - /* align blocks */ - for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ ) + for( int k = 0; k < CV_HAAR_FEATURE_MAX; k++ ) { - if(!hidnode->p[k][0]) + const CvRect tr = feature->rect[k].r; + if (tr.width == 0) break; - r[k] = feature->rect[k].r; - } - - nr = k; - for( k = 0; k < nr; k++ ) - { - CvRect tr; - double correction_ratio; - tr.x = r[k].x; - tr.width = r[k].width; - tr.y = r[k].y ; - tr.height = r[k].height; - 
correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5); + double correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5); hidnode->p[k][0] = tr.x; hidnode->p[k][1] = tr.y; hidnode->p[k][2] = tr.width; @@ -925,7 +909,6 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector> 2); @@ -144,10 +137,7 @@ namespace cv int src_offset = (src.offset >> 2); int src_step = (src.step >> 2); - String kernelName = "threshold"; - size_t cols = (dst.cols + (dst_offset & 3) + 3) / 4; - //size_t cols = dst.cols; size_t bSizeX = 16, bSizeY = 16; size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX; size_t gSizeY = dst.rows; @@ -166,11 +156,11 @@ namespace cv args.push_back( std::make_pair(sizeof(cl_float), (void *)&thresh_f)); args.push_back( std::make_pair(sizeof(cl_float), (void *)&max_val)); args.push_back( std::make_pair(sizeof(cl_int), (void *)&type)); - openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); + openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth()); } - //threshold: support 8UC1 and 32FC1 data type and five threshold type + // threshold: support 8UC1 and 32FC1 data type and five threshold type double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) { //TODO: These limitations shall be removed later. 
@@ -185,6 +175,7 @@ namespace cv return thresh; } + //////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////// remap ////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// @@ -200,27 +191,26 @@ namespace cv dst.create(map1.size(), src.type()); - String kernelName; - if( map1.type() == CV_32FC2 && !map2.data ) + if ( map1.type() == CV_32FC2 && !map2.data ) { - if(interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) + if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) kernelName = "remapLNFConstant"; - else if(interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) + else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) kernelName = "remapNNFConstant"; } - else if(map1.type() == CV_16SC2 && !map2.data) + else if (map1.type() == CV_16SC2 && !map2.data) { - if(interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) + if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) kernelName = "remapLNSConstant"; - else if(interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) + else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) kernelName = "remapNNSConstant"; } - else if(map1.type() == CV_32FC1 && map2.type() == CV_32FC1) + else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) { - if(interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) + if (interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) kernelName = "remapLNF1Constant"; else if (interpolation == INTER_NEAREST && borderType == BORDER_CONSTANT) kernelName = "remapNNF1Constant"; @@ -229,30 +219,27 @@ namespace cv size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; int cols = dst.cols; - if(src.type() == CV_8UC1) + if (src.type() == CV_8UC1) { cols = (dst.cols + dst.offset % 4 + 3) / 4; glbSizeX = cols % blkSizeX == 0 ? 
cols : (cols / blkSizeX + 1) * blkSizeX; } - else if(src.type() == CV_32FC1 && interpolation == INTER_LINEAR) + else if (src.type() == CV_32FC1 && interpolation == INTER_LINEAR) { cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4; glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; } else - { glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; - } - size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; std::vector< std::pair > args; - if(map1.channels() == 2) + if (map1.channels() == 2) { args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data)); @@ -272,16 +259,12 @@ namespace cv args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols)); float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; - if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) - { + if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue)); - } else - { args.push_back( std::make_pair(sizeof(cl_float4), (void *)&borderFloat)); - } } - if(map1.channels() == 1) + if (map1.channels() == 1) { args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data)); @@ -300,14 +283,10 @@ namespace cv args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols)); - if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) - { + if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) args.push_back( 
std::make_pair(sizeof(cl_double4), (void *)&borderValue)); - } else - { args.push_back( std::make_pair(sizeof(cl_float4), (void *)&borderFloat)); - } } openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } @@ -327,31 +306,30 @@ namespace cv int srcoffset_in_pixel = src.offset / src.elemSize(); int dstStep_in_pixel = dst.step1() / dst.oclchannels(); int dstoffset_in_pixel = dst.offset / dst.elemSize(); - //printf("%d %d\n",src.step1() , dst.elemSize()); + String kernelName; - if(interpolation == INTER_LINEAR) + if (interpolation == INTER_LINEAR) kernelName = "resizeLN"; - else if(interpolation == INTER_NEAREST) + else if (interpolation == INTER_NEAREST) kernelName = "resizeNN"; //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; - if(src.type() == CV_8UC1) + if (src.type() == CV_8UC1) { size_t cols = (dst.cols + dst.offset % 4 + 3) / 4; glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; } else - { glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; - } + size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? 
dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; std::vector< std::pair > args; - if(interpolation == INTER_NEAREST) + if (interpolation == INTER_NEAREST) { args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data)); @@ -363,7 +341,7 @@ namespace cv args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows)); - if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( std::make_pair(sizeof(cl_double), (void *)&ifx_d)); args.push_back( std::make_pair(sizeof(cl_double), (void *)&ify_d)); @@ -393,7 +371,6 @@ namespace cv openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } - void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation) { @@ -403,17 +380,12 @@ namespace cv CV_Assert( src.size().area() > 0 ); CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) ); - if(!(dsize == Size()) && (fx > 0 && fy > 0)) - { - if(dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy)) - { + if (!(dsize == Size()) && (fx > 0 && fy > 0)) + if (dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy)) CV_Error(Error::StsUnmatchedSizes, "invalid dsize and fx, fy!"); - } - } - if( dsize == Size() ) - { + + if ( dsize == Size() ) dsize = Size(saturate_cast(src.cols * fx), saturate_cast(src.rows * fy)); - } else { fx = (double)dsize.width / src.cols; @@ -422,24 +394,25 @@ namespace cv dst.create(dsize, src.type()); - if( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR ) + if ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR ) { 
resize_gpu( src, dst, fx, fy, interpolation); return; } + CV_Error(Error::StsUnsupportedFormat, "Non-supported interpolation method"); } - //////////////////////////////////////////////////////////////////////// // medianFilter + void medianFilter(const oclMat &src, oclMat &dst, int m) { CV_Assert( m % 2 == 1 && m > 1 ); CV_Assert( m <= 5 || src.depth() == CV_8U ); CV_Assert( src.cols <= dst.cols && src.rows <= dst.rows ); - if(src.data == dst.data) + if (src.data == dst.data) { oclMat src1; src.copyTo(src1); @@ -452,8 +425,6 @@ namespace cv int dstOffset = dst.offset / dst.oclchannels() / dst.elemSize1(); Context *clCxt = src.clCxt; - String kernelName = "medianFilter"; - std::vector< std::pair > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data)); @@ -468,67 +439,65 @@ namespace cv size_t globalThreads[3] = {(src.cols + 18) / 16 * 16, (src.rows + 15) / 16 * 16, 1}; size_t localThreads[3] = {16, 16, 1}; - if(m == 3) + if (m == 3) { String kernelName = "medianFilter3"; openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } - else if(m == 5) + else if (m == 5) { String kernelName = "medianFilter5"; openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } else - CV_Error(Error::StsUnsupportedFormat, "Non-supported filter length"); + CV_Error(Error::StsBadArg, "Non-supported filter length"); } //////////////////////////////////////////////////////////////////////// // copyMakeBorder + void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar) { CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0); - if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi { - if(((bordertype & cv::BORDER_ISOLATED) == 0) && + if (((bordertype & 
cv::BORDER_ISOLATED) == 0) && (bordertype != cv::BORDER_CONSTANT) && (bordertype != cv::BORDER_REPLICATE)) { - CV_Error(Error::StsBadArg, "unsupported border type"); + CV_Error(Error::StsBadArg, "Unsupported border type"); } } + bordertype &= ~cv::BORDER_ISOLATED; - if((bordertype == cv::BORDER_REFLECT) || (bordertype == cv::BORDER_WRAP)) + if (bordertype == cv::BORDER_REFLECT || bordertype == cv::BORDER_WRAP) { CV_Assert((src.cols >= left) && (src.cols >= right) && (src.rows >= top) && (src.rows >= bottom)); } - - if(bordertype == cv::BORDER_REFLECT_101) + else if (bordertype == cv::BORDER_REFLECT_101) { CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom)); } dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); - int srcStep = src.step1() / src.oclchannels(); - int dstStep = dst.step1() / dst.oclchannels(); - int srcOffset = src.offset / src.elemSize(); - int dstOffset = dst.offset / dst.elemSize(); + int srcStep = src.step1() / src.oclchannels(), dstStep = dst.step1() / dst.oclchannels(); + int srcOffset = src.offset / src.elemSize(), dstOffset = dst.offset / dst.elemSize(); + int depth = src.depth(), ochannels = src.oclchannels(); + int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101}; const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; size_t bordertype_index; + for(bordertype_index = 0; bordertype_index < sizeof(__bordertype) / sizeof(int); bordertype_index++) - { - if(__bordertype[bordertype_index] == bordertype) + if (__bordertype[bordertype_index] == bordertype) break; - } - if(bordertype_index == sizeof(__bordertype) / sizeof(int)) - { + + if (bordertype_index == sizeof(__bordertype) / sizeof(int)) CV_Error(Error::StsBadArg, "unsupported border type"); - } + String kernelName = "copymakeborder"; size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3] = 
{(dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], - (dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1 - }; + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; std::vector< std::pair > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data)); @@ -543,169 +512,30 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstOffset)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&top)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&left)); - char compile_option[64]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - } val; - switch(dst.depth()) + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + std::string buildOptions = format("-D GENTYPE=%s%s -D %s", + typeMap[depth], channelMap[ochannels], + borderstr[bordertype_index]); + + if (src.type() == CV_8UC1 && (dst.offset & 3) == 0 && (dst.cols & 3) == 0) { - case CV_8U: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - if(((dst.offset & 3) == 0) && ((dst.cols & 3) == 0)) - { - kernelName = "copymakeborder_C1_D0"; - globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - } - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; 
- case CV_8S: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=char4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_16U: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_16S: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - 
CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_32S: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; - case 2: - sprintf(compile_option, "-D GENTYPE=int2 -D %s", borderstr[bordertype_index]); - cl_int2 i2val; - i2val.s[0] = val.ival.s[0]; - i2val.s[1] = val.ival.s[1]; - args.push_back( std::make_pair( sizeof(cl_int2) , (void *)&i2val )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_32F: - val.fval.s[0] = scalar.val[0]; - val.fval.s[1] = scalar.val[1]; - val.fval.s[2] = scalar.val[2]; - val.fval.s[3] = scalar.val[3]; - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; - case CV_64F: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.oclchannels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 
4: - sprintf(compile_option, "-D GENTYPE=double4 -D %s", borderstr[bordertype_index]); - args.push_back( std::make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unsupported channels"); - } - break; - default: - CV_Error(Error::StsUnsupportedFormat, "unknown depth"); + kernelName = "copymakeborder_C1_D0"; + globalThreads[0] = dst.cols >> 2; } - openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); + int cn = src.channels(), ocn = src.oclchannels(); + int bufSize = src.elemSize1() * ocn; + AutoBuffer _buf(bufSize); + uchar * buf = (uchar *)_buf; + scalarToRawData(scalar, buf, dst.type()); + memset(buf + src.elemSize1() * cn, 0, (ocn - cn) * src.elemSize1()); + + args.push_back( std::make_pair( bufSize , (void *)buf )); + + openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, + localThreads, args, -1, -1, buildOptions.c_str()); } //////////////////////////////////////////////////////////////////////// @@ -741,7 +571,7 @@ namespace cv double *Dd = M; double d = det3(Sd); double result = 0; - if( d != 0) + if ( d != 0) { double t[9]; result = d; @@ -784,32 +614,32 @@ namespace cv String s[3] = {"NN", "Linear", "Cubic"}; String kernelName = "warpAffine" + s[interpolation]; - - if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { cl_int st; coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, + sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); } else { cl_int st; for(int m = 0; m < 2; m++) for(int n = 0; n < 3; 
n++) - { float_coeffs[m][n] = coeffs[m][n]; - } - coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); - openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); + + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, + 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; size_t cols; - //if(src.type() == CV_8UC1 && interpolation != 2) - if(src.type() == CV_8UC1 && interpolation != 2) + + if (src.type() == CV_8UC1 && interpolation != 2) { cols = (dst.cols + dst.offset % 4 + 3) / 4; glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; @@ -819,6 +649,7 @@ namespace cv cols = dst.cols; glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; } + size_t glbSizeY = dst.rows % blkSizeY == 0 ? 
dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; @@ -842,7 +673,6 @@ namespace cv openCLSafeCall(clReleaseMemObject(coeffs_cm)); } - void warpPerspective_gpu(const oclMat &src, oclMat &dst, double coeffs[3][3], int interpolation) { CV_Assert( (src.oclchannels() == dst.oclchannels()) ); @@ -855,12 +685,13 @@ namespace cv String s[3] = {"NN", "Linear", "Cubic"}; String kernelName = "warpPerspective" + s[interpolation]; - if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { cl_int st; coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, + sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); } else { @@ -871,24 +702,25 @@ namespace cv coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, + sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); } + //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; size_t cols; - if(src.type() == CV_8UC1 && interpolation == 0) + if (src.type() == CV_8UC1 && interpolation == 0) { cols = (dst.cols + dst.offset % 4 + 3) / 4; glbSizeX = cols % blkSizeX == 0 ? 
cols : (cols / blkSizeX + 1) * blkSizeX; } else - /* - */ { cols = dst.cols; glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; } + size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; @@ -930,10 +762,8 @@ namespace cv double coeffsM[2*3]; Mat coeffsMat(2, 3, CV_64F, (void *)coeffsM); M.convertTo(coeffsMat, coeffsMat.type()); - if(!warpInd) - { + if (!warpInd) convert_coeffs(coeffsM); - } for(int i = 0; i < 2; ++i) for(int j = 0; j < 3; ++j) @@ -960,10 +790,8 @@ namespace cv double coeffsM[3*3]; Mat coeffsMat(3, 3, CV_64F, (void *)coeffsM); M.convertTo(coeffsMat, coeffsMat.type()); - if(!warpInd) - { + if (!warpInd) invert(coeffsM); - } for(int i = 0; i < 3; ++i) for(int j = 0; j < 3; ++j) @@ -974,12 +802,13 @@ namespace cv //////////////////////////////////////////////////////////////////////// // integral + void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "select device don't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Select device doesn't support double"); return; } @@ -1073,6 +902,7 @@ namespace cv } /////////////////////// corner ////////////////////////////// + static void extractCovData(const oclMat &src, oclMat &Dx, oclMat &Dy, int blockSize, int ksize, int borderType) { @@ -1087,9 +917,8 @@ namespace cv scale = 1. / scale; } else - { scale = 1. / scale; - } + if (ksize > 0) { Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType); @@ -1122,10 +951,10 @@ namespace cv sprintf(borderType, "BORDER_REPLICATE"); break; default: - std::cout << "BORDER type is not supported!" 
<< std::endl; + CV_Error(Error::StsBadFlag, "BORDER type is not supported!"); } - char build_options[150]; - sprintf(build_options, "-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s", + + std::string buildOptions = format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s", block_size / 2, block_size / 2, block_size, block_size, borderType); size_t blockSizeX = 256, blockSizeY = 1; @@ -1155,7 +984,7 @@ namespace cv args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&k)); - openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, buildOptions.c_str()); } void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, @@ -1168,12 +997,15 @@ namespace cv void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, double k, int borderType) { - if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "select device don't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "Select device doesn't support double"); + return; } + CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); + CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE + || borderType == cv::BORDER_REFLECT); extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); corner_ocl(&imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); @@ -1187,29 +1019,33 @@ namespace cv 
void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType) { - if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { - CV_Error(Error::GpuNotSupported, "select device don't support double"); + CV_Error(Error::OpenCLDoubleNotSupported, "select device don't support double"); + return; } + CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); + corner_ocl(&imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); } + /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// + static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) { CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); CV_Assert( !(dst.step & 0x3) ); - Context *clCxt = src.clCxt; //Arrange the NDRange int col = src.cols, row = src.rows; int ltx = 16, lty = 8; - if(src.cols % ltx != 0) + if (src.cols % ltx != 0) col = (col / ltx + 1) * ltx; - if(src.rows % lty != 0) + if (src.rows % lty != 0) row = (row / lty + 1) * lty; size_t globalThreads[3] = {col, row, 1}; @@ -1229,31 +1065,31 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sr )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxIter )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&eps )); - openCLExecuteKernel(clCxt, &meanShift, "meanshift_kernel", globalThreads, localThreads, args, -1, -1); + + openCLExecuteKernel(src.clCxt, &meanShift, "meanshift_kernel", globalThreads, localThreads, args, -1, -1); } void meanShiftFiltering(const oclMat 
&src, oclMat &dst, int sp, int sr, TermCriteria criteria) { - if( src.empty() ) - CV_Error(Error::StsBadArg, "The input image is empty" ); + if (src.empty()) + CV_Error(Error::StsBadArg, "The input image is empty"); - if( src.depth() != CV_8U || src.oclchannels() != 4 ) - CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); + if ( src.depth() != CV_8U || src.oclchannels() != 4 ) + CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); dst.create( src.size(), CV_8UC4 ); - if( !(criteria.type & TermCriteria::MAX_ITER) ) + if ( !(criteria.type & TermCriteria::MAX_ITER) ) criteria.maxCount = 5; int maxIter = std::min(std::max(criteria.maxCount, 1), 100); float eps; - if( !(criteria.type & TermCriteria::EPS) ) + if ( !(criteria.type & TermCriteria::EPS) ) eps = 1.f; eps = (float)std::max(criteria.epsilon, 0.0); meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); - } static void meanShiftProc_gpu(const oclMat &src, oclMat dstr, oclMat dstsp, int sp, int sr, int maxIter, float eps) @@ -1262,14 +1098,13 @@ namespace cv CV_Assert( (src.cols == dstr.cols) && (src.rows == dstr.rows) && (src.rows == dstsp.rows) && (src.cols == dstsp.cols)); CV_Assert( !(dstsp.step & 0x3) ); - Context *clCxt = src.clCxt; //Arrange the NDRange int col = src.cols, row = src.rows; int ltx = 16, lty = 8; - if(src.cols % ltx != 0) + if (src.cols % ltx != 0) col = (col / ltx + 1) * ltx; - if(src.rows % lty != 0) + if (src.rows % lty != 0) row = (row / lty + 1) * lty; size_t globalThreads[3] = {col, row, 1}; @@ -1292,27 +1127,34 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sr )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxIter )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&eps )); - openCLExecuteKernel(clCxt, &meanShift, "meanshiftproc_kernel", globalThreads, localThreads, args, -1, -1); + + openCLExecuteKernel(src.clCxt, &meanShift, "meanshiftproc_kernel", globalThreads, 
localThreads, args, -1, -1); } void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, TermCriteria criteria) { - if( src.empty() ) - CV_Error(Error::StsBadArg, "The input image is empty" ); + if (src.empty()) + CV_Error(Error::StsBadArg, "The input image is empty"); + + if ( src.depth() != CV_8U || src.oclchannels() != 4 ) + CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); - if( src.depth() != CV_8U || src.oclchannels() != 4 ) - CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); +// if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) +// { +// CV_Error(Error::OpenCLDoubleNotSupportedNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n"); +// return; +// } dstr.create( src.size(), CV_8UC4 ); dstsp.create( src.size(), CV_16SC2 ); - if( !(criteria.type & TermCriteria::MAX_ITER) ) + if ( !(criteria.type & TermCriteria::MAX_ITER) ) criteria.maxCount = 5; int maxIter = std::min(std::max(criteria.maxCount, 1), 100); float eps; - if( !(criteria.type & TermCriteria::EPS) ) + if ( !(criteria.type & TermCriteria::EPS) ) eps = 1.f; eps = (float)std::max(criteria.epsilon, 0.0); @@ -1322,6 +1164,7 @@ namespace cv /////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////hist/////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////// + namespace histograms { const int PARTIAL_HISTOGRAM256_COUNT = 256; @@ -1332,11 +1175,8 @@ namespace cv { using namespace histograms; - Context *clCxt = mat_src.clCxt; int depth = mat_src.depth(); - String kernelName = "calc_sub_hist"; - size_t localThreads[3] = { HISTOGRAM256_BIN_COUNT, 1, 1 }; size_t globalThreads[3] = { PARTIAL_HISTOGRAM256_COUNT *localThreads[0], 1, 1}; @@ -1349,7 
+1189,7 @@ namespace cv int hist_step = mat_sub_hist.step >> 2; int left_col = 0, right_col = 0; - if(cols >= dataWidth * 2 - 1) + if (cols >= dataWidth * 2 - 1) { left_col = dataWidth - (src_offset & mask); left_col &= mask; @@ -1367,7 +1207,7 @@ namespace cv } std::vector > args; - if(globalThreads[0] != 0) + if (globalThreads[0] != 0) { int tempcols = cols >> dataWidth_bits; int inc_x = globalThreads[0] % tempcols; @@ -1375,6 +1215,7 @@ namespace cv src_offset >>= dataWidth_bits; int src_step = mat_src.step >> dataWidth_bits; int datacount = tempcols * mat_src.rows; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset)); @@ -1384,16 +1225,17 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int), (void *)&inc_x)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&inc_y)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&hist_step)); - openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth); + + openCLExecuteKernel(mat_src.clCxt, &imgproc_histogram, "calc_sub_hist", globalThreads, localThreads, args, -1, depth); } - if(left_col != 0 || right_col != 0) + + if (left_col != 0 || right_col != 0) { - kernelName = "calc_sub_hist_border"; src_offset = mat_src.offset; localThreads[0] = 1; localThreads[1] = 256; globalThreads[0] = left_col + right_col; - globalThreads[1] = (mat_src.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; + globalThreads[1] = mat_src.rows; args.clear(); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src.data)); @@ -1404,25 +1246,27 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.rows)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&hist_step)); - openCLExecuteKernel(clCxt, 
&imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth); + + openCLExecuteKernel(mat_src.clCxt, &imgproc_histogram, "calc_sub_hist_border", globalThreads, localThreads, args, -1, depth); } } + static void merge_sub_hist(const oclMat &sub_hist, oclMat &mat_hist) { using namespace histograms; - Context *clCxt = sub_hist.clCxt; - String kernelName = "merge_hist"; - size_t localThreads[3] = { 256, 1, 1 }; size_t globalThreads[3] = { HISTOGRAM256_BIN_COUNT *localThreads[0], 1, 1}; int src_step = sub_hist.step >> 2; + std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sub_hist.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_hist.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step)); - openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1); + + openCLExecuteKernel(sub_hist.clCxt, &imgproc_histogram, "merge_hist", globalThreads, localThreads, args, -1, -1); } + void calcHist(const oclMat &mat_src, oclMat &mat_hist) { using namespace histograms; @@ -1435,6 +1279,7 @@ namespace cv calc_sub_hist(mat_src, buf); merge_sub_hist(buf, mat_hist); } + ///////////////////////////////////equalizeHist///////////////////////////////////////////////////// void equalizeHist(const oclMat &mat_src, oclMat &mat_dst) { @@ -1444,17 +1289,17 @@ namespace cv calcHist(mat_src, mat_hist); - Context *clCxt = mat_src.clCxt; - String kernelName = "calLUT"; size_t localThreads[3] = { 256, 1, 1}; size_t globalThreads[3] = { 256, 1, 1}; oclMat lut(1, 256, CV_8UC1); - std::vector > args; int total = mat_src.rows * mat_src.cols; + + std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_hist.data)); args.push_back( std::make_pair( sizeof(int), (void *)&total)); - openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1); + + 
openCLExecuteKernel(mat_src.clCxt, &imgproc_histogram, "calLUT", globalThreads, localThreads, args, -1, -1); LUT(mat_src, lut, mat_dst); } @@ -1485,16 +1330,15 @@ namespace cv size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; bool is_cpu = isCpuDevice(); if (is_cpu) - openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU"); + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)"-D CPU"); else { cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName); - size_t wave_size = queryWaveFrontSize(kernel); + int wave_size = (int)queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); - static char opt[20] = {0}; - sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size); - openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt); + std::string opt = format("-D WAVE_SIZE=%d", wave_size); + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt.c_str()); } } @@ -1518,11 +1362,10 @@ namespace cv args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY )); - String kernelName = "transform"; size_t localThreads[3] = { 32, 8, 1 }; size_t globalThreads[3] = { src.cols, src.rows, 1 }; - openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, "transform", globalThreads, localThreads, args, -1, -1); } } @@ -1553,8 +1396,9 @@ namespace cv oclMat srcExt_; oclMat lut_; }; + CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : - clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) + clipLimit_(clipLimit), tilesX_(tilesX), 
tilesY_(tilesY) { } @@ -1562,6 +1406,7 @@ namespace cv obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); obj.info()->addParam(obj, "tilesX", obj.tilesX_); obj.info()->addParam(obj, "tilesY", obj.tilesY_)) + void CLAHE_Impl::apply(cv::InputArray src_raw, cv::OutputArray dst_raw) { oclMat& src = getOclMatRef(src_raw); @@ -1601,7 +1446,6 @@ namespace cv } clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale); - //finish(); clahe::transform(src, dst, lut_, tilesX_, tilesY_, tileSize); } @@ -1639,8 +1483,8 @@ namespace cv } //////////////////////////////////bilateralFilter//////////////////////////////////////////////////// - static void - oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d, + + static void oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d, double sigma_color, double sigma_space, int borderType ) { @@ -1651,15 +1495,15 @@ namespace cv src.type() == dst.type() && src.size() == dst.size() && src.data != dst.data ); - if( sigma_color <= 0 ) + if ( sigma_color <= 0 ) sigma_color = 1; - if( sigma_space <= 0 ) + if ( sigma_space <= 0 ) sigma_space = 1; double gauss_color_coeff = -0.5 / (sigma_color * sigma_color); double gauss_space_coeff = -0.5 / (sigma_space * sigma_space); - if( d <= 0 ) + if ( d <= 0 ) radius = cvRound(sigma_space * 1.5); else radius = d / 2; @@ -1678,6 +1522,7 @@ namespace cv int dst_step_in_pixel = dst.step / dst.elemSize(); int dst_offset_in_pixel = dst.offset / dst.elemSize(); int temp_step_in_pixel = temp.step / temp.elemSize(); + // initialize color-related bilateral filter coefficients for( i = 0; i < 256 * cn; i++ ) color_weight[i] = (float)std::exp(i * i * gauss_color_coeff); @@ -1687,26 +1532,26 @@ namespace cv for( j = -radius; j <= radius; j++ ) { double r = std::sqrt((double)i * i + (double)j * j); - if( r > radius ) + if ( r > radius ) continue; space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff); space_ofs[maxk++] = (int)(i * temp_step_in_pixel + j); } + 
oclMat oclcolor_weight(1, cn * 256, CV_32FC1, color_weight); oclMat oclspace_weight(1, d * d, CV_32FC1, space_weight); oclMat oclspace_ofs(1, d * d, CV_32SC1, space_ofs); String kernelName = "bilateral"; size_t localThreads[3] = { 16, 16, 1 }; - size_t globalThreads[3] = { (dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], - (dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], - 1 - }; - if((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0)) + size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + + if ((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0)) { kernelName = "bilateral2"; - globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; + globalThreads[0] = dst.cols / 4; } + std::vector > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&temp.data )); @@ -1726,12 +1571,11 @@ namespace cv } void bilateralFilter(const oclMat &src, oclMat &dst, int radius, double sigmaclr, double sigmaspc, int borderType) { - dst.create( src.size(), src.type() ); - if( src.depth() == CV_8U ) + if ( src.depth() == CV_8U ) oclbilateralFilter_8u( src, dst, radius, sigmaclr, sigmaspc, borderType ); else - CV_Error(Error::StsUnsupportedFormat, "Bilateral filtering is only implemented for 8uimages" ); + CV_Error(Error::StsUnsupportedFormat, "Bilateral filtering is only implemented for 8uimages"); } } @@ -1870,7 +1714,7 @@ static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &r } #else - CV_Error(Error::StsNotImplemented, "OpenCL DFT is not implemented"); + CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented"); #define UNUSED(x) (void)(x); UNUSED(image) UNUSED(templ) UNUSED(result) UNUSED(ccorr) UNUSED(buf) #undef UNUSED diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp index 1f8b95b826..c5a03bacd9 100644 --- 
a/modules/ocl/src/kmeans.cpp +++ b/modules/ocl/src/kmeans.cpp @@ -164,7 +164,7 @@ void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src { //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) //{ - // CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); + // CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); // return; //} diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 13361d740d..f3dc7b56f5 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -119,41 +119,33 @@ static void convert_C4C3(const oclMat &src, cl_mem &dst) void cv::ocl::oclMat::upload(const Mat &m) { + if (!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && m.depth() == CV_64F) + { + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); + return; + } + CV_DbgAssert(!m.empty()); Size wholeSize; Point ofs; m.locateROI(wholeSize, ofs); - if(m.channels() == 3) + create(wholeSize, m.type()); + + if (m.channels() == 3) { - create(wholeSize, m.type()); int pitch = wholeSize.width * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp; - if(gDeviceMemType!=DEVICE_MEM_UHP && gDeviceMemType!=DEVICE_MEM_CHP){ - temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, - (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); - openCLVerifyCall(err); - openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, - wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); - } - else{ - temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, - (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, m.datastart, &err); - openCLVerifyCall(err); - } + cl_mem temp = 
clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, + (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); + openCLVerifyCall(err); + openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); convert_C3C4(temp, *this); openCLSafeCall(clReleaseMemObject(temp)); } else - { - // try to use host ptr - createEx(wholeSize, m.type(), gDeviceMemRW, gDeviceMemType, m.datastart); - if(gDeviceMemType!=DEVICE_MEM_UHP && gDeviceMemType!=DEVICE_MEM_CHP) - openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, - wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice); - } + openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice); rows = m.rows; cols = m.cols; @@ -322,7 +314,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && (depth() == CV_64F || dst.depth() == CV_64F)) { - CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } @@ -360,6 +352,66 @@ oclMat &cv::ocl::oclMat::operator = (const Scalar &s) return *this; } +#ifdef CL_VERSION_1_2 + +template +static std::vector cvt1(const cv::Scalar & s) +{ + std::vector _buf(sizeof(CLT)); + CLT * const buf = reinterpret_cast(&_buf[0]); + buf[0] = saturate_cast(s[0]); + return _buf; +} + +template +static std::vector cvt2(const cv::Scalar & s) +{ + std::vector _buf(sizeof(CLT)); + CLT * const buf = reinterpret_cast(&_buf[0]); + buf->s[0] = saturate_cast(s[0]); + buf->s[1] = saturate_cast(s[1]); + return _buf; +} + +template +static std::vector cvt4(const cv::Scalar & s) +{ + std::vector _buf(sizeof(CLT)); + CLT * const buf = reinterpret_cast(&_buf[0]); + buf->s[0] = saturate_cast(s[0]); + buf->s[1] = saturate_cast(s[1]); + 
buf->s[2] = saturate_cast(s[2]); + buf->s[3] = saturate_cast(s[3]); + return _buf; +} + +typedef std::vector (*ConvertFunc)(const cv::Scalar & s); + +static std::vector scalarToCLVector(const cv::Scalar & s, int type) +{ + const int depth = CV_MAT_DEPTH(type); + const int channels = CV_MAT_CN(type); + + static const ConvertFunc funcs[4][7] = + { + { cvt1, cvt1, cvt1, cvt1, + cvt1, cvt1, cvt1 }, + + { cvt2, cvt2, cvt2, cvt2, + cvt2, cvt2, cvt2 }, + + { 0, 0, 0, 0, 0, 0, 0 }, + + { cvt4, cvt4, cvt4, cvt4, + cvt4, cvt4, cvt4 } + }; + + ConvertFunc func = funcs[channels - 1][depth]; + return func(s); +} + +#endif + static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName) { std::vector > args; @@ -380,23 +432,14 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, Stri #ifdef CL_VERSION_1_2 // this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support -// if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && -// dst.offset == 0 && dst.cols == dst.wholecols) -// { -// const int sizeofMap[][7] = -// { -// { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, -// { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, -// { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, -// { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, -// }; -// int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; - -// clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), -// (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, -// 0, dst.step * dst.rows, 0, NULL, NULL); -// } -// else + if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && dst.isContinuous()) + { + std::vector p = 
::scalarToCLVector(scalar, CV_MAKE_TYPE(dst.depth(), dst.oclchannels())); + clEnqueueFillBuffer(getClCommandQueue(dst.clCxt), + (cl_mem)dst.data, (void*)&p[0], p.size(), + 0, dst.step * dst.rows, 0, NULL, NULL); + } + else #endif { oclMat m(mat); @@ -501,9 +544,9 @@ oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const } void cv::ocl::oclMat::createEx(Size size, int type, - DevMemRW rw_type, DevMemType mem_type, void* hptr) + DevMemRW rw_type, DevMemType mem_type) { - createEx(size.height, size.width, type, rw_type, mem_type, hptr); + createEx(size.height, size.width, type, rw_type, mem_type); } void cv::ocl::oclMat::create(int _rows, int _cols, int _type) @@ -512,7 +555,7 @@ void cv::ocl::oclMat::create(int _rows, int _cols, int _type) } void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, - DevMemRW rw_type, DevMemType mem_type, void* hptr) + DevMemRW rw_type, DevMemType mem_type) { clCxt = Context::getContext(); /* core logic */ diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index 11097aac30..866dbbef6e 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -45,26 +45,6 @@ #include "precomp.hpp" -#ifdef __GNUC__ -#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 -#define GCC_DIAG_STR(s) #s -#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y) -# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x) -# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x) -# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406 -# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \ -GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x)) -# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop) -# else -# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x)) -# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x)) -# endif -#else -# define GCC_DIAG_OFF(x) -# define GCC_DIAG_ON(x) -#endif -#endif /* __GNUC__ */ - using namespace std; namespace cv @@ -134,9 +114,6 @@ namespace cv build_options, finish_mode); } -#ifdef 
__GNUC__ - GCC_DIAG_OFF(deprecated-declarations) -#endif cl_mem bindTexture(const oclMat &mat) { cl_mem texture; @@ -234,49 +211,16 @@ namespace cv openCLSafeCall(err); return texture; } -#ifdef __GNUC__ - GCC_DIAG_ON(deprecated-declarations) -#endif Ptr bindTexturePtr(const oclMat &mat) { return makePtr(bindTexture(mat), mat.rows, mat.cols, mat.type()); } + void releaseTexture(cl_mem& texture) { openCLFree(texture); } - - bool support_image2d(Context *clCxt) - { - const cv::ocl::ProgramEntry _kernel = {NULL, "__kernel void test_func(image2d_t img) {}", NULL}; - static bool _isTested = false; - static bool _support = false; - if(_isTested) - { - return _support; - } - try - { - cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel, "test_func"); - cv::ocl::finish(); - _support = true; - } - catch (const cv::Exception& e) - { - if(e.code == -217) - { - _support = false; - } - else - { - // throw e once again - throw e; - } - } - _isTested = true; - return _support; - } }//namespace ocl }//namespace cv diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl index e5d8271394..0208806069 100644 --- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl +++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl @@ -70,7 +70,7 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st dstT t1 = convertToDstT(src2[src2_index]); dstT t2 = t0 - t1; - dst[dst_index] = t2 >= 0 ? t2 : -t2; + dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2; } } @@ -88,6 +88,6 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int dstT t0 = convertToDstT(src1[src1_index]); - dst[dst_index] = t0 >= 0 ? t0 : -t0; + dst[dst_index] = t0 >= (dstT)(0) ? 
t0 : -t0; } } diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl index 40caba5a9e..2f34bbbffe 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ b/modules/ocl/src/opencl/arithm_add.cl @@ -62,7 +62,7 @@ #if defined (FUNC_MUL) #if defined (HAVE_SCALAR) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0] * convertToWT(src2[src2_index])); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar * convertToWT(src2[src2_index])); #else #define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index])); #endif @@ -72,7 +72,7 @@ #if defined (HAVE_SCALAR) #define EXPRESSION T zero = (T)(0); \ dst[dst_index] = src2[src2_index] == zero ? zero : \ - convertToT(convertToWT(src1[src1_index]) * scalar[0] / convertToWT(src2[src2_index])); + convertToT(convertToWT(src1[src1_index]) * scalar / convertToWT(src2[src2_index])); #else #define EXPRESSION T zero = (T)(0); \ dst[dst_index] = src2[src2_index] == zero ? 
zero : \ @@ -86,6 +86,14 @@ dst[dst_index] = convertToT(value); #endif +#if defined (FUNC_MIN) +#define EXPRESSION dst[dst_index] = min( src1[src1_index], src2[src2_index] ); +#endif + +#if defined (FUNC_MAX) +#define EXPRESSION dst[dst_index] = max( src1[src1_index], src2[src2_index] ); +#endif + ////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////// ADD //////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -115,7 +123,7 @@ __kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_off // add mat with scale __kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset, __global T *src2, int src2_step, int src2_offset, - __global WT *scalar, + WT scalar, __global T *dst, int dst_step, int dst_offset, int cols, int rows) { diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl index 4a0167fd55..7f4e413277 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar.cl @@ -52,24 +52,30 @@ #endif #if defined (FUNC_ADD) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar); #endif #if defined (FUNC_SUB) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar); #endif #if defined (FUNC_MUL) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar); #endif #if defined (FUNC_DIV) #define EXPRESSION T zero = (T)(0); \ - dst[dst_index] = src1[src1_index] == zero ? 
zero : convertToT(scalar[0] / convertToWT(src1[src1_index])); + dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar / convertToWT(src1[src1_index])); +#endif + +#if defined (FUNC_ABS) +#define EXPRESSION \ + T value = src1[src1_index] > (T)(0) ? src1[src1_index] : -src1[src1_index]; \ + dst[dst_index] = value; #endif #if defined (FUNC_ABS_DIFF) -#define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar[0]; \ +#define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar; \ value = value > (WT)(0) ? value : -value; \ dst[dst_index] = convertToT(value); #endif @@ -79,7 +85,7 @@ /////////////////////////////////////////////////////////////////////////////////// __kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1_offset, - __global WT *scalar, + WT scalar, __global T *dst, int dst_step, int dst_offset, int cols, int rows) { @@ -92,5 +98,6 @@ __kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1 int dst_index = mad24(y, dst_step, x + dst_offset); EXPRESSION + } } diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index d472b3cbfb..b93de0c6b2 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -52,15 +52,15 @@ #endif #if defined (FUNC_ADD) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar[0]); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar); #endif #if defined (FUNC_SUB) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar[0]); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar); #endif #if defined (FUNC_MUL) -#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar[0]); +#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar); #endif #if defined 
(FUNC_DIV) @@ -74,7 +74,7 @@ /////////////////////////////////////////////////////////////////////////////////// __kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int src1_offset, - __global WT *scalar, + WT scalar, __global uchar *mask, int mask_step, int mask_offset, __global T *dst, int dst_step, int dst_offset, int cols, int rows) diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary.cl b/modules/ocl/src/opencl/arithm_bitwise_binary.cl index 898b40a9ee..a4fa205c1f 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_binary.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_binary.cl @@ -51,17 +51,32 @@ __kernel void arithm_bitwise_binary(__global uchar * src1, int src1_step, int src1_offset, __global uchar * src2, int src2_step, int src2_offset, __global uchar * dst, int dst_step, int dst_offset, - int cols1, int rows) + int cols, int rows) { int x = get_global_id(0); int y = get_global_id(1); - if (x < cols1 && y < rows) + if (x < cols && y < rows) { +#if elemSize > 1 + x *= elemSize; +#endif int src1_index = mad24(y, src1_step, x + src1_offset); int src2_index = mad24(y, src2_step, x + src2_offset); - int dst_index = mad24(y, dst_step, dst_offset + x); + int dst_index = mad24(y, dst_step, x + dst_offset); +#if elemSize > 1 + #pragma unroll + for (int i = 0; i < elemSize; i += vlen) + { + ucharv t0 = vloadn(0, src1 + src1_index + i); + ucharv t1 = vloadn(0, src2 + src2_index + i); + ucharv t2 = t0 Operation t1; + + vstoren(t2, 0, dst + dst_index + i); + } +#else dst[dst_index] = src1[src1_index] Operation src2[src2_index]; +#endif } } diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl index 622ab5b113..d244e572d9 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl @@ -50,7 +50,7 @@ __kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, int src1_offset, __global uchar * 
src2, int src2_step, int src2_offset, - __global uchar * mask, int mask_step, int mask_offset, int elemSize, + __global uchar * mask, int mask_step, int mask_offset, __global uchar * dst, int dst_step, int dst_offset, int cols1, int rows) { @@ -59,15 +59,30 @@ __kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, i if (x < cols1 && y < rows) { - int mask_index = mad24(y, mask_step, mask_offset + (x / elemSize)); + int mask_index = mad24(y, mask_step, mask_offset + x); if (mask[mask_index]) { +#if elemSize > 1 + x *= elemSize; +#endif int src1_index = mad24(y, src1_step, x + src1_offset); int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, x + dst_offset); +#if elemSize > 1 + #pragma unroll + for (int i = 0; i < elemSize; i += vlen) + { + ucharv t0 = vloadn(0, src1 + src1_index + i); + ucharv t1 = vloadn(0, src2 + src2_index + i); + ucharv t2 = t0 Operation t1; + + vstoren(t2, 0, dst + dst_index + i); + } +#else dst[dst_index] = src1[src1_index] Operation src2[src2_index]; +#endif } } } diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl index c17b412a6d..5a7d5938cb 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl @@ -50,19 +50,33 @@ __kernel void arithm_bitwise_binary_scalar( __global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int elemSize, + __global uchar *src2, __global uchar *dst, int dst_step, int dst_offset, - int cols1, int rows) + int cols, int rows) { int x = get_global_id(0); int y = get_global_id(1); - if (x < cols1 && y < rows) + if (x < cols && y < rows) { +#if elemSize > 1 + x *= elemSize; +#endif int src1_index = mad24(y, src1_step, src1_offset + x); - int src2_index = x % elemSize; int dst_index = mad24(y, dst_step, dst_offset + x); - dst[dst_index] = src1[src1_index] Operation src2[src2_index]; +#if elemSize > 
1 + #pragma unroll + for (int i = 0; i < elemSize; i += vlen) + { + ucharv t0 = vloadn(0, src1 + src1_index + i); + ucharv t1 = vloadn(0, src2 + i); + ucharv t2 = t0 Operation t1; + + vstoren(t2, 0, dst + dst_index + i); + } +#else + dst[dst_index] = src1[src1_index] Operation src2[0]; +#endif } } diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl index bae1699a3e..a1876b57d0 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl @@ -56,7 +56,7 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////// __kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int elemSize, + __global uchar *src2, __global uchar *mask, int mask_step, int mask_offset, __global uchar *dst, int dst_step, int dst_offset, int cols, int rows) @@ -66,14 +66,29 @@ __kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_s if (x < cols && y < rows) { - int mask_index = mad24(y, mask_step, (x / elemSize) + mask_offset); + int mask_index = mad24(y, mask_step, x + mask_offset); + if (mask[mask_index]) { +#if elemSize > 1 + x *= elemSize; +#endif int src1_index = mad24(y, src1_step, x + src1_offset); - int src2_index = x % elemSize; int dst_index = mad24(y, dst_step, x + dst_offset); - dst[dst_index] = src1[src1_index] Operation src2[src2_index]; +#if elemSize > 1 + #pragma unroll + for (int i = 0; i < elemSize; i += vlen) + { + ucharv t0 = vloadn(0, src1 + src1_index + i); + ucharv t1 = vloadn(0, src2 + i); + ucharv t2 = t0 Operation t1; + + vstoren(t2, 0, dst + dst_index + i); + } +#else + dst[dst_index] = src1[src1_index] Operation src2[0]; +#endif } } } diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl index 4011f03bea..39bcf949a0 100644 --- 
a/modules/ocl/src/opencl/arithm_sum.cl +++ b/modules/ocl/src/opencl/arithm_sum.cl @@ -51,14 +51,14 @@ #endif #endif -#if defined (FUNC_SUM) +#if FUNC_SUM #define FUNC(a, b) b += a; -#endif -#if defined (FUNC_ABS_SUM) -#define FUNC(a, b) b += a >= 0 ? a : -a; -#endif -#if defined (FUNC_SQR_SUM) +#elif FUNC_ABS_SUM +#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a; +#elif FUNC_SQR_SUM #define FUNC(a, b) b += a * a; +#else +#error No sum function #endif /**************************************Array buffer SUM**************************************/ diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl index 410f8fc8db..2b1cfccd03 100644 --- a/modules/ocl/src/opencl/cvt_color.cl +++ b/modules/ocl/src/opencl/cvt_color.cl @@ -45,6 +45,7 @@ //M*/ /**************************************PUBLICFUNC*************************************/ + #if defined (DOUBLE_SUPPORT) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif @@ -52,7 +53,6 @@ #define DATA_TYPE UNDEFINED #if defined (DEPTH_0) -#undef DATA_TYPE #define DATA_TYPE uchar #define MAX_NUM 255 #define HALF_MAX 128 @@ -60,7 +60,6 @@ #endif #if defined (DEPTH_2) -#undef DATA_TYPE #define DATA_TYPE ushort #define MAX_NUM 65535 #define HALF_MAX 32768 @@ -68,15 +67,14 @@ #endif #if defined (DEPTH_5) -#undef DATA_TYPE #define DATA_TYPE float #define MAX_NUM 1.0f #define HALF_MAX 0.5f #define SAT_CAST(num) (num) #endif - #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) + enum { yuv_shift = 14, @@ -86,20 +84,20 @@ enum B2Y = 1868, BLOCK_SIZE = 256 }; + ///////////////////////////////////// RGB <-> GRAY ////////////////////////////////////// -__kernel void RGB2Gray(int cols,int rows,int src_step,int dst_step,int channels, - int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) +__kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, int channels, + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst, + int src_offset, int dst_offset) { const int x = 
get_global_id(0); const int y = get_global_id(1); - src_step /= sizeof(DATA_TYPE); - dst_step /= sizeof(DATA_TYPE); if (y < rows && x < cols) { - int src_idx = y * src_step + x * channels; - int dst_idx = y * dst_step + x; + int src_idx = mad24(y, src_step, src_offset + x * channels); + int dst_idx = mad24(y, dst_step, dst_offset + x); #if defined (DEPTH_5) dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f; #else @@ -109,17 +107,16 @@ __kernel void RGB2Gray(int cols,int rows,int src_step,int dst_step,int channels, } __kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step, - __global const DATA_TYPE* src, __global DATA_TYPE* dst) + __global const DATA_TYPE* src, __global DATA_TYPE* dst, + int src_offset, int dst_offset) { const int x = get_global_id(0); const int y = get_global_id(1); - src_step /= sizeof(DATA_TYPE); - dst_step /= sizeof(DATA_TYPE); if (y < rows && x < cols) { - int src_idx = y * src_step + x; - int dst_idx = y * dst_step + x * 4; + int src_idx = mad24(y, src_step, src_offset + x); + int dst_idx = mad24(y, dst_step, dst_offset + x * 4); DATA_TYPE val = src[src_idx]; dst[dst_idx++] = val; dst[dst_idx++] = val; @@ -129,24 +126,25 @@ __kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step, } ///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// + __constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; __constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 }; __kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels, - int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst, + int src_offset, int dst_offset) { - const int x = get_global_id(0); - const int y = get_global_id(1); - - src_step /= sizeof(DATA_TYPE); - dst_step /= sizeof(DATA_TYPE); + int x = get_global_id(0); + int y = get_global_id(1); if (y < 
rows && x < cols) { - int src_idx = y * src_step + x * channels; - int dst_idx = y * dst_step + x * channels; + x *= channels; + int src_idx = mad24(y, src_step, src_offset + x); + int dst_idx = mad24(y, dst_step, dst_offset + x); dst += dst_idx; const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; + #if defined (DEPTH_5) __constant float * coeffs = c_RGB2YUVCoeffs_f; const DATA_TYPE Y = rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2]; @@ -159,6 +157,7 @@ __kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels, const int Cr = CV_DESCALE((rgb[bidx] - Y) * coeffs[3] + delta, yuv_shift); const int Cb = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[4] + delta, yuv_shift); #endif + dst[0] = SAT_CAST( Y ); dst[1] = SAT_CAST( Cr ); dst[2] = SAT_CAST( Cb ); @@ -169,18 +168,17 @@ __constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; __constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; __kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels, - int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst, + int src_offset, int dst_offset) { - const int x = get_global_id(0); - const int y = get_global_id(1); - - src_step /= sizeof(DATA_TYPE); - dst_step /= sizeof(DATA_TYPE); + int x = get_global_id(0); + int y = get_global_id(1); if (y < rows && x < cols) { - int src_idx = y * src_step + x * channels; - int dst_idx = y * dst_step + x * channels; + x *= channels; + int src_idx = mad24(y, src_step, src_offset + x); + int dst_idx = mad24(y, dst_step, dst_offset + x); dst += dst_idx; const DATA_TYPE yuv[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; @@ -195,6 +193,7 @@ __kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels, const int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift); const int r = yuv[0] + 
CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift); #endif + dst[bidx^2] = SAT_CAST( b ); dst[1] = SAT_CAST( g ); dst[bidx] = SAT_CAST( r ); @@ -209,17 +208,19 @@ __constant int ITUR_BT_601_CVR = 1673527; __constant int ITUR_BT_601_SHIFT = 20; __kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step, - int bidx, int width, int height, __global const uchar* src, __global uchar* dst) + int bidx, int width, int height, __global const uchar* src, __global uchar* dst, + int src_offset, int dst_offset) { const int x = get_global_id(0); // max_x = width / 2 const int y = get_global_id(1); // max_y = height/ 2 if (y < height / 2 && x < width / 2 ) { - __global const uchar* ysrc = src + (y << 1) * src_step + (x << 1); - __global const uchar* usrc = src + (height + y) * src_step + (x << 1); - __global uchar* dst1 = dst + (y << 1) * dst_step + (x << 3); - __global uchar* dst2 = dst + ((y << 1) + 1) * dst_step + (x << 3); + __global const uchar* ysrc = src + mad24(y << 1, src_step, (x << 1) + src_offset); + __global const uchar* usrc = src + mad24(height + y, src_step, (x << 1) + src_offset); + __global uchar* dst1 = dst + mad24(y << 1, dst_step, (x << 3) + dst_offset); + __global uchar* dst2 = dst + mad24((y << 1) + 1, dst_step, (x << 3) + dst_offset); + int Y1 = ysrc[0]; int Y2 = ysrc[1]; int Y3 = ysrc[src_step]; @@ -259,24 +260,26 @@ __kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step, } ///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// + __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; __constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241}; __kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels, - int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst) + int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst, + int src_offset, int dst_offset) { - const int x = get_global_id(0); - const int y = 
get_global_id(1); - - src_step /= sizeof(DATA_TYPE); - dst_step /= sizeof(DATA_TYPE); + int x = get_global_id(0); + int y = get_global_id(1); if (y < rows && x < cols) { - int src_idx = y * src_step + x * channels; - int dst_idx = y * dst_step + x * channels; + x *= channels; + int src_idx = mad24(y, src_step, src_offset + x); + int dst_idx = mad24(y, dst_step, dst_offset + x); + dst += dst_idx; - const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]}; + const DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] }; + #if defined (DEPTH_5) __constant float * coeffs = c_RGB2YCrCbCoeffs_f; const DATA_TYPE Y = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx]; @@ -289,6 +292,7 @@ __kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels const int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift); const int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift); #endif + dst[0] = SAT_CAST( Y ); dst[1] = SAT_CAST( Cr ); dst[2] = SAT_CAST( Cb ); diff --git a/modules/ocl/src/opencl/filtering_laplacian.cl b/modules/ocl/src/opencl/filtering_laplacian.cl index f7430d5332..3c0cc0de38 100644 --- a/modules/ocl/src/opencl/filtering_laplacian.cl +++ b/modules/ocl/src/opencl/filtering_laplacian.cl @@ -211,7 +211,7 @@ __kernel void filter2D( barrier(CLK_LOCAL_MEM_FENCE); if(globalRow < rows && globalCol < cols) { - T_SUM sum = (T_SUM)SUM_ZERO; + T_SUM sum = (T_SUM)(SUM_ZERO); int filterIdx = 0; for(int i = 0; i < FILTER_SIZE; i++) { @@ -291,7 +291,7 @@ __kernel void filter2D_3x3( T_IMG data = src[mad24(selected_row, src_step, selected_cols)]; int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols; - data = con ? data : 0; + data = con ? 
data : (T_IMG)(0); local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data; if(lX < (ANX << 1)) @@ -300,7 +300,7 @@ __kernel void filter2D_3x3( data = src[mad24(selected_row, src_step, selected_cols)]; con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols; - data = con ? data : 0; + data = con ? data : (T_IMG)(0); local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data; } #else diff --git a/modules/ocl/src/opencl/pyrlk_no_image.cl b/modules/ocl/src/opencl/pyrlk_no_image.cl deleted file mode 100644 index 98a11b5c12..0000000000 --- a/modules/ocl/src/opencl/pyrlk_no_image.cl +++ /dev/null @@ -1,764 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Sen Liu, sen@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. 
-// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#define BUFFER 256 -void reduce3(float val1, float val2, float val3, __local float *smem1, __local float *smem2, __local float *smem3, int tid) -{ - smem1[tid] = val1; - smem2[tid] = val2; - smem3[tid] = val3; - barrier(CLK_LOCAL_MEM_FENCE); - -#if BUFFER > 128 - - if (tid < 128) - { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - smem3[tid] = val3 += smem3[tid + 128]; - } - - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - smem3[tid] = val3 += smem3[tid + 64]; - } - - barrier(CLK_LOCAL_MEM_FENCE); -#endif - - if (tid < 32) - { - smem1[tid] = val1 += smem1[tid + 32]; - smem2[tid] = val2 += smem2[tid + 32]; - smem3[tid] = val3 += smem3[tid + 32]; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (tid < 16) - { - smem1[tid] = val1 += smem1[tid + 16]; - smem2[tid] = val2 += smem2[tid + 16]; - smem3[tid] = val3 += smem3[tid + 16]; - } - - barrier(CLK_LOCAL_MEM_FENCE); 
- - if (tid < 8) - { - volatile __local float *vmem1 = smem1; - volatile __local float *vmem2 = smem2; - volatile __local float *vmem3 = smem3; - - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem2[tid] = val2 += vmem2[tid + 8]; - vmem3[tid] = val3 += vmem3[tid + 8]; - - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem2[tid] = val2 += vmem2[tid + 4]; - vmem3[tid] = val3 += vmem3[tid + 4]; - - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem2[tid] = val2 += vmem2[tid + 2]; - vmem3[tid] = val3 += vmem3[tid + 2]; - - vmem1[tid] = val1 += vmem1[tid + 1]; - vmem2[tid] = val2 += vmem2[tid + 1]; - vmem3[tid] = val3 += vmem3[tid + 1]; - } -} - -void reduce2(float val1, float val2, __local float *smem1, __local float *smem2, int tid) -{ - smem1[tid] = val1; - smem2[tid] = val2; - barrier(CLK_LOCAL_MEM_FENCE); - -#if BUFFER > 128 - - if (tid < 128) - { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - } - - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - } - - barrier(CLK_LOCAL_MEM_FENCE); -#endif - - if (tid < 32) - { - smem1[tid] = val1 += smem1[tid + 32]; - smem2[tid] = val2 += smem2[tid + 32]; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (tid < 16) - { - smem1[tid] = val1 += smem1[tid + 16]; - smem2[tid] = val2 += smem2[tid + 16]; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (tid < 8) - { - volatile __local float *vmem1 = smem1; - volatile __local float *vmem2 = smem2; - - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem2[tid] = val2 += vmem2[tid + 8]; - - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem2[tid] = val2 += vmem2[tid + 4]; - - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem2[tid] = val2 += vmem2[tid + 2]; - - vmem1[tid] = val1 += vmem1[tid + 1]; - vmem2[tid] = val2 += vmem2[tid + 1]; - } -} - -void reduce1(float val1, __local float *smem1, int tid) -{ - smem1[tid] = val1; - barrier(CLK_LOCAL_MEM_FENCE); - -#if BUFFER > 128 - - if (tid < 128) 
- { - smem1[tid] = val1 += smem1[tid + 128]; - } - - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - } - - barrier(CLK_LOCAL_MEM_FENCE); -#endif - - if (tid < 32) - { - smem1[tid] = val1 += smem1[tid + 32]; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (tid < 16) - { - volatile __local float *vmem1 = smem1; - - vmem1[tid] = val1 += vmem1[tid + 16]; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (tid < 8) - { - volatile __local float *vmem1 = smem1; - - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 1]; - } -} - -#define SCALE (1.0f / (1 << 20)) -#define THRESHOLD 0.01f -#define DIMENSION 21 - -float readImage2Df_C1(__global const float *image, const float x, const float y, const int rows, const int cols, const int elemCntPerRow) -{ - float2 coor = (float2)(x, y); - - int i0 = clamp((int)floor(coor.x), 0, cols - 1); - int j0 = clamp((int)floor(coor.y), 0, rows - 1); - int i1 = clamp((int)floor(coor.x) + 1, 0, cols - 1); - int j1 = clamp((int)floor(coor.y) + 1, 0, rows - 1); - float a = coor.x - floor(coor.x); - float b = coor.y - floor(coor.y); - - return (1 - a) * (1 - b) * image[mad24(j0, elemCntPerRow, i0)] - + a * (1 - b) * image[mad24(j0, elemCntPerRow, i1)] - + (1 - a) * b * image[mad24(j1, elemCntPerRow, i0)] - + a * b * image[mad24(j1, elemCntPerRow, i1)]; -} - -__kernel void lkSparse_C1_D5(__global const float *I, __global const float *J, - __global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err, - const int level, const int rows, const int cols, const int elemCntPerRow, - int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) -{ - __local float smem1[BUFFER]; - __local float smem2[BUFFER]; - __local float smem3[BUFFER]; - - float2 c_halfWin = (float2)((c_winSize_x - 1) 
>> 1, (c_winSize_y - 1) >> 1); - - const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0)); - - float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level)); - - if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) - { - if (tid == 0 && level == 0) - { - status[get_group_id(0)] = 0; - } - - return; - } - - prevPt -= c_halfWin; - - // extract the patch from the first image, compute covariation matrix of derivatives - - float A11 = 0; - float A12 = 0; - float A22 = 0; - - float I_patch[1][3]; - float dIdx_patch[1][3]; - float dIdy_patch[1][3]; - - for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) - { - for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) - { - float x = (prevPt.x + xBase); - float y = (prevPt.y + yBase); - - I_patch[i][j] = readImage2Df_C1(I, x, y, rows, cols, elemCntPerRow); - float dIdx = 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) - - (3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow)); - - float dIdy = 3.0f * readImage2Df_C1(I, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y + 1, rows, cols, elemCntPerRow) - - (3.0f * readImage2Df_C1(I, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C1(I, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C1(I, x + 1, y - 1, rows, cols, elemCntPerRow)); - - dIdx_patch[i][j] = dIdx; - dIdy_patch[i][j] = dIdy; - - A11 += dIdx * dIdx; - A12 += dIdx * dIdy; - A22 += dIdy * dIdy; - } - } - - reduce3(A11, A12, A22, smem1, smem2, smem3, tid); - 
barrier(CLK_LOCAL_MEM_FENCE); - - A11 = smem1[0]; - A12 = smem2[0]; - A22 = smem3[0]; - - float D = A11 * A22 - A12 * A12; - - if (D < 1.192092896e-07f) - { - if (tid == 0 && level == 0) - { - status[get_group_id(0)] = 0; - } - - return; - } - - D = 1.f / D; - - A11 *= D; - A12 *= D; - A22 *= D; - - float2 nextPt = nextPts[get_group_id(0)]; - nextPt = nextPt * 2.0f - c_halfWin; - - for (int k = 0; k < c_iters; ++k) - { - if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows) - { - if (tid == 0 && level == 0) - { - status[get_group_id(0)] = 0; - } - - return; - } - - float b1 = 0; - float b2 = 0; - - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float diff = (readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f; - - b1 += diff * dIdx_patch[i][j]; - b2 += diff * dIdy_patch[i][j]; - } - } - - reduce2(b1, b2, smem1, smem2, tid); - barrier(CLK_LOCAL_MEM_FENCE); - - b1 = smem1[0]; - b2 = smem2[0]; - - float2 delta; - delta.x = A12 * b2 - A22 * b1; - delta.y = A12 * b1 - A11 * b2; - - nextPt += delta; - - //if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD) - // break; - } - - float errval = 0.0f; - - if (calcErr) - { - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float diff = readImage2Df_C1(J, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]; - - errval += fabs(diff); - } - } - - reduce1(errval, smem1, tid); - } - - if (tid == 0) - { - nextPt += c_halfWin; - - nextPts[get_group_id(0)] = nextPt; - - if (calcErr) - { - err[get_group_id(0)] = smem1[0] / (c_winSize_x * c_winSize_y); - } - } -} - -float4 readImage2Df_C4(__global const float4 *image, const float x, const float y, const int rows, 
const int cols, const int elemCntPerRow) -{ - float2 coor = (float2)(x, y); - - int i0 = clamp((int)floor(coor.x), 0, cols - 1); - int j0 = clamp((int)floor(coor.y), 0, rows - 1); - int i1 = clamp((int)floor(coor.x) + 1, 0, cols - 1); - int j1 = clamp((int)floor(coor.y) + 1, 0, rows - 1); - float a = coor.x - floor(coor.x); - float b = coor.y - floor(coor.y); - - return (1 - a) * (1 - b) * image[mad24(j0, elemCntPerRow, i0)] - + a * (1 - b) * image[mad24(j0, elemCntPerRow, i1)] - + (1 - a) * b * image[mad24(j1, elemCntPerRow, i0)] - + a * b * image[mad24(j1, elemCntPerRow, i1)]; -} - -__kernel void lkSparse_C4_D5(__global const float *I, __global const float *J, - __global const float2 *prevPts, int prevPtsStep, __global float2 *nextPts, int nextPtsStep, __global uchar *status, __global float *err, - const int level, const int rows, const int cols, const int elemCntPerRow, - int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) -{ - __local float smem1[BUFFER]; - __local float smem2[BUFFER]; - __local float smem3[BUFFER]; - - float2 c_halfWin = (float2)((c_winSize_x - 1) >> 1, (c_winSize_y - 1) >> 1); - - const int tid = mad24(get_local_id(1), get_local_size(0), get_local_id(0)); - - float2 prevPt = prevPts[get_group_id(0)] * (1.0f / (1 << level)); - - if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows) - { - if (tid == 0 && level == 0) - { - status[get_group_id(0)] = 0; - } - - return; - } - - prevPt -= c_halfWin; - - // extract the patch from the first image, compute covariation matrix of derivatives - - float A11 = 0; - float A12 = 0; - float A22 = 0; - - float4 I_patch[1][3]; - float4 dIdx_patch[1][3]; - float4 dIdy_patch[1][3]; - - __global float4 *ptrI = (__global float4 *)I; - - for (int yBase = get_local_id(1), i = 0; yBase < c_winSize_y; yBase += get_local_size(1), ++i) - { - for (int xBase = get_local_id(0), j = 0; xBase < c_winSize_x; xBase += get_local_size(0), ++j) - { - float x = (prevPt.x 
+ xBase); - float y = (prevPt.y + yBase); - - I_patch[i][j] = readImage2Df_C4(ptrI, x, y, rows, cols, elemCntPerRow); - - float4 dIdx = 3.0f * readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x + 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow) - - (3.0f * readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x - 1, y, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow)); - - float4 dIdy = 3.0f * readImage2Df_C4(ptrI, x - 1, y + 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x, y + 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y + 1, rows, cols, elemCntPerRow) - - (3.0f * readImage2Df_C4(ptrI, x - 1, y - 1, rows, cols, elemCntPerRow) + 10.0f * readImage2Df_C4(ptrI, x, y - 1, rows, cols, elemCntPerRow) + 3.0f * readImage2Df_C4(ptrI, x + 1, y - 1, rows, cols, elemCntPerRow)); - - dIdx_patch[i][j] = dIdx; - dIdy_patch[i][j] = dIdy; - - A11 += (dIdx * dIdx).x + (dIdx * dIdx).y + (dIdx * dIdx).z; - A12 += (dIdx * dIdy).x + (dIdx * dIdy).y + (dIdx * dIdy).z; - A22 += (dIdy * dIdy).x + (dIdy * dIdy).y + (dIdy * dIdy).z; - } - } - - reduce3(A11, A12, A22, smem1, smem2, smem3, tid); - barrier(CLK_LOCAL_MEM_FENCE); - - A11 = smem1[0]; - A12 = smem2[0]; - A22 = smem3[0]; - - float D = A11 * A22 - A12 * A12; - //pD[get_group_id(0)] = D; - - if (D < 1.192092896e-07f) - { - if (tid == 0 && level == 0) - { - status[get_group_id(0)] = 0; - } - - return; - } - - D = 1.f / D; - - A11 *= D; - A12 *= D; - A22 *= D; - - float2 nextPt = nextPts[get_group_id(0)]; - - nextPt = nextPt * 2.0f - c_halfWin; - - __global float4 *ptrJ = (__global float4 *)J; - - for (int k = 0; k < c_iters; ++k) - { - if (nextPt.x < -c_halfWin.x || nextPt.x >= cols || nextPt.y < -c_halfWin.y || nextPt.y >= rows) - { - if (tid == 0 && level == 0) - { - status[get_group_id(0)] = 0; - } - - 
return; - } - - float b1 = 0; - float b2 = 0; - - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float4 diff = (readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]) * 32.0f; - - b1 += (diff * dIdx_patch[i][j]).x + (diff * dIdx_patch[i][j]).y + (diff * dIdx_patch[i][j]).z; - b2 += (diff * dIdy_patch[i][j]).x + (diff * dIdy_patch[i][j]).y + (diff * dIdy_patch[i][j]).z; - } - } - - reduce2(b1, b2, smem1, smem2, tid); - barrier(CLK_LOCAL_MEM_FENCE); - - b1 = smem1[0]; - b2 = smem2[0]; - - float2 delta; - delta.x = A12 * b2 - A22 * b1; - delta.y = A12 * b1 - A11 * b2; - - nextPt += delta; - - //if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD) - // break; - } - - float errval = 0.0f; - - if (calcErr) - { - for (int y = get_local_id(1), i = 0; y < c_winSize_y; y += get_local_size(1), ++i) - { - for (int x = get_local_id(0), j = 0; x < c_winSize_x; x += get_local_size(0), ++j) - { - float4 diff = readImage2Df_C4(ptrJ, nextPt.x + x, nextPt.y + y, rows, cols, elemCntPerRow) - I_patch[i][j]; - - errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z); - } - } - - reduce1(errval, smem1, tid); - } - - if (tid == 0) - { - nextPt += c_halfWin; - nextPts[get_group_id(0)] = nextPt; - - if (calcErr) - { - err[get_group_id(0)] = smem1[0] / (3 * c_winSize_x * c_winSize_y); - } - } -} - -int readImage2Di_C1(__global const int *image, float2 coor, int2 size, const int elemCntPerRow) -{ - int i = clamp((int)floor(coor.x), 0, size.x - 1); - int j = clamp((int)floor(coor.y), 0, size.y - 1); - return image[mad24(j, elemCntPerRow, i)]; -} - -__kernel void lkDense_C1_D0(__global const int *I, __global const int *J, __global float *u, int uStep, __global float *v, int vStep, __global const float *prevU, int prevUStep, __global const float *prevV, int prevVStep, - const int rows, const int cols, /*__global float* err, 
int errStep, int cn,*/ - const int elemCntPerRow, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) -{ - int c_halfWin_x = (c_winSize_x - 1) / 2; - int c_halfWin_y = (c_winSize_y - 1) / 2; - - const int patchWidth = get_local_size(0) + 2 * c_halfWin_x; - const int patchHeight = get_local_size(1) + 2 * c_halfWin_y; - - __local int smem[8192]; - - __local int *I_patch = smem; - __local int *dIdx_patch = I_patch + patchWidth * patchHeight; - __local int *dIdy_patch = dIdx_patch + patchWidth * patchHeight; - - const int xBase = get_group_id(0) * get_local_size(0); - const int yBase = get_group_id(1) * get_local_size(1); - int2 size = (int2)(cols, rows); - - for (int i = get_local_id(1); i < patchHeight; i += get_local_size(1)) - { - for (int j = get_local_id(0); j < patchWidth; j += get_local_size(0)) - { - float x = xBase - c_halfWin_x + j + 0.5f; - float y = yBase - c_halfWin_y + i + 0.5f; - - I_patch[i * patchWidth + j] = readImage2Di_C1(I, (float2)(x, y), size, elemCntPerRow); - - // Sharr Deriv - - dIdx_patch[i * patchWidth + j] = 3 * readImage2Di_C1(I, (float2)(x + 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x + 1, y), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y + 1), size, elemCntPerRow) - - (3 * readImage2Di_C1(I, (float2)(x - 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x - 1, y), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x - 1, y + 1), size, elemCntPerRow)); - - dIdy_patch[i * patchWidth + j] = 3 * readImage2Di_C1(I, (float2)(x - 1, y + 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x, y + 1), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y + 1), size, elemCntPerRow) - - (3 * readImage2Di_C1(I, (float2)(x - 1, y - 1), size, elemCntPerRow) + 10 * readImage2Di_C1(I, (float2)(x, y - 1), size, elemCntPerRow) + 3 * readImage2Di_C1(I, (float2)(x + 1, y - 1), size, elemCntPerRow)); - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // extract the patch 
from the first image, compute covariation matrix of derivatives - - const int x = get_global_id(0); - const int y = get_global_id(1); - - if (x >= cols || y >= rows) - { - return; - } - - int A11i = 0; - int A12i = 0; - int A22i = 0; - - for (int i = 0; i < c_winSize_y; ++i) - { - for (int j = 0; j < c_winSize_x; ++j) - { - int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; - int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; - - A11i += dIdx * dIdx; - A12i += dIdx * dIdy; - A22i += dIdy * dIdy; - } - } - - float A11 = A11i; - float A12 = A12i; - float A22 = A22i; - - float D = A11 * A22 - A12 * A12; - - //if (calcErr && GET_MIN_EIGENVALS) - // (err + y * errStep)[x] = minEig; - - if (D < 1.192092896e-07f) - { - //if (calcErr) - // err(y, x) = 3.402823466e+38f; - - return; - } - - D = 1.f / D; - - A11 *= D; - A12 *= D; - A22 *= D; - - float2 nextPt; - nextPt.x = x + prevU[y / 2 * prevUStep / 4 + x / 2] * 2.0f; - nextPt.y = y + prevV[y / 2 * prevVStep / 4 + x / 2] * 2.0f; - - for (int k = 0; k < c_iters; ++k) - { - if (nextPt.x < 0 || nextPt.x >= cols || nextPt.y < 0 || nextPt.y >= rows) - { - //if (calcErr) - // err(y, x) = 3.402823466e+38f; - - return; - } - - int b1 = 0; - int b2 = 0; - - for (int i = 0; i < c_winSize_y; ++i) - { - for (int j = 0; j < c_winSize_x; ++j) - { - int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j]; - int iJ = readImage2Di_C1(J, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f), size, elemCntPerRow); - - int diff = (iJ - iI) * 32; - - int dIdx = dIdx_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; - int dIdy = dIdy_patch[(get_local_id(1) + i) * patchWidth + (get_local_id(0) + j)]; - - b1 += diff * dIdx; - b2 += diff * dIdy; - } - } - - float2 delta; - delta.x = A12 * b2 - A22 * b1; - delta.y = A12 * b1 - A11 * b2; - - nextPt.x += delta.x; - nextPt.y += delta.y; - - if (fabs(delta.x) < 0.01f && 
fabs(delta.y) < 0.01f) - { - break; - } - } - - u[y * uStep / 4 + x] = nextPt.x - x; - v[y * vStep / 4 + x] = nextPt.y - y; - - if (calcErr) - { - int errval = 0; - - for (int i = 0; i < c_winSize_y; ++i) - { - for (int j = 0; j < c_winSize_x; ++j) - { - int iI = I_patch[(get_local_id(1) + i) * patchWidth + get_local_id(0) + j]; - int iJ = readImage2Di_C1(J, (float2)(nextPt.x - c_halfWin_x + j + 0.5f, nextPt.y - c_halfWin_y + i + 0.5f), size, elemCntPerRow); - - errval += abs(iJ - iI); - } - } - - //err[y * errStep / 4 + x] = static_cast(errval) / (c_winSize_x * c_winSize_y); - } -} diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl index 1d523e7885..24bf55cb21 100644 --- a/modules/ocl/src/opencl/stereobp.cl +++ b/modules/ocl/src/opencl/stereobp.cl @@ -290,7 +290,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_, minimum += cmax_disc_term; - float4 sum = 0; + float4 sum = (float4)(0); prev = convert_float4(t_dst[CNDISP - 1]); for (int disp = CNDISP - 2; disp >= 0; disp--) { @@ -308,7 +308,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_, t_dst[CNDISP - 1] = saturate_cast4(dst_reg); sum += dst_reg; - sum /= CNDISP; + sum /= (float4)(CNDISP); #pragma unroll for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step) { diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index b7ef58b1f8..c32b448ddd 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -49,7 +49,7 @@ #define __OPENCV_PRECOMP_H__ #if defined _MSC_VER && _MSC_VER >= 1200 -#pragma warning( disable: 4267 4324 4244 4251 4710 4711 4514 4996 ) +#pragma warning( disable: 4127 4267 4324 4244 4251 4710 4711 4514 4996 ) #endif #if defined(_WIN32) diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index 61aae49ed2..166287ba2a 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -115,18 +115,16 @@ static void lkSparse_run(oclMat &I, 
oclMat &J, int level, /*dim3 block, */dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; - int elemCntPerRow = I.step / I.elemSize(); String kernelName = "lkSparse"; - bool isImageSupported = support_image2d(); - size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 }; - size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1}; + size_t localThreads[3] = { 8, 8, 1 }; + size_t globalThreads[3] = { 8 * ptcount, 8, 1}; int cn = I.oclchannels(); char calcErr = level==0?1:0; std::vector > args; - cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data; - cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data; + cl_mem ITex = bindTexture(I); + cl_mem JTex = bindTexture(J); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex )); @@ -139,8 +137,6 @@ static void lkSparse_run(oclMat &I, oclMat &J, args.push_back( std::make_pair( sizeof(cl_int), (void *)&level )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); - if (!isImageSupported) - args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn )); @@ -149,23 +145,27 @@ static void lkSparse_run(oclMat &I, oclMat &J, args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); - if(isImageSupported) + bool is_cpu = isCpuDevice(); + if (is_cpu) + { + openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU"); + } + else { std::stringstream idxStr; - idxStr << kernelName.c_str() << "_C" << I.oclchannels() << "_D" << I.depth(); - cl_kernel kernel = 
openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str().c_str()); + idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth(); + cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str()); int wave_size = (int)queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[32] = {0}; - sprintf(opt, " -D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", wave_size); - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH); - releaseTexture(ITex); - releaseTexture(JTex); + openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, + args, I.oclchannels(), I.depth(), opt); } - else - openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + releaseTexture(ITex); + releaseTexture(JTex); } void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) @@ -247,37 +247,19 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters) { Context *clCxt = I.clCxt; - bool isImageSupported = support_image2d(); - int elemCntPerRow = I.step / I.elemSize(); String kernelName = "lkDense"; size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { I.cols, I.rows, 1}; - bool calcErr; - if (err) - { - calcErr = true; - } - else - { - calcErr = false; - } + cl_char calcErr = err ? 
1 : 0; cl_mem ITex; cl_mem JTex; - if (isImageSupported) - { - ITex = bindTexture(I); - JTex = bindTexture(J); - } - else - { - ITex = (cl_mem)I.data; - JTex = (cl_mem)J.data; - } + ITex = bindTexture(I); + JTex = bindTexture(J); std::vector > args; @@ -294,24 +276,15 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevV.step )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); - - if (!isImageSupported) - args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); - args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); - if (isImageSupported) - { - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth()); - releaseTexture(ITex); - releaseTexture(JTex); - } - else - openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + releaseTexture(ITex); + releaseTexture(JTex); } void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err) diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index d282908996..87006f5f24 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -58,7 +58,7 @@ namespace cv { if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, 
"Selected device doesn't support double"); return; } @@ -153,7 +153,7 @@ namespace cv if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F) { - CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n"); + CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); return; } diff --git a/modules/ocl/src/svm.cpp b/modules/ocl/src/svm.cpp index fccca0c423..becd2d7af9 100644 --- a/modules/ocl/src/svm.cpp +++ b/modules/ocl/src/svm.cpp @@ -45,6 +45,11 @@ #include "precomp.hpp" #include "opencl_kernels.hpp" +// TODO Remove this after HAVE_CLAMDBLAS eliminating +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif + using namespace cv; using namespace ocl; @@ -75,6 +80,7 @@ public: void calc_non_rbf_base( int vec_count, const int row_idx, Qfloat* results, Mat& src); void calc_rbf( int vec_count, const int row_idx, Qfloat* results, Mat& src); }; + class CvSVMSolver_ocl: public CvSVMSolver { public: @@ -90,13 +96,16 @@ typedef struct CvSparseVecElem32f int idx; float val; } CvSparseVecElem32f; + static int icvCmpSparseVecElems( const void* a, const void* b ) { return ((CvSparseVecElem32f*)a)->idx - ((CvSparseVecElem32f*)b)->idx; } + void cvPreparePredictData( const CvArr* sample, int dims_all, const CvMat* comp_idx, int class_count, const CvMat* prob, float** row_sample, int as_sparse CV_DEFAULT(0) ); + void cvPreparePredictData( const CvArr* _sample, int dims_all, const CvMat* comp_idx, int class_count, const CvMat* prob, float** _row_sample, @@ -135,9 +144,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, } if( d == 1 ) - { sizes[1] = 1; - } if( sizes[0] + sizes[1] - 1 != dims_all ) CV_ERROR( CV_StsUnmatchedSizes, @@ -184,25 +191,19 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, sample_step = CV_IS_MAT_CONT(sample->type) ? 
1 : sample->step / sizeof(row_sample[0]); if( !comp_idx && CV_IS_MAT_CONT(sample->type) && !as_sparse ) - { *_row_sample = sample_data; - } else { CV_CALL( row_sample = (float*)cvAlloc( vec_size )); if( !comp_idx ) for( i = 0; i < dims_selected; i++ ) - { row_sample[i] = sample_data[sample_step * i]; - } else { int* comp = comp_idx->data.i; for( i = 0; i < dims_selected; i++ ) - { row_sample[i] = sample_data[sample_step * comp[i]]; - } } *_row_sample = row_sample; @@ -236,9 +237,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, CV_CALL( inverse_comp_idx = (int*)cvAlloc( dims_all * sizeof(int) )); memset( inverse_comp_idx, -1, dims_all * sizeof(int) ); for( i = 0; i < dims_selected; i++ ) - { inverse_comp_idx[comp_idx->data.i[i]] = i; - } } if( !as_sparse ) @@ -252,9 +251,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, { idx = inverse_comp_idx[idx]; if( idx < 0 ) - { continue; - } } row_sample[idx] = *(float*)CV_NODE_VAL( sparse, node ); } @@ -270,9 +267,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, { idx = inverse_comp_idx[idx]; if( idx < 0 ) - { continue; - } } ptr->idx = idx; ptr->val = *(float*)CV_NODE_VAL( sparse, node ); @@ -290,9 +285,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, __CV_END__; if( inverse_comp_idx ) - { cvFree( &inverse_comp_idx ); - } if( cvGetErrStatus() < 0 && _row_sample ) { @@ -300,6 +293,7 @@ void cvPreparePredictData( const CvArr* _sample, int dims_all, *_row_sample = 0; } } + float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool returnDFVal ) const { assert( kernel ); @@ -323,9 +317,7 @@ float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool retur ((CvSVMKernel_ocl*)kernel)->calc( sv_count, row_index, buffer, src); for( i = 0; i < sv_count; i++ ) - { sum += buffer[i] * df->alpha[i]; - } result = params.svm_type == ONE_CLASS ? 
(float)(sum > 0) : (float)sum; } @@ -341,27 +333,20 @@ float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool retur double sum = 0.; for( i = 0; i < class_count; i++ ) - { for( j = i + 1; j < class_count; j++, df++ ) { sum = -df->rho; int sv_count = df->sv_count; for( k = 0; k < sv_count; k++ ) - { sum += df->alpha[k] * buffer[df->sv_index[k]]; - } vote[sum > 0 ? i : j]++; } - } for( i = 1, k = 0; i < class_count; i++ ) - { if( vote[i] > vote[k] ) - { k = i; - } - } + result = returnDFVal && class_count == 2 ? (float)sum : (float)(class_labels->data.i[k]); } else @@ -370,11 +355,13 @@ float CvSVM_OCL::predict( const int row_index, int row_len, Mat& src, bool retur return result; } + float CvSVM_OCL::predict( const Mat& _sample, bool returnDFVal ) const { CvMat sample = _sample; return CvSVM::predict(&sample, returnDFVal); } + float CvSVM_OCL::predict( const int row_index, Mat& src, bool returnDFVal) const { float result = 0; @@ -383,6 +370,7 @@ float CvSVM_OCL::predict( const int row_index, Mat& src, bool returnDFVal) const return result; } + #undef get_C #define get_C(i) (C[y[i]>0]) #undef is_upper_bound @@ -397,12 +385,14 @@ CvSVMSolver_ocl::CvSVMSolver_ocl(const CvSVMParams* _params) { params = _params; } + float* CvSVMSolver_ocl::get_row( int i, float* dst, Mat& src ) { bool existed = false; float* row = get_row_base( i, &existed, src); return (this->*get_row_func)( i, row, dst, existed ); } + float* CvSVMSolver_ocl::get_row_base( int i, bool* _existed, Mat& src ) { int i1 = i < sample_count ? 
i : i - sample_count; @@ -434,19 +424,16 @@ float* CvSVMSolver_ocl::get_row_base( int i, bool* _existed, Mat& src ) row->prev->next = row->next->prev = row; if( !existed ) - { ((CvSVMKernel_ocl*)kernel)->calc( sample_count, i1, row->data, src); - } if( _existed ) - { *_existed = existed; - } return row->data; } #ifndef HAVE_CLAMDBLAS + static void matmul_sigmod(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1) { Context *clCxt = Context::getContext(); @@ -486,6 +473,7 @@ static void matmul_sigmod(oclMat & src, oclMat & src2, oclMat & dst, int src_row } openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1); } + static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1, double degree1, bool flag) { Context *clCxt = Context::getContext(); @@ -534,6 +522,7 @@ static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, } openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } + static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1) { Context *clCxt = Context::getContext(); @@ -573,6 +562,7 @@ static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_row } openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1); } + #endif // #ifndef HAVE_CLAMDBLAS static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count, double gamma1, bool flag) @@ -594,9 +584,8 @@ static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, in char build_options[50]; if(flag) - { sprintf(build_options, "-D ADDEXP"); - } + std::vector< std::pair > args; args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data)); 
args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step)); @@ -614,9 +603,7 @@ static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, in args.push_back(std::make_pair(sizeof(cl_float), (void* )&gamma)); } else - { args.push_back(std::make_pair(sizeof(cl_double), (void* )&gamma1)); - } openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } @@ -649,14 +636,12 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const CV_CALL( cvPreparePredictData(&sample, var_all, var_idx, class_count, 0, &row_sample )); for(int j = 0; j < var_count; ++j) - { src_temp.at(i, j) = row_sample[j]; - } __CV_END__; } Mat dst1; - double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0, degree1 = 0.0; + double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0; if(params.kernel_type == CvSVM::LINEAR) { alpha1 = 1; @@ -666,7 +651,6 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const { alpha1 = params.gamma; beta1 = params.coef0; - degree1 = params.degree; } if(params.kernel_type == CvSVM::SIGMOID) { @@ -674,27 +658,22 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const beta1 = - 2 * params.coef0; } if(params.kernel_type == CvSVM::RBF) - { gamma1 = - params.gamma; - } Mat sv_temp = Mat(sv_total, var_count, CV_32FC1, Scalar::all(0)); for(int i = 0; i < sv_total; ++i) - { for(int j = 0; j < var_count; ++j) - { sv_temp.at(i, j) = sv[i][j]; - } - } + oclMat src(sample_count, var_count, CV_32FC1, Scalar::all(0)); oclMat sv_; src.upload(src_temp); oclMat dst; -#if defined HAVE_CLAMDBLAS +#ifdef HAVE_CLAMDBLAS dst = oclMat(sample_count, sv_total, CV_32FC1); oclMat src3(sample_count, sv_total, CV_32FC1, Scalar::all(1)); @@ -707,15 +686,18 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const } #else + // TODO fix it + CV_Error(Error::StsNotImplemented, "This part of code contains mistakes. 
Install AMD BLAS in order to get a correct result or use CPU version of SVM"); + + double degree1 = 0.0; + if (params.kernel_type == CvSVM::POLY) + degree1 = params.degree; if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) - { dst = oclMat(sample_count, sv_total, CV_32FC1); - } else - { dst = oclMat(sample_count, sv_total, CV_64FC1); - } + if(params.kernel_type == CvSVM::LINEAR) { sv_.upload(sv_temp); @@ -731,13 +713,9 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const { sv_.upload(sv_temp); if(sample_count > 0) - { matmul_poly(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1, degree1, true); - } else - { matmul_poly(src, sv_, dst, sample_count, sv_total, var_count, alpha1, beta1, degree1, false); - } } #endif @@ -745,21 +723,14 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const { sv_.upload(sv_temp); if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) - { dst = oclMat(sample_count, sv_total, CV_32FC1); - } else - { dst = oclMat(sample_count, sv_total, CV_64FC1); - } + if(sample_count > 0) - { matmul_rbf(src, sv_, dst, sample_count, sv_total, var_count, gamma1, true); - } else - { matmul_rbf(src, sv_, dst, sample_count, sv_total, var_count, gamma1, false); - } } dst.download(dst1); @@ -768,22 +739,20 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const { int r = (int)this->predict(i, dst1); if (results) - { results->data.fl[i] = (float)r; - } if (i == 0) - { result = (float)r; - } } return result; } + void CvSVM_OCL::predict( cv::InputArray _samples, cv::OutputArray _results ) const { _results.create(_samples.size().height, 1, CV_32F); CvMat samples = _samples.getMat(), results = _results.getMat(); predict(&samples, &results); } + bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) { int iter = 0; @@ -800,7 +769,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) } } Mat dst1; - double alpha1 = 0.0, beta1 = 0.0, gamma1 = 
0.0, degree1 = 0.0; + double alpha1 = 0.0, beta1 = 0.0, gamma1 = 0.0; if(params->kernel_type == CvSVM::LINEAR) { alpha1 = 1; @@ -810,7 +779,6 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) { alpha1 = params->gamma; beta1 = params->coef0; - degree1 = params->degree; } if(params->kernel_type == CvSVM::SIGMOID) { @@ -834,7 +802,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) src.upload(src1); oclMat dst; -#if defined HAVE_CLAMDBLAS +#ifdef HAVE_CLAMDBLAS dst = oclMat(sample_count, sample_count, CV_32FC1); oclMat src3(sample_count, sample_count, CV_32FC1, Scalar::all(1)); @@ -845,14 +813,18 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) } #else + // TODO fix it + CV_Error(Error::StsNotImplemented, "This part of code contains mistakes. Install AMD BLAS in order to get a correct result or use CPU version of SVM"); + + double degree1 = 0.0; + if(params->kernel_type == CvSVM::POLY) + degree1 = params->degree; + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) - { dst = oclMat(sample_count, sample_count, CV_32FC1); - } else - { dst = oclMat(sample_count, sample_count, CV_64FC1); - } + if(params->kernel_type == CvSVM::LINEAR ) { src_e = src; @@ -868,13 +840,9 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) { src_e = src; if(sample_count > 0) - { matmul_poly(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1, degree1, true); - } else - { matmul_poly(src, src_e, dst, sample_count, sample_count, var_count, alpha1, beta1, degree1, false); - } } #endif @@ -883,21 +851,14 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) { src_e = src; if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) - { dst = oclMat(sample_count, sample_count, CV_32FC1); - } else - { dst = oclMat(sample_count, sample_count, CV_64FC1); - } + if(sample_count > 0) - { matmul_rbf(src, src_e, dst, sample_count, sample_count, var_count, gamma1, true); - } else - { matmul_rbf(src, src_e, dst, 
sample_count, sample_count, var_count, gamma1, false); - } } dst.download(dst1); for( i = 0; i < alpha_count; i++ ) @@ -908,9 +869,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) double alpha_i = alpha[i]; for( j = 0; j < alpha_count; j++ ) - { G[j] += alpha_i * Q_i[j]; - } } } @@ -926,14 +885,10 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) for( i = 0; i < alpha_count; i++ ) { if( fabs(G[i]) > 1e+300 ) - { return false; - } if( fabs(alpha[i]) > 1e16 ) - { return false; - } } #endif @@ -1021,9 +976,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) delta_alpha_j = alpha_j - old_alpha_j; for( k = 0; k < alpha_count; k++ ) - { G[k] += Q_i[k] * delta_alpha_i + Q_j[k] * delta_alpha_j; - } } // calculate rho @@ -1031,9 +984,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) // calculate objective value for( i = 0, si.obj = 0; i < alpha_count; i++ ) - { si.obj += alpha[i] * (G[i] + b[i]); - } si.obj *= 0.5; @@ -1053,14 +1004,11 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat& const Qfloat max_val = (Qfloat)(FLT_MAX * 1e-3); int j; for( j = 0; j < vcount; j++ ) - { if( results[j] > max_val ) - { results[j] = max_val; - } - } // FIXIT #endif } + bool CvSVMKernel_ocl::create( const CvSVMParams* _params, Calc_ocl _calc_func, Calc _calc_func1 ) { clear(); @@ -1084,9 +1032,10 @@ CvSVMKernel_ocl::CvSVMKernel_ocl(const CvSVMParams* params, CvSVMKernel_ocl::Cal CvSVMKernel::clear(); CvSVMKernel_ocl::create( params, _calc_func, _calc_func1 ); } + void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat* results, Mat& src) { -#if defined HAVE_CLAMDBLAS +#ifdef HAVE_CLAMDBLAS for(int i = 0; i < vcount; i++) { @@ -1109,23 +1058,17 @@ void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat* } #endif } + void CvSVMKernel_ocl::calc_rbf( int vcount, const int row_idx, Qfloat* results, Mat& src) { 
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) - { for(int m = 0; m < vcount; m++) - { results[m] = (Qfloat) * src.ptr<float>(row_idx, m); - } - } else - { for(int m = 0; m < vcount; m++) - { results[m] = (Qfloat) * src.ptr<double>(row_idx, m); - } - } } + void CvSVMKernel_ocl::calc_linear( int vcount, const int row_idx, Qfloat* results, Mat& src ) { calc_non_rbf_base( vcount, row_idx, results, src); @@ -1133,16 +1076,13 @@ void CvSVMKernel_ocl::calc_linear( int vcount, const int row_idx, Qfloat* result void CvSVMKernel_ocl::calc_poly( int vcount, const int row_idx, Qfloat* results, Mat& src) { - calc_non_rbf_base( vcount, row_idx, results, src); //FIXIT #if defined HAVE_CLAMDBLAS CvMat R = cvMat( 1, vcount, QFLOAT_TYPE, results ); if( vcount > 0 ) - { cvPow( &R, &R, params->degree ); - } //FIXIT #endif } @@ -1157,16 +1097,13 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul Qfloat t = results[j]; double e = ::exp(-fabs(t)); if( t > 0 ) - { results[j] = (Qfloat)((1. - e) / (1. + e)); - } else - { results[j] = (Qfloat)((e - 1.)
/ (e + 1.)); - } } //FIXIT #endif } + CvSVM_OCL::CvSVM_OCL() { CvSVM(); @@ -1191,6 +1128,7 @@ void CvSVM_OCL::create_kernel() { kernel = new CvSVMKernel_ocl(&params, 0, 0); } + void CvSVM_OCL::create_solver( ) { solver = new CvSVMSolver_ocl(&params); diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index 4e0d0ddda5..10c9e5f7f3 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -411,9 +411,6 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho) { Context* clCxt = I0.clCxt; - const bool isImgSupported = support_image2d(clCxt); - - CV_Assert(isImgSupported); int u1ElementSize = u1.elemSize(); int u1Step = u1.step/u1ElementSize; diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 9ff0e1d1ee..6a17d3d0b4 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -10,7 +10,8 @@ // Intel License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners.
// // Redistribution and use in source and binary forms, with or without modification, @@ -41,105 +42,17 @@ #include "test_precomp.hpp" -#ifdef HAVE_OPENCL +#define DUMP_INFO_STDOUT(propertyDisplayName, propertyValue) \ + do { \ + std::cout << (propertyDisplayName) << ": " << (propertyValue) << std::endl; \ + } while (false) -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using std::cout; -using std::endl; +#define DUMP_INFO_XML(propertyXMLName, propertyValue) \ + do { \ + std::stringstream ss; ss << propertyValue; \ + ::testing::Test::RecordProperty((propertyXMLName), ss.str()); \ + } while (false) -void print_info() -{ - printf("\n"); -#if defined _WIN32 -# if defined _WIN64 - puts("OS: Windows 64"); -# else - puts("OS: Windows 32"); -# endif -#elif defined linux -# if defined _LP64 - puts("OS: Linux 64"); -# else - puts("OS: Linux 32"); -# endif -#elif defined __APPLE__ -# if defined _LP64 - puts("OS: Apple 64"); -# else - puts("OS: Apple 32"); -# endif -#endif +#include "opencv2/ocl/private/opencl_dumpinfo.hpp" -} -int main(int argc, char **argv) -{ - TS::ptr()->init("."); - InitGoogleTest(&argc, argv); - const char *keys = - "{ h | false | print help message }" - "{ t | gpu | set device type:i.e. -t=cpu or gpu}" - "{ p | -1 | set platform id i.e. -p=0}" - "{ d | 0 | set device id i.e. 
-d=0}"; - - if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates - { - CommandLineParser cmd(argc, argv, keys); - if (cmd.has("help")) - { - cout << "Available options besides google test option:" << endl; - cmd.printMessage(); - return 0; - } - string type = cmd.get("type"); - int pid = cmd.get("platform"); - int device = cmd.get("device"); - - print_info(); - int flag = CVCL_DEVICE_TYPE_GPU; - if(type == "cpu") - { - flag = CVCL_DEVICE_TYPE_CPU; - } - - cv::ocl::PlatformsInfo platformsInfo; - cv::ocl::getOpenCLPlatforms(platformsInfo); - if (pid >= (int)platformsInfo.size()) - { - std::cout << "platform is invalid\n"; - return 1; - } - - cv::ocl::DevicesInfo devicesInfo; - int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); - if (device < 0 || device >= devnums) - { - std::cout << "device/platform invalid\n"; - return 1; - } - - cv::ocl::setDevice(devicesInfo[device]); - } - - const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); - - cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ? - "CPU" : - (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown")) << endl - << "Platform name: " << deviceInfo.platform->platformName << endl - << "Device name: " << deviceInfo.deviceName << endl; - return RUN_ALL_TESTS(); -} - -#else // DON'T HAVE_OPENCL - -int main() -{ - printf("OpenCV was built without OpenCL support\n"); - return 0; -} - - -#endif // HAVE_OPENCL +CV_TEST_MAIN(".", dumpOpenCLDevice()) diff --git a/modules/ocl/test/test_api.cpp b/modules/ocl/test/test_api.cpp new file mode 100644 index 0000000000..0b59fc6a55 --- /dev/null +++ b/modules/ocl/test/test_api.cpp @@ -0,0 +1,80 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem + +TEST(TestAPI, openCLExecuteKernelInterop) +{ + cv::RNG rng; + Size sz(10000, 1); + cv::Mat cpuMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false); + + cv::ocl::oclMat gpuMat(cpuMat); + cv::ocl::oclMat gpuMatDst(sz, CV_32FC4); + + const char* kernelStr = +"__kernel void test_kernel(__global float4* src, __global float4* dst) {\n" +" int x = get_global_id(0);\n" +" dst[x] = src[x];\n" +"}\n"; + + cv::ocl::ProgramSource program("test_interop", kernelStr); + + using namespace std; + vector > args; + args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMat.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMatDst.data )); + + size_t globalThreads[3] = { sz.width, 1, 1 }; + cv::ocl::openCLExecuteKernelInterop( + gpuMat.clCxt, + program, + "test_kernel", + globalThreads, NULL, args, + -1, -1, + ""); + + cv::Mat dst; + gpuMatDst.download(dst); + + EXPECT_LE(checkNorm(cpuMat, dst), 1e-3); +} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index f2f13ec41e..1adb615963 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -80,12 +80,14 @@ PARAM_TEST_CASE(Lut, int, int, bool, bool) cv::Mat dst_roi; // ocl dst mat for testing + cv::ocl::oclMat gsrc_whole; + cv::ocl::oclMat glut_whole; cv::ocl::oclMat gdst_whole; // ocl mat with roi - cv::ocl::oclMat gsrc; - cv::ocl::oclMat glut; - cv::ocl::oclMat gdst; + cv::ocl::oclMat gsrc_roi; + cv::ocl::oclMat glut_roi; + cv::ocl::oclMat gdst_roi; virtual void SetUp() { @@ -93,77 +95,45 @@ PARAM_TEST_CASE(Lut, int, int, bool, bool) cn = GET_PARAM(1); same_cn = GET_PARAM(2); use_roi = GET_PARAM(3); - - const int src_type = CV_MAKE_TYPE(CV_8U, cn); - const int lut_type = CV_MAKE_TYPE(lut_depth, same_cn ? 
cn : 1); - const int dst_type = CV_MAKE_TYPE(lut_depth, cn); - - cv::RNG &rng = TS::ptr()->get_rng(); - - src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), src_type, 0, 256, false); - lut = randomMat(rng, use_roi ? randomSize(260, 300) : Size(256, 1), lut_type, 5, 16, false); - dst = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), dst_type, 5, 16, false); } void random_roi() { - // set up roi - int roicols, roirows; - int srcx, srcy; - int lutx, luty; - int dstx, dsty; - - if (use_roi) - { - // randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); - - roicols = rng.uniform(1, MIN_VALUE); - roirows = rng.uniform(1, MIN_VALUE); + const int src_type = CV_MAKE_TYPE(CV_8U, cn); + const int lut_type = CV_MAKE_TYPE(lut_depth, same_cn ? cn : 1); + const int dst_type = CV_MAKE_TYPE(lut_depth, cn); - srcx = rng.uniform(0, src.cols - roicols); - srcy = rng.uniform(0, src.rows - roirows); - lutx = rng.uniform(0, lut.cols - 256); - luty = rng.uniform(0, lut.rows - 1); + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, src_type, 0, 256); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - } - else - { - roicols = src.cols; - roirows = src.rows; - srcx = srcy = 0; - lutx = luty = 0; - dstx = dsty = 0; - } + Size lutRoiSize = Size(256, 1); + Border lutBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(lut, lut_roi, lutRoiSize, lutBorder, lut_type, 5, 16); - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - lut_roi = lut(Rect(lutx, luty, 256, 1)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + Border dstBorder = randomBorder(0, use_roi ? 
MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, dst_type, 5, 16); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gsrc = src_roi; - glut = lut_roi; + generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder); + generateOclMat(glut_whole, glut_roi, lut, lutRoiSize, lutBorder); + generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder); } void Near(double threshold = 0.) { EXPECT_MAT_NEAR(dst, Mat(gdst_whole), threshold); - EXPECT_MAT_NEAR(dst_roi, Mat(gdst), threshold); + EXPECT_MAT_NEAR(dst_roi, Mat(gdst_roi), threshold); } }; -TEST_P(Lut, Mat) +OCL_TEST_P(Lut, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::LUT(src_roi, lut_roi, dst_roi); - cv::ocl::LUT(gsrc, glut, gdst); + cv::ocl::LUT(gsrc_roi, glut_roi, gdst_roi); Near(); } @@ -183,50 +153,34 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool) cv::Mat src2; cv::Mat mask; cv::Mat dst1; - cv::Mat dst2; // for two outputs - - // set up roi - int roicols, roirows; - int src1x, src1y; - int src2x, src2y; - int dst1x, dst1y; - int dst2x, dst2y; - int maskx, masky; + cv::Mat dst2; // src mat with roi cv::Mat src1_roi; cv::Mat src2_roi; cv::Mat mask_roi; cv::Mat dst1_roi; - cv::Mat dst2_roi; // for two outputs + cv::Mat dst2_roi; // ocl dst mat for testing + cv::ocl::oclMat gsrc1_whole; + cv::ocl::oclMat gsrc2_whole; cv::ocl::oclMat gdst1_whole; - cv::ocl::oclMat gdst2_whole; // for two outputs + cv::ocl::oclMat gdst2_whole; + cv::ocl::oclMat gmask_whole; // ocl mat with roi - cv::ocl::oclMat gsrc1; - cv::ocl::oclMat gsrc2; - cv::ocl::oclMat gdst1; - cv::ocl::oclMat gdst2; // for two outputs - cv::ocl::oclMat gmask; + cv::ocl::oclMat gsrc1_roi; + cv::ocl::oclMat gsrc2_roi; + cv::ocl::oclMat gdst1_roi; + cv::ocl::oclMat gdst2_roi; + cv::ocl::oclMat gmask_roi; virtual void SetUp() { depth = GET_PARAM(0); cn = GET_PARAM(1); use_roi = GET_PARAM(2); - const int type = CV_MAKE_TYPE(depth, cn); - - cv::RNG &rng = TS::ptr()->get_rng(); - - src1 = 
randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false); - src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false); - dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); val = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0)); @@ -234,65 +188,43 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool) void random_roi() { - if (use_roi) - { - // randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); - - roicols = rng.uniform(1, MIN_VALUE); - roirows = rng.uniform(1, MIN_VALUE); + const int type = CV_MAKE_TYPE(depth, cn); - src1x = rng.uniform(0, src1.cols - roicols); - src1y = rng.uniform(0, src1.rows - roirows); - src2x = rng.uniform(0, src2.cols - roicols); - src2y = rng.uniform(0, src2.rows - roirows); + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src1, src1_roi, roiSize, srcBorder, type, 2, 11); - dst1x = rng.uniform(0, dst1.cols - roicols); - dst1y = rng.uniform(0, dst1.rows - roirows); - dst2x = rng.uniform(0, dst2.cols - roicols); - dst2y = rng.uniform(0, dst2.rows - roirows); + Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src2, src2_roi, roiSize, src2Border, type, -1540, 1740); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - } - else - { - roicols = src1.cols; - roirows = src1.rows; - src1x = src1y = 0; - src2x = src2y = 0; - dst1x = dst1y = 0; - dst2x = dst2y = 0; - maskx = masky = 0; - } + Border dst1Border = randomBorder(0, use_roi ? 
MAX_VALUE : 0); + randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16); - src1_roi = src1(Rect(src1x, src1y, roicols, roirows)); - src2_roi = src2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows)); + Border dst2Border = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst2, dst2_roi, roiSize, dst2Border, type, 5, 16); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows)); + Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC1, 0, 2); + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - gdst2_whole = dst2; - gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows)); - gsrc1 = src1_roi; - gsrc2 = src2_roi; - gmask = mask_roi; + generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder); + generateOclMat(gsrc2_whole, gsrc2_roi, src2, roiSize, src2Border); + generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border); + generateOclMat(gdst2_whole, gdst2_roi, dst2, roiSize, dst2Border); + generateOclMat(gmask_whole, gmask_roi, mask, roiSize, maskBorder); } void Near(double threshold = 0.) { EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold); - EXPECT_MAT_NEAR(dst1_roi, Mat(gdst1), threshold); + EXPECT_MAT_NEAR(dst1_roi, Mat(gdst1_roi), threshold); } void Near1(double threshold = 0.) 
{ EXPECT_MAT_NEAR(dst2, Mat(gdst2_whole), threshold); - EXPECT_MAT_NEAR(dst2_roi, Mat(gdst2), threshold); + EXPECT_MAT_NEAR(dst2_roi, Mat(gdst2_roi), threshold); } }; @@ -300,14 +232,14 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool) typedef ArithmTestBase Exp; -TEST_P(Exp, Mat) +OCL_TEST_P(Exp, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::exp(src1_roi, dst1_roi); - cv::ocl::exp(gsrc1, gdst1); + cv::ocl::exp(gsrc1_roi, gdst1_roi); Near(2); } @@ -317,14 +249,14 @@ TEST_P(Exp, Mat) typedef ArithmTestBase Log; -TEST_P(Log, Mat) +OCL_TEST_P(Log, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::log(src1_roi, dst1_roi); - cv::ocl::log(gsrc1, gdst1); + cv::ocl::log(gsrc1_roi, gdst1_roi); Near(1); } } @@ -333,50 +265,50 @@ TEST_P(Log, Mat) typedef ArithmTestBase Add; -TEST_P(Add, Mat) +OCL_TEST_P(Add, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::add(src1_roi, src2_roi, dst1_roi); - cv::ocl::add(gsrc1, gsrc2, gdst1); + cv::ocl::add(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Add, Mat_Mask) +OCL_TEST_P(Add, Mat_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::add(src1_roi, src2_roi, dst1_roi, mask_roi); - cv::ocl::add(gsrc1, gsrc2, gdst1, gmask); + cv::ocl::add(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi); Near(0); } } -TEST_P(Add, Scalar) +OCL_TEST_P(Add, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::add(src1_roi, val, dst1_roi); - cv::ocl::add(gsrc1, val, gdst1); + cv::ocl::add(gsrc1_roi, val, gdst1_roi); Near(1e-5); } } -TEST_P(Add, Scalar_Mask) +OCL_TEST_P(Add, Scalar_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::add(src1_roi, val, dst1_roi, mask_roi); - cv::ocl::add(gsrc1, val, gdst1, gmask); + cv::ocl::add(gsrc1_roi, val, gdst1_roi, gmask_roi); Near(1e-5); } } @@ -385,52 +317,52 @@ TEST_P(Add, Scalar_Mask) typedef ArithmTestBase Sub; -TEST_P(Sub, Mat) +OCL_TEST_P(Sub, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::subtract(src1_roi, 
src2_roi, dst1_roi); - cv::ocl::subtract(gsrc1, gsrc2, gdst1); + cv::ocl::subtract(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Sub, Mat_Mask) +OCL_TEST_P(Sub, Mat_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::subtract(src1_roi, src2_roi, dst1_roi, mask_roi); - cv::ocl::subtract(gsrc1, gsrc2, gdst1, gmask); + cv::ocl::subtract(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi); Near(0); } } -TEST_P(Sub, Scalar) +OCL_TEST_P(Sub, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::subtract(src1_roi, val, dst1_roi); - cv::ocl::subtract(gsrc1, val, gdst1); + cv::ocl::subtract(gsrc1_roi, val, gdst1_roi); Near(1e-5); } } -TEST_P(Sub, Scalar_Mask) +OCL_TEST_P(Sub, Scalar_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::subtract(src1_roi, val, dst1_roi, mask_roi); - cv::ocl::subtract(gsrc1, val, gdst1, gmask); + cv::ocl::subtract(gsrc1_roi, val, gdst1_roi, gmask_roi); Near(1e-5); } } @@ -439,41 +371,41 @@ TEST_P(Sub, Scalar_Mask) typedef ArithmTestBase Mul; -TEST_P(Mul, Mat) +OCL_TEST_P(Mul, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::multiply(src1_roi, src2_roi, dst1_roi); - cv::ocl::multiply(gsrc1, gsrc2, gdst1); + cv::ocl::multiply(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Mul, Scalar) +OCL_TEST_P(Mul, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::multiply(val[0], src1_roi, dst1_roi); - cv::ocl::multiply(val[0], gsrc1, gdst1); + cv::ocl::multiply(val[0], gsrc1_roi, gdst1_roi); - Near(gdst1.depth() >= CV_32F ? 1e-3 : 1); + Near(gdst1_roi.depth() >= CV_32F ? 1e-3 : 1); } } -TEST_P(Mul, Mat_Scalar) +OCL_TEST_P(Mul, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::multiply(src1_roi, src2_roi, dst1_roi, val[0]); - cv::ocl::multiply(gsrc1, gsrc2, gdst1, val[0]); + cv::ocl::multiply(gsrc1_roi, gsrc2_roi, gdst1_roi, val[0]); - Near(gdst1.depth() >= CV_32F ? 1e-3 : 1); + Near(gdst1_roi.depth() >= CV_32F ? 
1e-3 : 1); } } @@ -481,41 +413,87 @@ TEST_P(Mul, Mat_Scalar) typedef ArithmTestBase Div; -TEST_P(Div, Mat) +OCL_TEST_P(Div, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::divide(src1_roi, src2_roi, dst1_roi); - cv::ocl::divide(gsrc1, gsrc2, gdst1); + cv::ocl::divide(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(1); } } -TEST_P(Div, Scalar) +OCL_TEST_P(Div, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::divide(val[0], src1_roi, dst1_roi); - cv::ocl::divide(val[0], gsrc1, gdst1); + cv::ocl::divide(val[0], gsrc1_roi, gdst1_roi); - Near(gdst1.depth() >= CV_32F ? 1e-3 : 1); + Near(gdst1_roi.depth() >= CV_32F ? 1e-3 : 1); } } -TEST_P(Div, Mat_Scalar) +OCL_TEST_P(Div, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::divide(src1_roi, src2_roi, dst1_roi, val[0]); - cv::ocl::divide(gsrc1, gsrc2, gdst1, val[0]); + cv::ocl::divide(gsrc1_roi, gsrc2_roi, gdst1_roi, val[0]); - Near(gdst1.depth() >= CV_32F ? 1e-3 : 1); + Near(gdst1_roi.depth() >= CV_32F ? 
4e-3 : 1); + } +} + +//////////////////////////////// Absdiff ///////////////////////////////////////////////// + +typedef ArithmTestBase Min; + +OCL_TEST_P(Min, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + dst1_roi = cv::min(src1_roi, src2_roi); + cv::ocl::min(gsrc1_roi, gsrc2_roi, gdst1_roi); + Near(0); + } +} + +typedef ArithmTestBase Max; + +OCL_TEST_P(Max, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + dst1_roi = cv::min(src1_roi, src2_roi); + cv::ocl::min(gsrc1_roi, gsrc2_roi, gdst1_roi); + Near(0); + } +} + +//////////////////////////////// Abs ///////////////////////////////////////////////////// + +typedef ArithmTestBase Abs; + +OCL_TEST_P(Abs, Abs) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + dst1_roi = cv::abs(src1_roi); + cv::ocl::abs(gsrc1_roi, gdst1_roi); + Near(0); } } @@ -523,26 +501,26 @@ TEST_P(Div, Mat_Scalar) typedef ArithmTestBase Absdiff; -TEST_P(Absdiff, Mat) +OCL_TEST_P(Absdiff, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::absdiff(src1_roi, src2_roi, dst1_roi); - cv::ocl::absdiff(gsrc1, gsrc2, gdst1); + cv::ocl::absdiff(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Absdiff, Scalar) +OCL_TEST_P(Absdiff, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::absdiff(src1_roi, val, dst1_roi); - cv::ocl::absdiff(gsrc1, val, gdst1); + cv::ocl::absdiff(gsrc1_roi, val, gdst1_roi); Near(1e-5); } } @@ -551,27 +529,27 @@ TEST_P(Absdiff, Scalar) typedef ArithmTestBase CartToPolar; -TEST_P(CartToPolar, angleInDegree) +OCL_TEST_P(CartToPolar, angleInDegree) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::cartToPolar(src1_roi, src2_roi, dst1_roi, dst2_roi, true); - cv::ocl::cartToPolar(gsrc1, gsrc2, gdst1, gdst2, true); + cv::ocl::cartToPolar(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi, true); Near(.5); Near1(.5); } } -TEST_P(CartToPolar, angleInRadians) +OCL_TEST_P(CartToPolar, angleInRadians) { for (int j = 0; j < LOOP_TIMES; j++) { 
random_roi(); cv::cartToPolar(src1_roi, src2_roi, dst1_roi, dst2_roi); - cv::ocl::cartToPolar(gsrc1, gsrc2, gdst1, gdst2); + cv::ocl::cartToPolar(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi); Near(.5); Near1(.5); } @@ -581,28 +559,28 @@ TEST_P(CartToPolar, angleInRadians) typedef ArithmTestBase PolarToCart; -TEST_P(PolarToCart, angleInDegree) +OCL_TEST_P(PolarToCart, angleInDegree) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::polarToCart(src1_roi, src2_roi, dst1_roi, dst2_roi, true); - cv::ocl::polarToCart(gsrc1, gsrc2, gdst1, gdst2, true); + cv::ocl::polarToCart(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi, true); Near(.5); Near1(.5); } } -TEST_P(PolarToCart, angleInRadians) +OCL_TEST_P(PolarToCart, angleInRadians) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::polarToCart(src1_roi, src2_roi, dst1_roi, dst2_roi); - cv::ocl::polarToCart(gsrc1, gsrc2, gdst1, gdst2); + cv::ocl::polarToCart(gsrc1_roi, gsrc2_roi, gdst1_roi, gdst2_roi); Near(.5); Near1(.5); @@ -613,14 +591,14 @@ TEST_P(PolarToCart, angleInRadians) typedef ArithmTestBase Magnitude; -TEST_P(Magnitude, Mat) +OCL_TEST_P(Magnitude, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::magnitude(src1_roi, src2_roi, dst1_roi); - cv::ocl::magnitude(gsrc1, gsrc2, gdst1); + cv::ocl::magnitude(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(depth == CV_64F ? 
1e-5 : 1e-2); } } @@ -629,50 +607,38 @@ TEST_P(Magnitude, Mat) typedef ArithmTestBase Transpose; -TEST_P(Transpose, Mat) +OCL_TEST_P(Transpose, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::transpose(src1_roi, dst1_roi); - cv::ocl::transpose(gsrc1, gdst1); + cv::ocl::transpose(gsrc1_roi, gdst1_roi); Near(1e-5); } } -TEST_P(Transpose, SquareInplace) +OCL_TEST_P(Transpose, SquareInplace) { - cv::RNG &rng = TS::ptr()->get_rng(); - int value = randomInt(MIN_VALUE, MAX_VALUE); - src1 = randomMat(rng, Size(value, value), CV_MAKE_TYPE(depth, cn), 5, 16, false); + const int type = CV_MAKE_TYPE(depth, cn); - if (use_roi) + for (int j = 0; j < LOOP_TIMES; j++) { - roirows = roicols = randomInt(1, src1.cols); + Size roiSize = randomSize(1, MAX_VALUE); + roiSize.height = roiSize.width; // make it square - src1x = randomInt(0, src1.cols - roicols); - src1y = randomInt(0, src1.rows - roirows); - } - else - { - roicols = roirows = src1.cols; - src1x = src1y = 0; - } + Border srcBorder = randomBorder(0, use_roi ? 
MAX_VALUE : 0); + randomSubMat(src1, src1_roi, roiSize, srcBorder, type, 5, 16); - Rect r(src1x, src1y, roicols, roirows); - src1_roi = src1(r); - gdst1_whole = src1; - gdst1 = gdst1_whole(r); + generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder); - for (int j = 0; j < LOOP_TIMES; j++) - { cv::transpose(src1_roi, src1_roi); - cv::ocl::transpose(gdst1, gdst1); + cv::ocl::transpose(gsrc1_roi, gsrc1_roi); - EXPECT_MAT_NEAR(src1, Mat(gdst1_whole), 0.0); - EXPECT_MAT_NEAR(src1_roi, Mat(gdst1), 0.0); + EXPECT_MAT_NEAR(src1, Mat(gsrc1_whole), 0.0); + EXPECT_MAT_NEAR(src1_roi, Mat(gsrc1_roi), 0.0); } } @@ -680,38 +646,38 @@ TEST_P(Transpose, SquareInplace) typedef ArithmTestBase Flip; -TEST_P(Flip, X) +OCL_TEST_P(Flip, X) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::flip(src1_roi, dst1_roi, 0); - cv::ocl::flip(gsrc1, gdst1, 0); + cv::ocl::flip(gsrc1_roi, gdst1_roi, 0); Near(1e-5); } } -TEST_P(Flip, Y) +OCL_TEST_P(Flip, Y) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::flip(src1_roi, dst1_roi, 1); - cv::ocl::flip(gsrc1, gdst1, 1); + cv::ocl::flip(gsrc1_roi, gdst1_roi, 1); Near(1e-5); } } -TEST_P(Flip, BOTH) +OCL_TEST_P(Flip, BOTH) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::flip(src1_roi, dst1_roi, -1); - cv::ocl::flip(gsrc1, gdst1, -1); + cv::ocl::flip(gsrc1_roi, gdst1_roi, -1); Near(1e-5); } } @@ -720,7 +686,7 @@ TEST_P(Flip, BOTH) typedef ArithmTestBase MinMax; -TEST_P(MinMax, MAT) +OCL_TEST_P(MinMax, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -739,19 +705,19 @@ TEST_P(MinMax, MAT) { signed char val = src1_roi.at(i, j); if (val < minVal) minVal = val; - else if (val > maxVal) maxVal = val; + if (val > maxVal) maxVal = val; } } double minVal_, maxVal_; - cv::ocl::minMax(gsrc1, &minVal_, &maxVal_); + cv::ocl::minMax(gsrc1_roi, &minVal_, &maxVal_); EXPECT_DOUBLE_EQ(minVal_, minVal); EXPECT_DOUBLE_EQ(maxVal_, maxVal); } } -TEST_P(MinMax, MASK) +OCL_TEST_P(MinMax, MASK) { for (int j = 0; j < LOOP_TIMES; j++) { @@ 
-777,7 +743,7 @@ TEST_P(MinMax, MASK) } double minVal_, maxVal_; - cv::ocl::minMax(gsrc1, &minVal_, &maxVal_, gmask); + cv::ocl::minMax(gsrc1_roi, &minVal_, &maxVal_, gmask_roi); EXPECT_DOUBLE_EQ(minVal, minVal_); EXPECT_DOUBLE_EQ(maxVal, maxVal_); @@ -788,7 +754,7 @@ TEST_P(MinMax, MASK) typedef ArithmTestBase MinMaxLoc; -TEST_P(MinMaxLoc, MAT) +OCL_TEST_P(MinMaxLoc, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -825,7 +791,7 @@ TEST_P(MinMaxLoc, MAT) double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; - cv::ocl::minMaxLoc(gsrc1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); + cv::ocl::minMaxLoc(gsrc1_roi, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.; if (depth == 0) @@ -902,7 +868,7 @@ TEST_P(MinMaxLoc, MAT) } } -TEST_P(MinMaxLoc, MASK) +OCL_TEST_P(MinMaxLoc, MASK) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -938,7 +904,7 @@ TEST_P(MinMaxLoc, MASK) double minVal_, maxVal_; cv::Point minLoc_, maxLoc_; - cv::ocl::minMaxLoc(gsrc1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); + cv::ocl::minMaxLoc(gsrc1_roi, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask_roi); double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.; if (minLoc_.x == -1 || minLoc_.y == -1 || maxLoc_.x == -1 || maxLoc_.y == -1) continue; @@ -1020,14 +986,14 @@ TEST_P(MinMaxLoc, MASK) typedef ArithmTestBase Sum; -TEST_P(Sum, MAT) +OCL_TEST_P(Sum, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); Scalar cpures = cv::sum(src1_roi); - Scalar gpures = cv::ocl::sum(gsrc1); + Scalar gpures = cv::ocl::sum(gsrc1_roi); // check results EXPECT_NEAR(cpures[0], gpures[0], 0.1); @@ -1066,7 +1032,7 @@ static Scalar sqrSum(const Mat & src) typedef Scalar (*sumFunc)(const Mat &); -TEST_P(SqrSum, MAT) +OCL_TEST_P(SqrSum, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1085,7 +1051,7 @@ TEST_P(SqrSum, MAT) CV_Assert(func != 0); Scalar 
cpures = func(src1_roi); - Scalar gpures = cv::ocl::sqrSum(gsrc1); + Scalar gpures = cv::ocl::sqrSum(gsrc1_roi); // check results EXPECT_NEAR(cpures[0], gpures[0], 1.0); @@ -1122,7 +1088,7 @@ static Scalar absSum(const Mat & src) return sum; } -TEST_P(AbsSum, MAT) +OCL_TEST_P(AbsSum, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1141,7 +1107,7 @@ TEST_P(AbsSum, MAT) CV_Assert(func != 0); Scalar cpures = func(src1_roi); - Scalar gpures = cv::ocl::absSum(gsrc1); + Scalar gpures = cv::ocl::absSum(gsrc1_roi); // check results EXPECT_NEAR(cpures[0], gpures[0], 0.1); @@ -1155,13 +1121,13 @@ TEST_P(AbsSum, MAT) typedef ArithmTestBase CountNonZero; -TEST_P(CountNonZero, MAT) +OCL_TEST_P(CountNonZero, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); int cpures = cv::countNonZero(src1_roi); - int gpures = cv::ocl::countNonZero(gsrc1); + int gpures = cv::ocl::countNonZero(gsrc1_roi); EXPECT_DOUBLE_EQ((double)cpures, (double)gpures); } @@ -1171,25 +1137,25 @@ TEST_P(CountNonZero, MAT) typedef ArithmTestBase Phase; -TEST_P(Phase, angleInDegrees) +OCL_TEST_P(Phase, angleInDegrees) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::phase(src1_roi, src2_roi, dst1_roi, true); - cv::ocl::phase(gsrc1, gsrc2, gdst1, true); + cv::ocl::phase(gsrc1_roi, gsrc2_roi, gdst1_roi, true); Near(1e-2); } } -TEST_P(Phase, angleInRadians) +OCL_TEST_P(Phase, angleInRadians) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::phase(src1_roi, src2_roi, dst1_roi); - cv::ocl::phase(gsrc1, gsrc2, gdst1); + cv::ocl::phase(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(1e-2); } @@ -1199,50 +1165,50 @@ TEST_P(Phase, angleInRadians) typedef ArithmTestBase Bitwise_and; -TEST_P(Bitwise_and, Mat) +OCL_TEST_P(Bitwise_and, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_and(src1_roi, src2_roi, dst1_roi); - cv::ocl::bitwise_and(gsrc1, gsrc2, gdst1); + cv::ocl::bitwise_and(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Bitwise_and, Mat_Mask) 
+OCL_TEST_P(Bitwise_and, Mat_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_and(src1_roi, src2_roi, dst1_roi, mask_roi); - cv::ocl::bitwise_and(gsrc1, gsrc2, gdst1, gmask); + cv::ocl::bitwise_and(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi); Near(0); } } -TEST_P(Bitwise_and, Scalar) +OCL_TEST_P(Bitwise_and, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_and(src1_roi, val, dst1_roi); - cv::ocl::bitwise_and(gsrc1, val, gdst1); + cv::ocl::bitwise_and(gsrc1_roi, val, gdst1_roi); Near(1e-5); } } -TEST_P(Bitwise_and, Scalar_Mask) +OCL_TEST_P(Bitwise_and, Scalar_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_and(src1_roi, val, dst1_roi, mask_roi); - cv::ocl::bitwise_and(gsrc1, val, gdst1, gmask); + cv::ocl::bitwise_and(gsrc1_roi, val, gdst1_roi, gmask_roi); Near(1e-5); } } @@ -1251,50 +1217,50 @@ TEST_P(Bitwise_and, Scalar_Mask) typedef ArithmTestBase Bitwise_or; -TEST_P(Bitwise_or, Mat) +OCL_TEST_P(Bitwise_or, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_or(src1_roi, src2_roi, dst1_roi); - cv::ocl::bitwise_or(gsrc1, gsrc2, gdst1); + cv::ocl::bitwise_or(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Bitwise_or, Mat_Mask) +OCL_TEST_P(Bitwise_or, Mat_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_or(src1_roi, src2_roi, dst1_roi, mask_roi); - cv::ocl::bitwise_or(gsrc1, gsrc2, gdst1, gmask); + cv::ocl::bitwise_or(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi); Near(0); } } -TEST_P(Bitwise_or, Scalar) +OCL_TEST_P(Bitwise_or, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_or(src1_roi, val, dst1_roi); - cv::ocl::bitwise_or(gsrc1, val, gdst1); + cv::ocl::bitwise_or(gsrc1_roi, val, gdst1_roi); Near(1e-5); } } -TEST_P(Bitwise_or, Scalar_Mask) +OCL_TEST_P(Bitwise_or, Scalar_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_or(src1_roi, val, dst1_roi, mask_roi); - cv::ocl::bitwise_or(gsrc1, val, 
gdst1, gmask); + cv::ocl::bitwise_or(gsrc1_roi, val, gdst1_roi, gmask_roi); Near(1e-5); } } @@ -1303,50 +1269,50 @@ TEST_P(Bitwise_or, Scalar_Mask) typedef ArithmTestBase Bitwise_xor; -TEST_P(Bitwise_xor, Mat) +OCL_TEST_P(Bitwise_xor, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_xor(src1_roi, src2_roi, dst1_roi); - cv::ocl::bitwise_xor(gsrc1, gsrc2, gdst1); + cv::ocl::bitwise_xor(gsrc1_roi, gsrc2_roi, gdst1_roi); Near(0); } } -TEST_P(Bitwise_xor, Mat_Mask) +OCL_TEST_P(Bitwise_xor, Mat_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_xor(src1_roi, src2_roi, dst1_roi, mask_roi); - cv::ocl::bitwise_xor(gsrc1, gsrc2, gdst1, gmask); + cv::ocl::bitwise_xor(gsrc1_roi, gsrc2_roi, gdst1_roi, gmask_roi); Near(0); } } -TEST_P(Bitwise_xor, Scalar) +OCL_TEST_P(Bitwise_xor, Scalar) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_xor(src1_roi, val, dst1_roi); - cv::ocl::bitwise_xor(gsrc1, val, gdst1); + cv::ocl::bitwise_xor(gsrc1_roi, val, gdst1_roi); Near(1e-5); } } -TEST_P(Bitwise_xor, Scalar_Mask) +OCL_TEST_P(Bitwise_xor, Scalar_Mask) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_xor(src1_roi, val, dst1_roi, mask_roi); - cv::ocl::bitwise_xor(gsrc1, val, gdst1, gmask); + cv::ocl::bitwise_xor(gsrc1_roi, val, gdst1_roi, gmask_roi); Near(1e-5); } } @@ -1355,14 +1321,14 @@ TEST_P(Bitwise_xor, Scalar_Mask) typedef ArithmTestBase Bitwise_not; -TEST_P(Bitwise_not, Mat) +OCL_TEST_P(Bitwise_not, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::bitwise_not(src1_roi, dst1_roi); - cv::ocl::bitwise_not(gsrc1, gdst1); + cv::ocl::bitwise_not(gsrc1_roi, gdst1_roi); Near(0); } } @@ -1371,7 +1337,7 @@ TEST_P(Bitwise_not, Mat) typedef ArithmTestBase Compare; -TEST_P(Compare, Mat) +OCL_TEST_P(Compare, Mat) { int cmp_codes[] = { CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE }; int cmp_num = sizeof(cmp_codes) / sizeof(int); @@ -1382,7 +1348,7 @@ TEST_P(Compare, Mat) random_roi(); 
cv::compare(src1_roi, src2_roi, dst1_roi, cmp_codes[i]); - cv::ocl::compare(gsrc1, gsrc2, gdst1, cmp_codes[i]); + cv::ocl::compare(gsrc1_roi, gsrc2_roi, gdst1_roi, cmp_codes[i]); Near(0); } @@ -1392,14 +1358,14 @@ TEST_P(Compare, Mat) typedef ArithmTestBase Pow; -TEST_P(Pow, Mat) +OCL_TEST_P(Pow, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); double p = 4.5; cv::pow(src1_roi, p, dst1_roi); - cv::ocl::pow(gsrc1, p, gdst1); + cv::ocl::pow(gsrc1_roi, p, gdst1_roi); Near(1); } } @@ -1408,7 +1374,7 @@ TEST_P(Pow, Mat) typedef ArithmTestBase AddWeighted; -TEST_P(AddWeighted, Mat) +OCL_TEST_P(AddWeighted, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1417,9 +1383,9 @@ TEST_P(AddWeighted, Mat) const double alpha = 2.0, beta = 1.0, gama = 3.0; cv::addWeighted(src1_roi, alpha, src2_roi, beta, gama, dst1_roi); - cv::ocl::addWeighted(gsrc1, alpha, gsrc2, beta, gama, gdst1); + cv::ocl::addWeighted(gsrc1_roi, alpha, gsrc2_roi, beta, gama, gdst1_roi); - Near(1e-5); + Near(3e-4); } } @@ -1427,14 +1393,14 @@ TEST_P(AddWeighted, Mat) typedef ArithmTestBase SetIdentity; -TEST_P(SetIdentity, Mat) +OCL_TEST_P(SetIdentity, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { random_roi(); cv::setIdentity(dst1_roi, val); - cv::ocl::setIdentity(gdst1, val); + cv::ocl::setIdentity(gdst1_roi, val); Near(0); } @@ -1444,7 +1410,7 @@ TEST_P(SetIdentity, Mat) typedef ArithmTestBase MeanStdDev; -TEST_P(MeanStdDev, Mat) +OCL_TEST_P(MeanStdDev, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1454,7 +1420,7 @@ TEST_P(MeanStdDev, Mat) Scalar gpu_mean, gpu_stddev; cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev); - cv::ocl::meanStdDev(gsrc1, gpu_mean, gpu_stddev); + cv::ocl::meanStdDev(gsrc1_roi, gpu_mean, gpu_stddev); for (int i = 0; i < 4; ++i) { @@ -1468,7 +1434,7 @@ TEST_P(MeanStdDev, Mat) typedef ArithmTestBase Norm; -TEST_P(Norm, NORM_INF) +OCL_TEST_P(Norm, NORM_INF) { for (int relative = 0; relative < 2; ++relative) for (int j = 0; j < LOOP_TIMES; j++) @@ -1480,13 +1446,13 @@ 
TEST_P(Norm, NORM_INF) type |= NORM_RELATIVE; const double cpuRes = cv::norm(src1_roi, src2_roi, type); - const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + const double gpuRes = cv::ocl::norm(gsrc1_roi, gsrc2_roi, type); EXPECT_NEAR(cpuRes, gpuRes, 0.1); } } -TEST_P(Norm, NORM_L1) +OCL_TEST_P(Norm, NORM_L1) { for (int relative = 0; relative < 2; ++relative) for (int j = 0; j < LOOP_TIMES; j++) @@ -1498,13 +1464,13 @@ TEST_P(Norm, NORM_L1) type |= NORM_RELATIVE; const double cpuRes = cv::norm(src1_roi, src2_roi, type); - const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + const double gpuRes = cv::ocl::norm(gsrc1_roi, gsrc2_roi, type); EXPECT_NEAR(cpuRes, gpuRes, 0.1); } } -TEST_P(Norm, NORM_L2) +OCL_TEST_P(Norm, NORM_L2) { for (int relative = 0; relative < 2; ++relative) for (int j = 0; j < LOOP_TIMES; j++) @@ -1516,7 +1482,7 @@ TEST_P(Norm, NORM_L2) type |= NORM_RELATIVE; const double cpuRes = cv::norm(src1_roi, src2_roi, type); - const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + const double gpuRes = cv::ocl::norm(gsrc1_roi, gsrc2_roi, type); EXPECT_NEAR(cpuRes, gpuRes, 0.1); } @@ -1531,6 +1497,9 @@ INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Min, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Max, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Abs, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), 
testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); diff --git a/modules/ocl/test/test_bgfg.cpp b/modules/ocl/test/test_bgfg.cpp index 2fad374208..d2e1363017 100644 --- a/modules/ocl/test/test_bgfg.cpp +++ b/modules/ocl/test/test_bgfg.cpp @@ -85,14 +85,12 @@ PARAM_TEST_CASE(mog, UseGray, LearningRate, bool) virtual void SetUp() { useGray = GET_PARAM(0); - learningRate = GET_PARAM(1); - useRoi = GET_PARAM(2); } }; -TEST_P(mog, Update) +OCL_TEST_P(mog, Update) { std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi"; cv::VideoCapture cap(inputFile); @@ -103,7 +101,7 @@ TEST_P(mog, Update) ASSERT_FALSE(frame.empty()); cv::ocl::MOG mog; - cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi); + cv::ocl::oclMat foreground = createMat_ocl(rng, frame.size(), CV_8UC1, useRoi); Ptr mog_gold = createBackgroundSubtractorMOG(); cv::Mat foreground_gold; @@ -120,7 +118,7 @@ TEST_P(mog, Update) cv::swap(temp, frame); } - mog(loadMat_ocl(frame, useRoi), foreground, (float)learningRate); + mog(loadMat_ocl(rng, frame, useRoi), foreground, (float)learningRate); mog_gold->apply(frame, foreground_gold, learningRate); @@ -153,7 +151,7 @@ PARAM_TEST_CASE(mog2, UseGray, DetectShadow, bool) } }; -TEST_P(mog2, Update) +OCL_TEST_P(mog2, Update) { std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/video/768x576.avi"; cv::VideoCapture cap(inputFile); @@ -165,7 +163,7 @@ TEST_P(mog2, Update) cv::ocl::MOG2 mog2; mog2.bShadowDetection = detectShadow; - cv::ocl::oclMat foreground = createMat_ocl(frame.size(), CV_8UC1, useRoi); + cv::ocl::oclMat foreground = createMat_ocl(rng, frame.size(), CV_8UC1, useRoi); cv::Ptr mog2_gold = createBackgroundSubtractorMOG2(); mog2_gold->setDetectShadows(detectShadow); @@ -183,7 +181,7 @@ 
TEST_P(mog2, Update) cv::swap(temp, frame); } - mog2(loadMat_ocl(frame, useRoi), foreground); + mog2(loadMat_ocl(rng, frame, useRoi), foreground); mog2_gold->apply(frame, foreground_gold); @@ -194,7 +192,7 @@ TEST_P(mog2, Update) } } -TEST_P(mog2, getBackgroundImage) +OCL_TEST_P(mog2, getBackgroundImage) { if (useGray) return; @@ -218,12 +216,12 @@ TEST_P(mog2, getBackgroundImage) cap >> frame; ASSERT_FALSE(frame.empty()); - mog2(loadMat_ocl(frame, useRoi), foreground); + mog2(loadMat_ocl(rng, frame, useRoi), foreground); mog2_gold->apply(frame, foreground_gold); } - cv::ocl::oclMat background = createMat_ocl(frame.size(), frame.type(), useRoi); + cv::ocl::oclMat background = createMat_ocl(rng, frame.size(), frame.type(), useRoi); mog2.getBackgroundImage(background); cv::Mat background_gold; diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp index ae0cbd41bd..8e6e269399 100644 --- a/modules/ocl/test/test_blend.cpp +++ b/modules/ocl/test/test_blend.cpp @@ -88,7 +88,7 @@ PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) } }; -TEST_P(Blend, Accuracy) +OCL_TEST_P(Blend, Accuracy) { int depth = CV_MAT_DEPTH(type); diff --git a/modules/ocl/test/test_brute_force_matcher.cpp b/modules/ocl/test/test_brute_force_matcher.cpp index 4d0b45fb78..5b80449e28 100644 --- a/modules/ocl/test/test_brute_force_matcher.cpp +++ b/modules/ocl/test/test_brute_force_matcher.cpp @@ -72,8 +72,6 @@ namespace queryDescCount = 300; // must be even number because we split train data in some cases in two countFactor = 4; // do not change it - cv::RNG &rng = cvtest::TS::ptr()->get_rng(); - cv::Mat queryBuf, trainBuf; // Generate query descriptors randomly. 
@@ -108,7 +106,7 @@ namespace } }; - TEST_P(BruteForceMatcher, Match_Single) + OCL_TEST_P(BruteForceMatcher, Match_Single) { cv::ocl::BruteForceMatcher_OCL_base matcher(distType); @@ -128,7 +126,7 @@ namespace ASSERT_EQ(0, badCount); } - TEST_P(BruteForceMatcher, KnnMatch_2_Single) + OCL_TEST_P(BruteForceMatcher, KnnMatch_2_Single) { const int knn = 2; @@ -160,7 +158,7 @@ namespace ASSERT_EQ(0, badCount); } - TEST_P(BruteForceMatcher, RadiusMatch_Single) + OCL_TEST_P(BruteForceMatcher, RadiusMatch_Single) { float radius = 1.f / countFactor; diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp index 7e5c4a4196..532e61d134 100644 --- a/modules/ocl/test/test_calib3d.cpp +++ b/modules/ocl/test/test_calib3d.cpp @@ -46,10 +46,10 @@ #include "test_precomp.hpp" #include -#ifdef HAVE_OPENCL - using namespace cv; +#ifdef HAVE_OPENCL + PARAM_TEST_CASE(StereoMatchBM, int, int) { int n_disp; @@ -62,7 +62,7 @@ PARAM_TEST_CASE(StereoMatchBM, int, int) } }; -TEST_P(StereoMatchBM, Regression) +OCL_TEST_P(StereoMatchBM, Regression) { Mat left_image = readImage("gpu/stereobm/aloe-L.png", IMREAD_GRAYSCALE); @@ -110,7 +110,7 @@ PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float) disc_single_jump_ = GET_PARAM(6); } }; -TEST_P(StereoMatchBP, Regression) +OCL_TEST_P(StereoMatchBP, Regression) { Mat left_image = readImage("gpu/stereobp/aloe-L.png"); Mat right_image = readImage("gpu/stereobp/aloe-R.png"); @@ -163,7 +163,7 @@ PARAM_TEST_CASE(StereoMatchConstSpaceBP, int, int, int, int, float, float, float msg_type_ = GET_PARAM(9); } }; -TEST_P(StereoMatchConstSpaceBP, Regression) +OCL_TEST_P(StereoMatchConstSpaceBP, Regression) { Mat left_image = readImage("gpu/csstereobp/aloe-L.png"); Mat right_image = readImage("gpu/csstereobp/aloe-R.png"); diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp index 5d8d108e70..b7d2d6d44a 100644 --- a/modules/ocl/test/test_canny.cpp +++ b/modules/ocl/test/test_canny.cpp @@ -64,7 
+64,7 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient) } }; -TEST_P(Canny, Accuracy) +OCL_TEST_P(Canny, Accuracy) { cv::Mat img = readImage("cv/shared/fruits.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(img.empty()); diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp index b0a223091a..366809a9fb 100644 --- a/modules/ocl/test/test_color.cpp +++ b/modules/ocl/test/test_color.cpp @@ -93,7 +93,7 @@ PARAM_TEST_CASE(CvtColor, cv::Size, MatDepth) }; #define CVTCODE(name) cv::COLOR_ ## name -#define TEST_P_CVTCOLOR(name) TEST_P(CvtColor, name)\ +#define OCL_TEST_P_CVTCOLOR(name) OCL_TEST_P(CvtColor, name)\ {\ cv::Mat src = img;\ cv::ocl::oclMat ocl_img, dst;\ @@ -107,17 +107,17 @@ PARAM_TEST_CASE(CvtColor, cv::Size, MatDepth) } //add new ones here using macro -TEST_P_CVTCOLOR(RGB2GRAY) -TEST_P_CVTCOLOR(BGR2GRAY) -TEST_P_CVTCOLOR(RGBA2GRAY) -TEST_P_CVTCOLOR(BGRA2GRAY) - -TEST_P_CVTCOLOR(RGB2YUV) -TEST_P_CVTCOLOR(BGR2YUV) -TEST_P_CVTCOLOR(YUV2RGB) -TEST_P_CVTCOLOR(YUV2BGR) -TEST_P_CVTCOLOR(RGB2YCrCb) -TEST_P_CVTCOLOR(BGR2YCrCb) +OCL_TEST_P_CVTCOLOR(RGB2GRAY) +OCL_TEST_P_CVTCOLOR(BGR2GRAY) +OCL_TEST_P_CVTCOLOR(RGBA2GRAY) +OCL_TEST_P_CVTCOLOR(BGRA2GRAY) + +OCL_TEST_P_CVTCOLOR(RGB2YUV) +OCL_TEST_P_CVTCOLOR(BGR2YUV) +OCL_TEST_P_CVTCOLOR(YUV2RGB) +OCL_TEST_P_CVTCOLOR(YUV2BGR) +OCL_TEST_P_CVTCOLOR(RGB2YCrCb) +OCL_TEST_P_CVTCOLOR(BGR2YCrCb) PARAM_TEST_CASE(CvtColor_Gray2RGB, cv::Size, MatDepth, int) { @@ -134,7 +134,7 @@ PARAM_TEST_CASE(CvtColor_Gray2RGB, cv::Size, MatDepth, int) img = randomMat(size, CV_MAKETYPE(depth, 1), 0.0, depth == CV_32F ? 
1.0 : 255.0); } }; -TEST_P(CvtColor_Gray2RGB, Accuracy) +OCL_TEST_P(CvtColor_Gray2RGB, Accuracy) { cv::Mat src = img; cv::ocl::oclMat ocl_img, dst; @@ -163,7 +163,7 @@ PARAM_TEST_CASE(CvtColor_YUV420, cv::Size, int) } }; -TEST_P(CvtColor_YUV420, Accuracy) +OCL_TEST_P(CvtColor_YUV420, Accuracy) { cv::Mat src = img; cv::ocl::oclMat ocl_img, dst; diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp index 29a2b815a3..d9cc7b1595 100644 --- a/modules/ocl/test/test_fft.cpp +++ b/modules/ocl/test/test_fft.cpp @@ -44,10 +44,12 @@ //M*/ #include "test_precomp.hpp" + using namespace std; -#ifdef HAVE_CLAMDFFT + //////////////////////////////////////////////////////////////////////////// // Dft + PARAM_TEST_CASE(Dft, cv::Size, int) { cv::Size dft_size; @@ -59,7 +61,7 @@ PARAM_TEST_CASE(Dft, cv::Size, int) } }; -TEST_P(Dft, C2C) +OCL_TEST_P(Dft, C2C) { cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0); cv::Mat b_gold; @@ -71,7 +73,7 @@ TEST_P(Dft, C2C) EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4); } -TEST_P(Dft, R2C) +OCL_TEST_P(Dft, R2C) { cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0); cv::Mat b_gold, b_gold_roi; @@ -88,7 +90,7 @@ TEST_P(Dft, R2C) EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4); } -TEST_P(Dft, R2CthenC2R) +OCL_TEST_P(Dft, R2CthenC2R) { cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); @@ -98,7 +100,6 @@ TEST_P(Dft, R2CthenC2R) EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4); } - INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine( testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)), testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) )); @@ -119,12 +120,12 @@ PARAM_TEST_CASE(MulSpectrums, cv::Size, DftFlags, bool) flag = GET_PARAM(1); ccorr = GET_PARAM(2); - a = randomMat(size, CV_32FC2); - b = randomMat(size, CV_32FC2); + a = randomMat(size, CV_32FC2, -100, 100, false); + b = randomMat(size, CV_32FC2, -100, 100, false); } 
}; -TEST_P(MulSpectrums, Simple) +OCL_TEST_P(MulSpectrums, Simple) { cv::ocl::oclMat c; cv::ocl::mulSpectrums(cv::ocl::oclMat(a), cv::ocl::oclMat(b), c, flag, 1.0, ccorr); @@ -135,7 +136,7 @@ TEST_P(MulSpectrums, Simple) EXPECT_MAT_NEAR(c_gold, c, 1e-2); } -TEST_P(MulSpectrums, Scaled) +OCL_TEST_P(MulSpectrums, Scaled) { float scale = 1.f / size.area(); @@ -219,7 +220,7 @@ PARAM_TEST_CASE(Convolve_DFT, cv::Size, KSize, Ccorr) } }; -TEST_P(Convolve_DFT, Accuracy) +OCL_TEST_P(Convolve_DFT, Accuracy) { cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0); cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0); @@ -236,5 +237,4 @@ TEST_P(Convolve_DFT, Accuracy) INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Convolve_DFT, testing::Combine( DIFFERENT_CONVOLVE_SIZES, testing::Values(KSize(19), KSize(23), KSize(45)), - testing::Values(Ccorr(true)/*, Ccorr(false)*/))); // false ccorr cannot pass for some instances -#endif // HAVE_CLAMDFFT + testing::Values(Ccorr(true)/*, Ccorr(false)*/))); // TODO false ccorr cannot pass for some instances diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp index 4a22ec5033..b5bf7acae7 100644 --- a/modules/ocl/test/test_filters.cpp +++ b/modules/ocl/test/test_filters.cpp @@ -91,7 +91,6 @@ PARAM_TEST_CASE(FilterTestBase, { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -146,7 +145,7 @@ struct Blur : FilterTestBase } }; -TEST_P(Blur, Mat) +OCL_TEST_P(Blur, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -173,7 +172,7 @@ struct Laplacian : FilterTestBase } }; -TEST_P(Laplacian, Accuracy) +OCL_TEST_P(Laplacian, Accuracy) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -201,13 +200,12 @@ struct ErodeDilate : FilterTestBase type = GET_PARAM(0); iterations = GET_PARAM(3); Init(type); - // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); kernel = 
randomMat(Size(3, 3), CV_8UC1, 0, 3); } }; -TEST_P(ErodeDilate, Mat) +OCL_TEST_P(ErodeDilate, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -246,7 +244,7 @@ struct Sobel : FilterTestBase } }; -TEST_P(Sobel, Mat) +OCL_TEST_P(Sobel, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -276,7 +274,7 @@ struct Scharr : FilterTestBase } }; -TEST_P(Scharr, Mat) +OCL_TEST_P(Scharr, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -304,13 +302,12 @@ struct GaussianBlur : FilterTestBase ksize = GET_PARAM(1); bordertype = GET_PARAM(3); Init(type); - cv::RNG &rng = TS::ptr()->get_rng(); sigma1 = rng.uniform(0.1, 1.0); sigma2 = rng.uniform(0.1, 1.0); } }; -TEST_P(GaussianBlur, Mat) +OCL_TEST_P(GaussianBlur, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -342,7 +339,7 @@ struct Filter2D : FilterTestBase } }; -TEST_P(Filter2D, Mat) +OCL_TEST_P(Filter2D, Mat) { cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0); for(int j = 0; j < LOOP_TIMES; j++) @@ -368,13 +365,12 @@ struct Bilateral : FilterTestBase ksize = GET_PARAM(1); bordertype = GET_PARAM(3); Init(type); - cv::RNG &rng = TS::ptr()->get_rng(); sigmacolor = rng.uniform(20, 100); sigmaspace = rng.uniform(10, 40); } }; -TEST_P(Bilateral, Mat) +OCL_TEST_P(Bilateral, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -404,7 +400,7 @@ struct AdaptiveBilateral : FilterTestBase } }; -TEST_P(AdaptiveBilateral, Mat) +OCL_TEST_P(AdaptiveBilateral, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp index 00d428770f..68dab0ac01 100644 --- a/modules/ocl/test/test_gemm.cpp +++ b/modules/ocl/test/test_gemm.cpp @@ -42,12 +42,13 @@ // //M*/ - #include "test_precomp.hpp" + using namespace std; -#ifdef HAVE_CLAMDBLAS + //////////////////////////////////////////////////////////////////////////// // GEMM + PARAM_TEST_CASE(Gemm, int, cv::Size, int) { int type; @@ -62,7 +63,7 @@ PARAM_TEST_CASE(Gemm, int, cv::Size, int) } }; -TEST_P(Gemm, Accuracy) 
+OCL_TEST_P(Gemm, Accuracy) { cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); @@ -81,4 +82,3 @@ INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/), testing::Values(cv::Size(20, 20), cv::Size(300, 300)), testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T)))); -#endif diff --git a/modules/ocl/test/test_hough.cpp b/modules/ocl/test/test_hough.cpp index 9e45076cd7..f5d2578010 100644 --- a/modules/ocl/test/test_hough.cpp +++ b/modules/ocl/test/test_hough.cpp @@ -58,7 +58,7 @@ PARAM_TEST_CASE(HoughCircles, cv::Size) } }; -TEST_P(HoughCircles, Accuracy) +OCL_TEST_P(HoughCircles, Accuracy) { const cv::Size size = GET_PARAM(0); diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index c79e5e73f9..1b80f850b7 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -351,33 +351,32 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo type3 = GET_PARAM(2); type4 = GET_PARAM(3); type5 = GET_PARAM(4); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size(MWIDTH, MHEIGHT); double min = 1, max = 20; if(type1 != nulltype) { - mat1 = randomMat(rng, size, type1, min, max, false); + mat1 = randomMat(size, type1, min, max, false); clmat1 = mat1; } if(type2 != nulltype) { - mat2 = randomMat(rng, size, type2, min, max, false); + mat2 = randomMat(size, type2, min, max, false); clmat2 = mat2; } if(type3 != nulltype) { - dst = randomMat(rng, size, type3, min, max, false); + dst = randomMat(size, type3, min, max, false); cldst = dst; } if(type4 != nulltype) { - dst1 = randomMat(rng, size, type4, min, max, false); + dst1 = randomMat(size, type4, min, max, false); cldst1 = dst1; } if(type5 != nulltype) { - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + mask = randomMat(size, CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., 
type5); clmask = mask; } @@ -388,7 +387,6 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat1.cols); roirows = rng.uniform(1, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -455,7 +453,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo struct equalizeHist : ImgprocTestBase {}; -TEST_P(equalizeHist, Mat) +OCL_TEST_P(equalizeHist, Mat) { if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) { @@ -479,10 +477,9 @@ TEST_P(equalizeHist, Mat) struct CopyMakeBorder : ImgprocTestBase {}; -TEST_P(CopyMakeBorder, Mat) +OCL_TEST_P(CopyMakeBorder, Mat) { int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101}; - cv::RNG &rng = TS::ptr()->get_rng(); int top = rng.uniform(0, 10); int bottom = rng.uniform(0, 10); int left = rng.uniform(0, 10); @@ -535,7 +532,7 @@ TEST_P(CopyMakeBorder, Mat) struct cornerMinEigenVal : ImgprocTestBase {}; -TEST_P(cornerMinEigenVal, Mat) +OCL_TEST_P(cornerMinEigenVal, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -557,7 +554,7 @@ TEST_P(cornerMinEigenVal, Mat) struct cornerHarris : ImgprocTestBase {}; -TEST_P(cornerHarris, Mat) +OCL_TEST_P(cornerHarris, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -579,7 +576,7 @@ TEST_P(cornerHarris, Mat) struct integral : ImgprocTestBase {}; -TEST_P(integral, Mat1) +OCL_TEST_P(integral, Mat1) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -591,7 +588,7 @@ TEST_P(integral, Mat1) } } -TEST_P(integral, Mat2) +OCL_TEST_P(integral, Mat2) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -646,22 +643,17 @@ PARAM_TEST_CASE(WarpTestBase, MatType, int) virtual void SetUp() { type = GET_PARAM(0); - //dsize = GET_PARAM(1); interpolation = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); size = cv::Size(MWIDTH, MHEIGHT); - mat1 = randomMat(rng, size, type, 5, 16, false); - 
dst = randomMat(rng, size, type, 5, 16, false); - + mat1 = randomMat(size, type, 5, 16, false); + dst = randomMat(size, type, 5, 16, false); } void random_roi() { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); src_roicols = rng.uniform(1, mat1.cols); src_roirows = rng.uniform(1, mat1.rows); dst_roicols = rng.uniform(1, dst.cols); @@ -698,7 +690,7 @@ PARAM_TEST_CASE(WarpTestBase, MatType, int) struct WarpAffine : WarpTestBase {}; -TEST_P(WarpAffine, Mat) +OCL_TEST_P(WarpAffine, Mat) { static const double coeffs[2][3] = { @@ -726,7 +718,7 @@ TEST_P(WarpAffine, Mat) struct WarpPerspective : WarpTestBase {}; -TEST_P(WarpPerspective, Mat) +OCL_TEST_P(WarpPerspective, Mat) { static const double coeffs[3][3] = { @@ -810,23 +802,22 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) interpolation = GET_PARAM(3); bordertype = GET_PARAM(4); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); double min = 5, max = 16; if(srcType != nulltype) { - src = randomMat(rng, srcSize, srcType, min, max, false); + src = randomMat(srcSize, srcType, min, max, false); } if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); + map1 = randomMat(map1Size, map1Type, min, max, false); } else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); - map2 = randomMat(rng, map1Size, map1Type, min, max, false); + map1 = randomMat(map1Size, map1Type, min, max, false); + map2 = randomMat(map1Size, map1Type, min, max, false); } else @@ -835,7 +826,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) return; } - dst = randomMat(rng, map1Size, srcType, min, max, false); + dst = randomMat(map1Size, srcType, min, max, false); switch (src.channels()) { case 1: @@ -855,8 +846,6 @@ PARAM_TEST_CASE(Remap, MatType, 
MatType, MatType, int, int) } void random_roi() { - cv::RNG &rng = TS::ptr()->get_rng(); - dst_roicols = rng.uniform(1, dst.cols); dst_roirows = rng.uniform(1, dst.rows); @@ -898,7 +887,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) } }; -TEST_P(Remap, Mat) +OCL_TEST_P(Remap, Mat) { if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) { @@ -966,8 +955,6 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) fy = GET_PARAM(3); interpolation = GET_PARAM(4); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); if(dsize == cv::Size() && !(fx > 0 && fy > 0)) @@ -982,8 +969,8 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) dsize.height = (int)(size.height * fy); } - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, dsize, type, 5, 16, false); + mat1 = randomMat(size, type, 5, 16, false); + dst = randomMat(dsize, type, 5, 16, false); } @@ -991,7 +978,6 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); src_roicols = rng.uniform(1, mat1.cols); src_roirows = rng.uniform(1, mat1.rows); dst_roicols = (int)(src_roicols * fx); @@ -1026,7 +1012,7 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) }; -TEST_P(Resize, Mat) +OCL_TEST_P(Resize, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -1082,18 +1068,16 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp) type = GET_PARAM(0); threshOp = GET_PARAM(1); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size(MWIDTH, MHEIGHT); - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); + mat1 = randomMat(size, type, 5, 16, false); + dst = randomMat(size, type, 5, 16, false); } void random_roi() { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = 
TS::ptr()->get_rng(); roicols = rng.uniform(1, mat1.cols); roirows = rng.uniform(1, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -1121,7 +1105,7 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp) }; -TEST_P(Threshold, Mat) +OCL_TEST_P(Threshold, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -1179,22 +1163,18 @@ PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) sr = GET_PARAM(3); crit = GET_PARAM(4); - cv::RNG &rng = TS::ptr()->get_rng(); - // MWIDTH=256, MHEIGHT=256. defined in utility.hpp cv::Size size = cv::Size(MWIDTH, MHEIGHT); - src = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dstCoor = randomMat(rng, size, typeCoor, 5, 16, false); + src = randomMat(size, type, 5, 16, false); + dst = randomMat(size, type, 5, 16, false); + dstCoor = randomMat(size, typeCoor, 5, 16, false); } void random_roi() { #ifdef RANDOMROI - cv::RNG &rng = TS::ptr()->get_rng(); - //randomize ROI roicols = rng.uniform(1, src.cols); roirows = rng.uniform(1, src.rows); @@ -1226,7 +1206,7 @@ PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) /////////////////////////meanShiftFiltering///////////////////////////// struct meanShiftFiltering : meanShiftTestBase {}; -TEST_P(meanShiftFiltering, Mat) +OCL_TEST_P(meanShiftFiltering, Mat) { for(int j = 0; j < LOOP_TIMES; j++) @@ -1247,7 +1227,7 @@ TEST_P(meanShiftFiltering, Mat) ///////////////////////////meanShiftProc////////////////////////////////// struct meanShiftProc : meanShiftTestBase {}; -TEST_P(meanShiftProc, Mat) +OCL_TEST_P(meanShiftProc, Mat) { for(int j = 0; j < LOOP_TIMES; j++) @@ -1307,18 +1287,15 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType) { type_src = GET_PARAM(0); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size = cv::Size(MWIDTH, MHEIGHT); - src = randomMat(rng, size, type_src, 0, 256, false); + src = randomMat(size, type_src, 0, 256, false); } void random_roi() { #ifdef RANDOMROI - cv::RNG &rng 
= TS::ptr()->get_rng(); - //randomize ROI roicols = rng.uniform(1, src.cols); roirows = rng.uniform(1, src.rows); @@ -1338,7 +1315,7 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType) ///////////////////////////calcHist/////////////////////////////////////// struct calcHist : histTestBase {}; -TEST_P(calcHist, Mat) +OCL_TEST_P(calcHist, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -1372,13 +1349,12 @@ PARAM_TEST_CASE(CLAHE, cv::Size, double) gridSize = GET_PARAM(0); clipLimit = GET_PARAM(1); - cv::RNG &rng = TS::ptr()->get_rng(); - src = randomMat(rng, cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false); + src = randomMat(cv::Size(MWIDTH, MHEIGHT), CV_8UC1, 0, 256, false); g_src.upload(src); } }; -TEST_P(CLAHE, Accuracy) +OCL_TEST_P(CLAHE, Accuracy) { cv::Ptr clahe = cv::ocl::createCLAHE(clipLimit, gridSize); clahe->apply(g_src, g_dst); @@ -1425,19 +1401,15 @@ PARAM_TEST_CASE(ConvolveTestBase, MatType, bool) { type = GET_PARAM(0); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - mat1 = randomMat(rng, size, type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); + mat1 = randomMat(size, type, 5, 16, false); + mat2 = randomMat(size, type, 5, 16, false); + dst = randomMat(size, type, 5, 16, false); + dst1 = randomMat(size, type, 5, 16, false); } void random_roi() { - cv::RNG &rng = TS::ptr()->get_rng(); - #ifdef RANDOMROI //randomize ROI roicols = rng.uniform(1, mat1.cols); @@ -1505,7 +1477,7 @@ void conv2( cv::Mat x, cv::Mat y, cv::Mat z) dstdata[i * (z.step >> 2) + j] = temp; } } -TEST_P(Convolve, Mat) +OCL_TEST_P(Convolve, Mat) { if(mat1.type() != CV_32FC1) { @@ -1540,9 +1512,9 @@ PARAM_TEST_CASE(ColumnSum, cv::Size) } }; -TEST_P(ColumnSum, Accuracy) +OCL_TEST_P(ColumnSum, Accuracy) { - cv::Mat src = randomMat(size, CV_32FC1); + cv::Mat src = randomMat(size, CV_32FC1, 0, 255); cv::ocl::oclMat d_dst; cv::ocl::oclMat 
d_src(src); diff --git a/modules/ocl/test/test_kalman.cpp b/modules/ocl/test/test_kalman.cpp index 13f9d0b81b..f02df6af71 100644 --- a/modules/ocl/test/test_kalman.cpp +++ b/modules/ocl/test/test_kalman.cpp @@ -43,7 +43,9 @@ //M*/ #include "test_precomp.hpp" + #ifdef HAVE_OPENCL + using namespace cv; using namespace cv::ocl; using namespace cvtest; @@ -51,6 +53,7 @@ using namespace testing; using namespace std; ////////////////////////////////////////////////////////////////////////// + PARAM_TEST_CASE(Kalman, int, int) { int size_; @@ -62,15 +65,13 @@ PARAM_TEST_CASE(Kalman, int, int) } }; -TEST_P(Kalman, Accuracy) +OCL_TEST_P(Kalman, Accuracy) { const int Dim = size_; const int Steps = iteration; const double max_init = 1; const double max_noise = 0.1; - cv::RNG &rng = TS::ptr()->get_rng(); - Mat sample_mat(Dim, 1, CV_32F), temp_mat; oclMat Sample(Dim, 1, CV_32F); oclMat Temp(Dim, 1, CV_32F); @@ -78,7 +79,7 @@ TEST_P(Kalman, Accuracy) Size size(Sample.cols, Sample.rows); - sample_mat = randomMat(rng, size, Sample.type(), -max_init, max_init, false); + sample_mat = randomMat(size, Sample.type(), -max_init, max_init, false); Sample.upload(sample_mat); //ocl start @@ -120,7 +121,7 @@ TEST_P(Kalman, Accuracy) cv::gemm(kalman_filter_cpu.transitionMatrix, sample_mat, 1, cv::Mat(), 0, Temp_cpu); Size size1(Temp.cols, Temp.rows); - Mat temp = randomMat(rng, size1, Temp.type(), 0, 0xffff, false); + Mat temp = randomMat(size1, Temp.type(), 0, 0xffff, false); cv::multiply(2, temp, temp); @@ -141,6 +142,7 @@ TEST_P(Kalman, Accuracy) //test end EXPECT_MAT_NEAR(kalman_filter_cpu.statePost, kalman_filter_ocl.statePost, 0); } + INSTANTIATE_TEST_CASE_P(OCL_Video, Kalman, Combine(Values(3, 7), Values(30))); -#endif // HAVE_OPENCL \ No newline at end of file +#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_kmeans.cpp b/modules/ocl/test/test_kmeans.cpp index 008acfaa4f..5e4026694d 100644 --- a/modules/ocl/test/test_kmeans.cpp +++ b/modules/ocl/test/test_kmeans.cpp @@ -66,12 
+66,11 @@ PARAM_TEST_CASE(Kmeans, int, int, int) Mat labels, centers; ocl::oclMat d_labels, d_centers; - cv::RNG rng ; - virtual void SetUp(){ + virtual void SetUp() + { K = GET_PARAM(0); type = GET_PARAM(1); flags = GET_PARAM(2); - rng = TS::ptr()->get_rng(); // MWIDTH=256, MHEIGHT=256. defined in utility.hpp cv::Size size = cv::Size(MWIDTH, MHEIGHT); @@ -92,14 +91,14 @@ PARAM_TEST_CASE(Kmeans, int, int, int) { Mat cur_row_header = src.row(row_idx + 1 + j); center_row_header.copyTo(cur_row_header); - Mat tmpmat = randomMat(rng, cur_row_header.size(), cur_row_header.type(), -200, 200, false); + Mat tmpmat = randomMat(cur_row_header.size(), cur_row_header.type(), -200, 200, false); cur_row_header += tmpmat; } row_idx += 1 + max_neighbour; } } }; -TEST_P(Kmeans, Mat){ +OCL_TEST_P(Kmeans, Mat){ if(flags & KMEANS_USE_INITIAL_LABELS) { diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp index 651d34b81b..0c2e9bd4e0 100644 --- a/modules/ocl/test/test_match_template.cpp +++ b/modules/ocl/test/test_match_template.cpp @@ -70,10 +70,10 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho } }; -TEST_P(MatchTemplate8U, Accuracy) +OCL_TEST_P(MatchTemplate8U, Accuracy) { - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn), 0, 255); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn), 0, 255); cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); @@ -103,10 +103,10 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMeth } }; -TEST_P(MatchTemplate32F, Accuracy) +OCL_TEST_P(MatchTemplate32F, Accuracy) { - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, 
cn), 0, 255); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn), 0, 255); cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index bc8cdf2bb3..0b79d88906 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -90,10 +90,8 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool) use_roi = GET_PARAM(3); - cv::RNG &rng = TS::ptr()->get_rng(); - - mat = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), src_type, 5, 136, false); - dst = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : mat.size(), dst_type, 5, 136, false); + mat = randomMat(randomSize(MIN_VALUE, MAX_VALUE), src_type, 5, 136, false); + dst = randomMat(use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : mat.size(), dst_type, 5, 136, false); } void random_roi() @@ -101,7 +99,6 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool) if (use_roi) { // randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, MIN_VALUE); roirows = rng.uniform(1, MIN_VALUE); srcx = rng.uniform(0, mat.cols - roicols); @@ -129,7 +126,7 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType, int, bool) typedef ConvertToTestBase ConvertTo; -TEST_P(ConvertTo, Accuracy) +OCL_TEST_P(ConvertTo, Accuracy) { if((src_depth == CV_64F || dst_depth == CV_64F) && !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) @@ -178,11 +175,9 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, int, bool) int type = CV_MAKETYPE(GET_PARAM(0), GET_PARAM(1)); use_roi = GET_PARAM(2); - cv::RNG &rng = TS::ptr()->get_rng(); - - src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - dst = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), type, 5, 16, false); - mask = randomMat(rng, use_roi ? 
randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false); + src = randomMat(randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); + dst = randomMat(use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), type, 5, 16, false); + mask = randomMat(use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., CV_8UC1); } @@ -192,7 +187,6 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, int, bool) if (use_roi) { // randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, MIN_VALUE); roirows = rng.uniform(1, MIN_VALUE); srcx = rng.uniform(0, src.cols - roicols); @@ -225,7 +219,7 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, int, bool) typedef CopyToTestBase CopyTo; -TEST_P(CopyTo, Without_mask) +OCL_TEST_P(CopyTo, Without_mask) { if((src.depth() == CV_64F) && !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) @@ -243,7 +237,7 @@ TEST_P(CopyTo, Without_mask) } } -TEST_P(CopyTo, With_mask) +OCL_TEST_P(CopyTo, With_mask) { if(src.depth() == CV_64F && !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) @@ -295,11 +289,10 @@ PARAM_TEST_CASE(SetToTestBase, MatType, int, bool) channels = GET_PARAM(1); use_roi = GET_PARAM(2); - cv::RNG &rng = TS::ptr()->get_rng(); int type = CV_MAKE_TYPE(depth, channels); - src = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - mask = randomMat(rng, use_roi ? randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false); + src = randomMat(randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); + mask = randomMat(use_roi ? 
randomSize(MIN_VALUE, MAX_VALUE) : src.size(), CV_8UC1, 0, 2, false); cv::threshold(mask, mask, 0.5, 255., CV_8UC1); val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), @@ -311,7 +304,6 @@ PARAM_TEST_CASE(SetToTestBase, MatType, int, bool) if (use_roi) { // randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, MIN_VALUE); roirows = rng.uniform(1, MIN_VALUE); srcx = rng.uniform(0, src.cols - roicols); @@ -339,7 +331,7 @@ PARAM_TEST_CASE(SetToTestBase, MatType, int, bool) typedef SetToTestBase SetTo; -TEST_P(SetTo, Without_mask) +OCL_TEST_P(SetTo, Without_mask) { if(depth == CV_64F && !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) @@ -357,7 +349,7 @@ TEST_P(SetTo, Without_mask) } } -TEST_P(SetTo, With_mask) +OCL_TEST_P(SetTo, With_mask) { if(depth == CV_64F && !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) @@ -401,8 +393,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) use_roi = GET_PARAM(1); int type = CV_MAKE_TYPE(depth, 3); - cv::RNG &rng = TS::ptr()->get_rng(); - src = randomMat(rng, randomSize(1, MAX_VALUE), type, 0, 40, false); + src = randomMat(randomSize(1, MAX_VALUE), type, 0, 40, false); } void random_roi() @@ -410,7 +401,6 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) if (use_roi) { //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, src.cols); roirows = rng.uniform(1, src.rows); srcx = rng.uniform(0, src.cols - roicols); @@ -427,7 +417,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) } }; -TEST_P(convertC3C4, Accuracy) +OCL_TEST_P(convertC3C4, Accuracy) { if(depth == CV_64F && !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) diff --git a/modules/ocl/test/test_ml.cpp b/modules/ocl/test/test_ml.cpp index af86d35a65..76940fff05 100644 --- a/modules/ocl/test/test_ml.cpp +++ b/modules/ocl/test/test_ml.cpp @@ -44,16 +44,19 @@ //M*/ #include "test_precomp.hpp" + #ifdef HAVE_OPENCL + using namespace cv; 
using namespace cv::ocl; using namespace cvtest; using namespace testing; + ///////K-NEAREST NEIGHBOR////////////////////////// -static void genTrainData(Mat& trainData, int trainDataRow, int trainDataCol, + +static void genTrainData(cv::RNG& rng, Mat& trainData, int trainDataRow, int trainDataCol, Mat& trainLabel = Mat().setTo(Scalar::all(0)), int nClasses = 0) { - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size(trainDataCol, trainDataRow); trainData = randomMat(rng, size, CV_32FC1, 1.0, 1000.0, false); if(nClasses != 0) @@ -81,14 +84,14 @@ PARAM_TEST_CASE(KNN, int, Size, int, bool) } }; -TEST_P(KNN, Accuracy) +OCL_TEST_P(KNN, Accuracy) { Mat trainData, trainLabels; const int trainDataRow = 500; - genTrainData(trainData, trainDataRow, trainDataCol, trainLabels, nClass); + genTrainData(rng, trainData, trainDataRow, trainDataCol, trainLabels, nClass); Mat testData, testLabels; - genTrainData(testData, testDataRow, trainDataCol); + genTrainData(rng, testData, testDataRow, trainDataCol); KNearestNeighbour knn_ocl; CvKNearest knn_cpu; @@ -119,10 +122,14 @@ TEST_P(KNN, Accuracy) EXPECT_MAT_NEAR(Mat(best_label_ocl), best_label_cpu, 0.0); } } + INSTANTIATE_TEST_CASE_P(OCL_ML, KNN, Combine(Values(6, 5), Values(Size(200, 400), Size(300, 600)), Values(4, 3), Values(false, true))); +#ifdef HAVE_CLAMDBLAS // TODO does not work non-blas version of SVM + ////////////////////////////////SVM///////////////////////////////////////////////// + PARAM_TEST_CASE(SVM_OCL, int, int, int) { cv::Size size; @@ -130,7 +137,6 @@ PARAM_TEST_CASE(SVM_OCL, int, int, int) int svm_type; Mat src, labels, samples, labels_predict; int K; - cv::RNG rng ; virtual void SetUp() { @@ -138,7 +144,6 @@ PARAM_TEST_CASE(SVM_OCL, int, int, int) kernel_type = GET_PARAM(0); svm_type = GET_PARAM(1); K = GET_PARAM(2); - rng = TS::ptr()->get_rng(); cv::Size size = cv::Size(MWIDTH, MHEIGHT); src.create(size, CV_32FC1); labels.create(1, size.height, CV_32SC1); @@ -160,7 +165,7 @@ PARAM_TEST_CASE(SVM_OCL, int, int, 
int) { Mat cur_row_header = src.row(row_idx + 1 + j); center_row_header.copyTo(cur_row_header); - Mat tmpmat = randomMat(rng, cur_row_header.size(), cur_row_header.type(), 1, 100, false); + Mat tmpmat = randomMat(cur_row_header.size(), cur_row_header.type(), 1, 100, false); cur_row_header += tmpmat; labels.at(0, row_idx + 1 + j) = i; } @@ -187,7 +192,7 @@ PARAM_TEST_CASE(SVM_OCL, int, int, int) { Mat cur_row_header = samples.row(row_idx + 1 + j); center_row_header.copyTo(cur_row_header); - Mat tmpmat = randomMat(rng, cur_row_header.size(), cur_row_header.type(), 1, 100, false); + Mat tmpmat = randomMat(cur_row_header.size(), cur_row_header.type(), 1, 100, false); cur_row_header += tmpmat; labels_predict.at(0, row_idx + 1 + j) = i; } @@ -196,7 +201,8 @@ PARAM_TEST_CASE(SVM_OCL, int, int, int) labels_predict.convertTo(labels_predict, CV_32FC1); } }; -TEST_P(SVM_OCL, Accuracy) + +OCL_TEST_P(SVM_OCL, Accuracy) { CvSVMParams params; params.degree = 0.4; @@ -292,9 +298,16 @@ TEST_P(SVM_OCL, Accuracy) } } } + +// TODO FIXIT: CvSVM::EPS_SVR case is crashed inside CPU implementation +// Anonymous enums are not supported well so cast them to 'int' + INSTANTIATE_TEST_CASE_P(OCL_ML, SVM_OCL, testing::Combine( - Values(CvSVM::LINEAR, CvSVM::POLY, CvSVM::RBF, CvSVM::SIGMOID), - Values(CvSVM::C_SVC, CvSVM::NU_SVC, CvSVM::ONE_CLASS, CvSVM::EPS_SVR, CvSVM::NU_SVR), + Values((int)CvSVM::LINEAR, (int)CvSVM::POLY, (int)CvSVM::RBF, (int)CvSVM::SIGMOID), + Values((int)CvSVM::C_SVC, (int)CvSVM::NU_SVC, (int)CvSVM::ONE_CLASS, (int)CvSVM::NU_SVR), Values(2, 3, 4) )); + +#endif // HAVE_CLAMDBLAS + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp index f9bc4afaa2..7118609f54 100644 --- a/modules/ocl/test/test_moments.cpp +++ b/modules/ocl/test/test_moments.cpp @@ -7,7 +7,6 @@ using namespace cv; using namespace cv::ocl; using namespace cvtest; using namespace testing; - PARAM_TEST_CASE(MomentsTest, MatType, bool) { int type; @@ -18,9 
+17,8 @@ PARAM_TEST_CASE(MomentsTest, MatType, bool) { type = GET_PARAM(0); test_contours = GET_PARAM(1); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size(10*MWIDTH, 10*MHEIGHT); - mat1 = randomMat(rng, size, type, 5, 16, false); + mat1 = randomMat(size, type, 5, 16, false); } void Compare(Moments& cpu_moments, Moments& gpu_moments) @@ -34,10 +32,9 @@ PARAM_TEST_CASE(MomentsTest, MatType, bool) }; -TEST_P(MomentsTest, Mat) +OCL_TEST_P(MomentsTest, Mat) { bool binaryImage = 0; - SetUp(); for(int j = 0; j < LOOP_TIMES; j++) { diff --git a/modules/ocl/test/test_objdetect.cpp b/modules/ocl/test/test_objdetect.cpp index 295ae642b0..34ac4f5e98 100644 --- a/modules/ocl/test/test_objdetect.cpp +++ b/modules/ocl/test/test_objdetect.cpp @@ -49,8 +49,6 @@ using namespace cv; using namespace testing; -#ifdef HAVE_OPENCL - ///////////////////// HOG ///////////////////////////// PARAM_TEST_CASE(HOG, Size, int) { @@ -66,7 +64,7 @@ PARAM_TEST_CASE(HOG, Size, int) } }; -TEST_P(HOG, GetDescriptors) +OCL_TEST_P(HOG, GetDescriptors) { // Convert image Mat img; @@ -112,7 +110,7 @@ TEST_P(HOG, GetDescriptors) EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); } -TEST_P(HOG, Detect) +OCL_TEST_P(HOG, Detect) { // Convert image Mat img; @@ -210,13 +208,14 @@ PARAM_TEST_CASE(Haar, int, CascadeName) } }; -TEST_P(Haar, FaceDetect) +OCL_TEST_P(Haar, FaceDetect) { cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, flags, Size(30, 30)); - cpucascade.detectMultiScale(img, faces, 1.1, 3, - flags, Size(30, 30)); + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); } @@ -224,6 +223,3 @@ TEST_P(Haar, FaceDetect) INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar, Combine(Values((int)CASCADE_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2))); - - -#endif //HAVE_OPENCL diff --git a/modules/ocl/test/test_optflow.cpp b/modules/ocl/test/test_optflow.cpp index 
5d1817e913..da38fe4716 100644 --- a/modules/ocl/test/test_optflow.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -52,6 +52,7 @@ using namespace cv; using namespace cv::ocl; using namespace cvtest; using namespace testing; +using namespace std; ////////////////////////////////////////////////////// // GoodFeaturesToTrack @@ -69,7 +70,7 @@ PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance) } }; -TEST_P(GoodFeaturesToTrack, Accuracy) +OCL_TEST_P(GoodFeaturesToTrack, Accuracy) { cv::Mat frame = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame.empty()); @@ -110,7 +111,7 @@ TEST_P(GoodFeaturesToTrack, Accuracy) ASSERT_LE(bad_ratio, 0.01); } -TEST_P(GoodFeaturesToTrack, EmptyCorners) +OCL_TEST_P(GoodFeaturesToTrack, EmptyCorners) { int maxCorners = 1000; double qualityLevel = 0.01; @@ -140,7 +141,7 @@ PARAM_TEST_CASE(TVL1, bool) }; -TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module are different in 2.4 and master branches +OCL_TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module are different in 2.4 and master branches { cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame0.empty()); @@ -149,9 +150,8 @@ TEST_P(TVL1, DISABLED_Accuracy) // TODO implementations of TV1 in video module a ASSERT_FALSE(frame1.empty()); cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Mat flowx = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi); - cv::Mat flowy = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi); + cv::Mat flowx = randomMat(frame0.size(), CV_32FC1, 0, 0, useRoi); + cv::Mat flowy = randomMat(frame0.size(), CV_32FC1, 0, 0, useRoi); cv::ocl::oclMat d_flowx(flowx), d_flowy(flowy); d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy); @@ -182,7 +182,7 @@ PARAM_TEST_CASE(Sparse, bool, bool) } }; -TEST_P(Sparse, Mat) +OCL_TEST_P(Sparse, Mat) { cv::Mat frame0 = 
readImage("gpu/opticalflow/rubberwhale1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); ASSERT_FALSE(frame0.empty()); @@ -292,7 +292,7 @@ PARAM_TEST_CASE(Farneback, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow) } }; -TEST_P(Farneback, Accuracy) +OCL_TEST_P(Farneback, Accuracy) { cv::Mat frame0 = readImage("gpu/opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame0.empty()); diff --git a/modules/ocl/test/test_precomp.hpp b/modules/ocl/test/test_precomp.hpp index 743fa96759..af467f5b88 100644 --- a/modules/ocl/test/test_precomp.hpp +++ b/modules/ocl/test/test_precomp.hpp @@ -72,4 +72,6 @@ #include "opencv2/core/private.hpp" +using namespace cvtest; + #endif diff --git a/modules/ocl/test/test_pyramids.cpp b/modules/ocl/test/test_pyramids.cpp index 9070ee5aa7..2d1dd33599 100644 --- a/modules/ocl/test/test_pyramids.cpp +++ b/modules/ocl/test/test_pyramids.cpp @@ -74,12 +74,12 @@ PARAM_TEST_CASE(PyrBase, MatType, int) typedef PyrBase PyrDown; -TEST_P(PyrDown, Mat) +OCL_TEST_P(PyrDown, Mat) { for (int j = 0; j < LOOP_TIMES; j++) { Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(depth, channels)); + Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255); oclMat gsrc(src); pyrDown(src, dst_cpu); @@ -97,12 +97,12 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( typedef PyrBase PyrUp; -TEST_P(PyrUp, Accuracy) +OCL_TEST_P(PyrUp, Accuracy) { for (int j = 0; j < LOOP_TIMES; j++) { Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(depth, channels)); + Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255); oclMat gsrc(src); pyrUp(src, dst_cpu); diff --git a/modules/ocl/test/test_sort.cpp b/modules/ocl/test/test_sort.cpp index d303665568..5a7d4a3004 100644 --- a/modules/ocl/test/test_sort.cpp +++ b/modules/ocl/test/test_sort.cpp @@ -229,7 +229,7 @@ PARAM_TEST_CASE(SortByKey, InputSize, MatType, MatType, SortMethod, IsGreaterTha } }; -TEST_P(SortByKey, Accuracy) 
+OCL_TEST_P(SortByKey, Accuracy) { using namespace cv; ocl::oclMat oclmat_key(mat_key); diff --git a/modules/ocl/test/test_split_merge.cpp b/modules/ocl/test/test_split_merge.cpp index 52db49b028..3cbd65de32 100644 --- a/modules/ocl/test/test_split_merge.cpp +++ b/modules/ocl/test/test_split_merge.cpp @@ -90,12 +90,11 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int, bool) channels = GET_PARAM(1); use_roi = GET_PARAM(2); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size(MWIDTH, MHEIGHT); for (int i = 0; i < channels; ++i) - mat[i] = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + mat[i] = randomMat(size, CV_MAKETYPE(type, 1), 5, 16, false); + dst = randomMat(size, CV_MAKETYPE(type, channels), 5, 16, false); } void random_roi() @@ -103,7 +102,6 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int, bool) if (use_roi) { //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat[0].cols); roirows = rng.uniform(1, mat[0].rows); @@ -141,7 +139,7 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int, bool) struct Merge : MergeTestBase {}; -TEST_P(Merge, Accuracy) +OCL_TEST_P(Merge, Accuracy) { for(int j = 0; j < LOOP_TIMES; j++) { @@ -191,19 +189,17 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int, bool) channels = GET_PARAM(1); use_roi = GET_PARAM(2); - cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size(MWIDTH, MHEIGHT); - mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + mat = randomMat(size, CV_MAKETYPE(type, channels), 5, 16, false); for (int i = 0; i < channels; ++i) - dst[i] = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); } + dst[i] = randomMat(size, CV_MAKETYPE(type, 1), 5, 16, false); } void random_roi() { if (use_roi) { //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat.cols); roirows = rng.uniform(1, mat.rows); srcx = rng.uniform(0, mat.cols - roicols); @@ -242,7 +238,7 @@ 
PARAM_TEST_CASE(SplitTestBase, MatType, int, bool) struct Split : SplitTestBase {}; -TEST_P(Split, Accuracy) +OCL_TEST_P(Split, Accuracy) { for(int j = 0; j < LOOP_TIMES; j++) { diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index d169ec3b26..6e519991d7 100644 --- a/modules/ocl/test/utility.cpp +++ b/modules/ocl/test/utility.cpp @@ -45,7 +45,7 @@ using namespace std; using namespace cv; using namespace cvtest; - +namespace cvtest { //std::string generateVarList(int first,...) //{ // vector varname; @@ -72,41 +72,14 @@ using namespace cvtest; // return ss.str(); //}; -int randomInt(int minVal, int maxVal) -{ - RNG &rng = TS::ptr()->get_rng(); - return rng.uniform(minVal, maxVal); -} - -double randomDouble(double minVal, double maxVal) -{ - RNG &rng = TS::ptr()->get_rng(); - return rng.uniform(minVal, maxVal); -} - -Size randomSize(int minVal, int maxVal) -{ - return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); -} - -Scalar randomScalar(double minVal, double maxVal) -{ - return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal)); -} - -Mat randomMat(Size size, int type, double minVal, double maxVal) -{ - return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); -} - -cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi) +cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi) { Size size0 = size; if (useRoi) { - size0.width += randomInt(5, 15); - size0.height += randomInt(5, 15); + size0.width += rng.uniform(5, 15); + size0.height += rng.uniform(5, 15); } cv::ocl::oclMat d_m(size0, type); @@ -117,11 +90,11 @@ cv::ocl::oclMat createMat_ocl(Size size, int type, bool useRoi) return d_m; } -cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi) +cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi) { CV_Assert(m.type() == CV_8UC1 || m.type() == CV_8UC3); cv::ocl::oclMat d_m; - d_m = 
createMat_ocl(m.size(), m.type(), useRoi); + d_m = createMat_ocl(rng, m.size(), m.type(), useRoi); Size ls; Point pt; @@ -137,38 +110,6 @@ cv::ocl::oclMat loadMat_ocl(const Mat& m, bool useRoi) m_ocl.copyTo(d_m); return d_m; } -/* -void showDiff(InputArray gold_, InputArray actual_, double eps) -{ - Mat gold; - if (gold_.kind() == _InputArray::MAT) - gold = gold_.getMat(); - else - gold_.getGpuMat().download(gold); - - Mat actual; - if (actual_.kind() == _InputArray::MAT) - actual = actual_.getMat(); - else - actual_.getGpuMat().download(actual); - - Mat diff; - absdiff(gold, actual, diff); - threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY); - - namedWindow("gold", WINDOW_NORMAL); - namedWindow("actual", WINDOW_NORMAL); - namedWindow("diff", WINDOW_NORMAL); - - imshow("gold", gold); - imshow("actual", actual); - imshow("diff", diff); - - waitKey(); -} -*/ - - vector types(int depth_start, int depth_end, int cn_start, int cn_end) { @@ -288,3 +229,5 @@ double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& o } return final_test_result; } + +} // namespace cvtest diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index 3494c6f9fe..50e351019b 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -52,16 +52,12 @@ #define MIN_VALUE 171 #define MAX_VALUE 357 -//#define RANDOMROI -int randomInt(int minVal, int maxVal); -double randomDouble(double minVal, double maxVal); -//std::string generateVarList(int first,...); -std::string generateVarList(int &p1, int &p2); -cv::Size randomSize(int minVal, int maxVal); -cv::Scalar randomScalar(double minVal, double maxVal); -cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); +namespace cvtest { -void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); +//void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); + +cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi); +cv::ocl::oclMat 
loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi); // This function test if gpu_rst matches cpu_rst. // If the two vectors are not equal, it will return the difference in vector size @@ -78,10 +74,6 @@ double checkNorm(const cv::Mat &m); double checkNorm(const cv::Mat &m1, const cv::Mat &m2); double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); -//oclMat create -cv::ocl::oclMat createMat_ocl(cv::Size size, int type, bool useRoi = false); -cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false); - #define EXPECT_MAT_NORM(mat, eps) \ { \ EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ @@ -101,13 +93,6 @@ cv::ocl::oclMat loadMat_ocl(const cv::Mat& m, bool useRoi = false); EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \ } -namespace cv -{ - namespace ocl - { - // void PrintTo(const DeviceInfo& info, std::ostream* os); - } -} using perf::MatDepth; using perf::MatType; @@ -134,79 +119,105 @@ private: void PrintTo(const Inverse &useRoi, std::ostream *os); -enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; -CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) +#define OCL_RNG_SEED 123456 -CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE) -CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NORM_MINMAX) -CV_ENUM(ReduceOp, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN) -CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT) -CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV) -CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC) -CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) -CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED) +template +struct TSTestWithParam : public ::testing::TestWithParam +{ + cv::RNG rng; -CV_FLAGS(GemmFlags, GEMM_1_T, GEMM_2_T, GEMM_3_T); -CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, 
INTER_CUBIC, WARP_INVERSE_MAP) -CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT) + TSTestWithParam() + { + rng = cv::RNG(OCL_RNG_SEED); + } -void run_perf_test(); + int randomInt(int minVal, int maxVal) + { + return rng.uniform(minVal, maxVal); + } -#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > + double randomDouble(double minVal, double maxVal) + { + return rng.uniform(minVal, maxVal); + } -#define GET_PARAM(k) std::tr1::get< k >(GetParam()) + double randomDoubleLog(double minVal, double maxVal) + { + double logMin = log((double)minVal + 1); + double logMax = log((double)maxVal + 1); + double pow = rng.uniform(logMin, logMax); + double v = exp(pow) - 1; + CV_Assert(v >= minVal && (v < maxVal || (v == minVal && v == maxVal))); + return v; + } -#define ALL_DEVICES testing::ValuesIn(devices()) -#define DEVICES(feature) testing::ValuesIn(devices(feature)) + Size randomSize(int minVal, int maxVal) + { +#if 1 + return cv::Size((int)randomDoubleLog(minVal, maxVal), (int)randomDoubleLog(minVal, maxVal)); +#else + return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); +#endif + } -#define ALL_TYPES testing::ValuesIn(all_types()) -#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) + Size randomSize(int minValX, int maxValX, int minValY, int maxValY) + { +#if 1 + return cv::Size(randomDoubleLog(minValX, maxValX), randomDoubleLog(minValY, maxValY)); +#else + return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); +#endif + } -#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)) + Scalar randomScalar(double minVal, double maxVal) + { + return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal)); + } -#define DIRECT_INVERSE testing::Values(Inverse(false), 
Inverse(true)) + Mat randomMat(Size size, int type, double minVal, double maxVal, bool useRoi = false) + { + RNG dataRng(rng.next()); + return cvtest::randomMat(dataRng, size, type, minVal, maxVal, useRoi); + } -#ifndef ALL_DEPTH -#define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F)) -#endif -#define REPEAT 1000 -#define COUNT_U 0 // count the uploading execution time for ocl mat structures -#define COUNT_D 0 -// the following macro section tests the target function (kernel) performance -// upload is the code snippet for converting cv::mat to cv::ocl::oclMat -// downloading is the code snippet for converting cv::ocl::oclMat back to cv::mat -// change COUNT_U and COUNT_D to take downloading and uploading time into account -#define P_TEST_FULL( upload, kernel_call, download ) \ -{ \ - std::cout<< "\n" #kernel_call "\n----------------------"; \ - {upload;} \ - R_TEST( kernel_call, 2 ); \ - double t = (double)cvGetTickCount(); \ - R_T( { \ - if( COUNT_U ) {upload;} \ - kernel_call; \ - if( COUNT_D ) {download;} \ - } ); \ - t = (double)cvGetTickCount() - t; \ - std::cout << "runtime is " << t/((double)cvGetTickFrequency()* 1000.) << "ms" << std::endl; \ -} + struct Border + { + int top, bot, lef, rig; + }; -#define R_T2( test ) \ -{ \ - std::cout<< "\n" #test "\n----------------------"; \ - R_TEST( test, 15 ) \ - clock_t st = clock(); \ - R_T( test ) \ - std::cout<< clock() - st << "ms\n"; \ -} -#define R_T( test ) \ - R_TEST( test, REPEAT ) -#define R_TEST( test, repeat ) \ - try{ \ - for( int i = 0; i < repeat; i ++ ) { test; } \ - } catch( ... ) { std::cout << "||||| Exception catched! 
|||||\n"; return; } + Border randomBorder(int minValue = 0, int maxValue = MAX_VALUE) + { + Border border = { + (int)randomDoubleLog(minValue, maxValue), + (int)randomDoubleLog(minValue, maxValue), + (int)randomDoubleLog(minValue, maxValue), + (int)randomDoubleLog(minValue, maxValue) + }; + return border; + } + + void randomSubMat(Mat& whole, Mat& subMat, const Size& roiSize, const Border& border, int type, double minVal, double maxVal) + { + Size wholeSize = Size(roiSize.width + border.lef + border.rig, roiSize.height + border.top + border.bot); + whole = randomMat(wholeSize, type, minVal, maxVal, false); + subMat = whole(Rect(border.lef, border.top, roiSize.width, roiSize.height)); + } + + void generateOclMat(cv::ocl::oclMat& whole, cv::ocl::oclMat& subMat, const Mat& wholeMat, const Size& roiSize, const Border& border) + { + whole = wholeMat; + subMat = whole(Rect(border.lef, border.top, roiSize.width, roiSize.height)); + } +}; -//////// Utility +#define PARAM_TEST_CASE(name, ...) struct name : public TSTestWithParam< std::tr1::tuple< __VA_ARGS__ > > + +#define GET_PARAM(k) std::tr1::get< k >(GetParam()) + +#define ALL_TYPES testing::ValuesIn(all_types()) +#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) + +#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)) #define IMAGE_CHANNELS testing::Values(Channels(1), Channels(3), Channels(4)) #ifndef IMPLEMENT_PARAM_CLASS @@ -227,4 +238,70 @@ void run_perf_test(); IMPLEMENT_PARAM_CLASS(Channels, int) #endif // IMPLEMENT_PARAM_CLASS +} // namespace cvtest + +enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) + +CV_ENUM(CmpCode, CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE) +CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NORM_MINMAX) +CV_ENUM(ReduceOp, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN) +CV_ENUM(MorphOp, 
MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT) +CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV) +CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC) +CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) +CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED) + +CV_FLAGS(GemmFlags, GEMM_1_T, GEMM_2_T, GEMM_3_T); +CV_FLAGS(WarpFlags, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, WARP_INVERSE_MAP) +CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REAL_OUTPUT) + +# define OCL_TEST_P(test_case_name, test_name) \ + class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : \ + public test_case_name { \ + public: \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() { } \ + virtual void TestBody(); \ + void OCLTestBody(); \ + private: \ + static int AddToRegistry() \ + { \ + ::testing::UnitTest::GetInstance()->parameterized_test_registry(). 
\ + GetTestCasePatternHolder(\ + #test_case_name, __FILE__, __LINE__)->AddTestPattern(\ + #test_case_name, \ + #test_name, \ + new ::testing::internal::TestMetaFactory< \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \ + return 0; \ + } \ + \ + static int gtest_registering_dummy_; \ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ + }; \ + \ + int GTEST_TEST_CLASS_NAME_(test_case_name, \ + test_name)::gtest_registering_dummy_ = \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ + \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() \ + { \ + try \ + { \ + OCLTestBody(); \ + } \ + catch (const cv::Exception & ex) \ + { \ + if (ex.code == cv::Error::OpenCLDoubleNotSupported)\ + std::cout << "Test skipped (selected device does not support double)" << std::endl; \ + else if (ex.code == cv::Error::OpenCLNoAMDBlasFft) \ + std::cout << "Test skipped (AMD Blas / Fft libraries are not available)" << std::endl; \ + else \ + throw; \ + } \ + } \ + \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::OCLTestBody() + #endif // __OPENCV_TEST_UTILITY_HPP__ diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index d3bed1695b..e352ec419d 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -106,9 +106,28 @@ else() set(PYTHON_INSTALL_CONFIGURATIONS "") endif() -install(TARGETS ${the_module} - ${PYTHON_INSTALL_CONFIGURATIONS} - RUNTIME DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT main - LIBRARY DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT main - ARCHIVE DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT main - ) +if(WIN32) + set(PYTHON_INSTALL_ARCHIVE "") +else() + set(PYTHON_INSTALL_ARCHIVE ARCHIVE DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT main) +endif() + +if(NOT INSTALL_CREATE_DISTRIB) + install(TARGETS ${the_module} + ${PYTHON_INSTALL_CONFIGURATIONS} + RUNTIME DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT main + LIBRARY 
DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT main + ${PYTHON_INSTALL_ARCHIVE} + ) +else() + if(DEFINED PYTHON_VERSION_MAJOR) + set(__ver "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}") + else() + set(__ver "unknown") + endif() + install(TARGETS ${the_module} + CONFIGURATIONS Release + RUNTIME DESTINATION python/${__ver}/${OpenCV_ARCH} COMPONENT main + LIBRARY DESTINATION python/${__ver}/${OpenCV_ARCH} COMPONENT main + ) +endif() diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 5d98810145..348a1b4d1f 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -538,12 +538,23 @@ CV_EXPORTS void smoothBorder(Mat& img, const Scalar& color, int delta = 3); CV_EXPORTS void printVersionInfo(bool useStdOut = true); } //namespace cvtest -#define CV_TEST_MAIN(resourcesubdir) \ +#ifndef __CV_TEST_EXEC_ARGS +#if defined(_MSC_VER) && (_MSC_VER <= 1400) +#define __CV_TEST_EXEC_ARGS(...) \ + while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/ +#else +#define __CV_TEST_EXEC_ARGS(...) \ + __VA_ARGS__; +#endif +#endif + +#define CV_TEST_MAIN(resourcesubdir, ...) 
\ int main(int argc, char **argv) \ { \ cvtest::TS::ptr()->init(resourcesubdir); \ ::testing::InitGoogleTest(&argc, argv); \ - cvtest::printVersionInfo();\ + cvtest::printVersionInfo(); \ + __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ return RUN_ALL_TESTS(); \ } diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index 1982ca10d9..eff530cec5 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -241,9 +241,20 @@ typedef struct CV_EXPORTS performance_metrics }; performance_metrics(); + void clear(); } performance_metrics; +/*****************************************************************************************\ +* Strategy for performance measuring * +\*****************************************************************************************/ +enum PERF_STRATEGY +{ + PERF_STRATEGY_BASE = 0, + PERF_STRATEGY_SIMPLE = 1, +}; + + /*****************************************************************************************\ * Base fixture for performance tests * \*****************************************************************************************/ @@ -259,6 +270,9 @@ public: static std::string getDataPath(const std::string& relativePath); static std::string getSelectedImpl(); + static enum PERF_STRATEGY getPerformanceStrategy(); + static enum PERF_STRATEGY setPerformanceStrategy(enum PERF_STRATEGY strategy); + protected: virtual void PerfTestBody() = 0; @@ -471,23 +485,25 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ void fixture##_##name::PerfTestBody() +#ifndef __CV_TEST_EXEC_ARGS #if defined(_MSC_VER) && (_MSC_VER <= 1400) -#define CV_PERF_TEST_MAIN_INTERNALS_ARGS(...) \ +#define __CV_TEST_EXEC_ARGS(...) \ while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/ #else -#define CV_PERF_TEST_MAIN_INTERNALS_ARGS(...) \ +#define __CV_TEST_EXEC_ARGS(...) 
\ __VA_ARGS__; #endif +#endif #define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...) \ - CV_PERF_TEST_MAIN_INTERNALS_ARGS(__VA_ARGS__) \ - ::perf::Regression::Init(#modulename);\ - ::perf::TestBase::Init(std::vector(impls, impls + sizeof impls / sizeof *impls),\ - argc, argv);\ - ::testing::InitGoogleTest(&argc, argv);\ - cvtest::printVersionInfo();\ - ::testing::Test::RecordProperty("cv_module_name", #modulename);\ - ::perf::TestBase::RecordRunParameters();\ + ::perf::Regression::Init(#modulename); \ + ::perf::TestBase::Init(std::vector(impls, impls + sizeof impls / sizeof *impls), \ + argc, argv); \ + ::testing::InitGoogleTest(&argc, argv); \ + cvtest::printVersionInfo(); \ + ::testing::Test::RecordProperty("cv_module_name", #modulename); \ + ::perf::TestBase::RecordRunParameters(); \ + __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ return RUN_ALL_TESTS(); // impls must be an array, not a pointer; "plain" should always be one of the implementations diff --git a/modules/ts/misc/run.py b/modules/ts/misc/run.py index a64127f0d4..b09abb6175 100755 --- a/modules/ts/misc/run.py +++ b/modules/ts/misc/run.py @@ -7,6 +7,8 @@ from subprocess import Popen, PIPE hostos = os.name # 'nt', 'posix' hostmachine = platform.machine() # 'x86', 'AMD64', 'x86_64' +errorCode = 0 + SIMD_DETECTION_PROGRAM=""" #if __SSE5__ # error SSE5 @@ -641,6 +643,8 @@ class TestSuite(object): return True def runTest(self, path, workingDir, _stdout, _stderr, args = []): + global errorCode + if self.error: return args = args[:] @@ -759,9 +763,9 @@ class TestSuite(object): print >> _stderr, "Run command:", " ".join(cmd) try: - Popen(cmd, stdout=_stdout, stderr=_stderr, cwd = self.java_test_binary_dir + "/.build").wait() - except OSError: - pass + errorCode = Popen(cmd, stdout=_stdout, stderr=_stderr, cwd = self.java_test_binary_dir + "/.build").wait() + except: + print "Unexpected error:", sys.exc_info()[0] return None else: @@ -777,9 +781,9 @@ class TestSuite(object): print >> _stderr, "Run command:", " 
".join(cmd) try: - Popen(cmd, stdout=_stdout, stderr=_stderr, cwd = workingDir).wait() - except OSError: - pass + errorCode = Popen(cmd, stdout=_stdout, stderr=_stderr, cwd = workingDir).wait() + except: + print "Unexpected error:", sys.exc_info()[0] # clean temporary files if orig_temp_path: @@ -891,3 +895,7 @@ if __name__ == "__main__": if logs: print >> sys.stderr, "Collected: ", " ".join(logs) + + if errorCode != 0: + print "Error code: ", errorCode, (" (0x%x)" % (errorCode & 0xffffffff)) + exit(errorCode) diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 80839d09fd..b4e92424d9 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -18,6 +18,9 @@ int64 TestBase::_timeadjustment = 0; static std::vector available_impls; static std::string param_impl; + +static enum PERF_STRATEGY param_strategy = PERF_STRATEGY_BASE; + static double param_max_outliers; static double param_max_deviation; static unsigned int param_min_samples; @@ -152,7 +155,7 @@ void Regression::init(const std::string& testSuitName, const std::string& ext) { if (!storageInPath.empty()) { - LOGE("Subsequent initialisation of Regression utility is not allowed."); + LOGE("Subsequent initialization of Regression utility is not allowed."); return; } @@ -598,6 +601,11 @@ Regression& Regression::operator() (const std::string& name, cv::InputArray arra * ::perf::performance_metrics \*****************************************************************************************/ performance_metrics::performance_metrics() +{ + clear(); +} + +void performance_metrics::clear() { bytesIn = 0; bytesOut = 0; @@ -643,6 +651,7 @@ void TestBase::Init(const std::vector & availableImpls, "|the implementation variant of functions under test}" "{ perf_list_impls |false |list available implementation variants and exit}" "{ perf_run_cpu |false |deprecated, equivalent to --perf_impl=plain}" + "{ perf_strategy |default |specifies performance measuring strategy: default, base or simple 
(weak restrictions)}" #ifdef ANDROID "{ perf_time_limit |6.0 |default time limit for a single test (in seconds)}" "{ perf_affinity_mask |0 |set affinity mask for the main thread}" @@ -668,6 +677,24 @@ void TestBase::Init(const std::vector & availableImpls, ::testing::AddGlobalTestEnvironment(new PerfEnvironment); param_impl = args.has("perf_run_cpu") ? "plain" : args.get("perf_impl"); + std::string perf_strategy = args.get("perf_strategy"); + if (perf_strategy == "default") + { + // nothing + } + else if (perf_strategy == "base") + { + param_strategy = PERF_STRATEGY_BASE; + } + else if (perf_strategy == "simple") + { + param_strategy = PERF_STRATEGY_SIMPLE; + } + else + { + printf("No such strategy: %s\n", perf_strategy.c_str()); + exit(1); + } param_max_outliers = std::min(100., std::max(0., args.get("perf_max_outliers"))); param_min_samples = std::max(1u, args.get("perf_min_samples")); param_max_deviation = std::max(0., args.get("perf_max_deviation")); @@ -761,6 +788,18 @@ std::string TestBase::getSelectedImpl() return param_impl; } +enum PERF_STRATEGY TestBase::getPerformanceStrategy() +{ + return param_strategy; +} + +enum PERF_STRATEGY TestBase::setPerformanceStrategy(enum PERF_STRATEGY strategy) +{ + enum PERF_STRATEGY ret = param_strategy; + param_strategy = strategy; + return ret; +} + int64 TestBase::_calibrate() { @@ -791,6 +830,11 @@ int64 TestBase::_calibrate() _helper h; h.PerfTestBody(); double compensation = h.getMetrics().min; + if (param_strategy == PERF_STRATEGY_SIMPLE) + { + CV_Assert(compensation < 0.01 * cv::getTickFrequency()); + compensation = 0.0f; // simple strategy doesn't require any compensation + } LOGD("Time compensation is %.0f", compensation); return (int64)compensation; } @@ -854,8 +898,64 @@ cv::Size TestBase::getSize(cv::InputArray a) bool TestBase::next() { - bool has_next = ++currentIter < nIters && totalTime < timeLimit; + static int64 lastActivityPrintTime = 0; + + if (currentIter != (unsigned int)-1) + { + if (currentIter + 1 
!= times.size()) + ADD_FAILURE() << " next() is called before stopTimer()"; + } + else + { + lastActivityPrintTime = 0; + metrics.clear(); + } + cv::theRNG().state = param_seed; //this rng should generate same numbers for each run + ++currentIter; + + bool has_next = false; + + do { + assert(currentIter == times.size()); + if (currentIter == 0) + { + has_next = true; + break; + } + + if (param_strategy == PERF_STRATEGY_BASE) + { + has_next = currentIter < nIters && totalTime < timeLimit; + } + else + { + assert(param_strategy == PERF_STRATEGY_SIMPLE); + if (totalTime - lastActivityPrintTime >= cv::getTickFrequency() * 10) + { + std::cout << '.' << std::endl; + lastActivityPrintTime = totalTime; + } + if (currentIter >= nIters) + { + has_next = false; + break; + } + if (currentIter < param_min_samples) + { + has_next = true; + break; + } + + calcMetrics(); + + double criteria = 0.03; // 3% + if (fabs(metrics.mean) > 1e-6) + has_next = metrics.stddev > criteria * fabs(metrics.mean); + else + has_next = true; + } + } while (false); #ifdef ANDROID if (log_power_checkpoints) @@ -868,6 +968,9 @@ bool TestBase::next() if (!has_next) RecordProperty("test_complete", cv::format("%llu",t1).c_str()); } #endif + + if (has_next) + startTimer(); // really we should measure activity from this moment, so reset start time return has_next; } @@ -914,7 +1017,7 @@ void TestBase::stopTimer() { int64 time = cv::getTickCount(); if (lastTime == 0) - ADD_FAILURE() << " stopTimer() is called before startTimer()"; + ADD_FAILURE() << " stopTimer() is called before startTimer()/next()"; lastTime = time - lastTime; totalTime += lastTime; lastTime -= _timeadjustment; @@ -925,6 +1028,7 @@ void TestBase::stopTimer() performance_metrics& TestBase::calcMetrics() { + CV_Assert(metrics.samples <= (unsigned int)currentIter); if ((metrics.samples == (unsigned int)currentIter) || times.size() == 0) return metrics; @@ -946,47 +1050,61 @@ performance_metrics& TestBase::calcMetrics() std::sort(times.begin(), 
times.end()); - //estimate mean and stddev for log(time) - double gmean = 0; - double gstddev = 0; - int n = 0; - for(TimeVector::const_iterator i = times.begin(); i != times.end(); ++i) - { - double x = static_cast(*i)/runsPerIteration; - if (x < DBL_EPSILON) continue; - double lx = log(x); + TimeVector::const_iterator start = times.begin(); + TimeVector::const_iterator end = times.end(); - ++n; - double delta = lx - gmean; - gmean += delta / n; - gstddev += delta * (lx - gmean); - } + if (param_strategy == PERF_STRATEGY_BASE) + { + //estimate mean and stddev for log(time) + double gmean = 0; + double gstddev = 0; + int n = 0; + for(TimeVector::const_iterator i = times.begin(); i != times.end(); ++i) + { + double x = static_cast(*i)/runsPerIteration; + if (x < DBL_EPSILON) continue; + double lx = log(x); - gstddev = n > 1 ? sqrt(gstddev / (n - 1)) : 0; + ++n; + double delta = lx - gmean; + gmean += delta / n; + gstddev += delta * (lx - gmean); + } - TimeVector::const_iterator start = times.begin(); - TimeVector::const_iterator end = times.end(); + gstddev = n > 1 ? 
sqrt(gstddev / (n - 1)) : 0; - //filter outliers assuming log-normal distribution - //http://stackoverflow.com/questions/1867426/modeling-distribution-of-performance-measurements - int offset = 0; - if (gstddev > DBL_EPSILON) + //filter outliers assuming log-normal distribution + //http://stackoverflow.com/questions/1867426/modeling-distribution-of-performance-measurements + if (gstddev > DBL_EPSILON) + { + double minout = exp(gmean - 3 * gstddev) * runsPerIteration; + double maxout = exp(gmean + 3 * gstddev) * runsPerIteration; + while(*start < minout) ++start, ++metrics.outliers; + do --end, ++metrics.outliers; while(*end > maxout); + ++end, --metrics.outliers; + } + } + else if (param_strategy == PERF_STRATEGY_SIMPLE) + { + metrics.outliers = static_cast(times.size() * param_max_outliers / 100); + for (unsigned int i = 0; i < metrics.outliers; i++) + --end; + } + else { - double minout = exp(gmean - 3 * gstddev) * runsPerIteration; - double maxout = exp(gmean + 3 * gstddev) * runsPerIteration; - while(*start < minout) ++start, ++metrics.outliers, ++offset; - do --end, ++metrics.outliers; while(*end > maxout); - ++end, --metrics.outliers; + assert(false); } + int offset = static_cast(start - times.begin()); + metrics.min = static_cast(*start)/runsPerIteration; //calc final metrics - n = 0; - gmean = 0; - gstddev = 0; + unsigned int n = 0; + double gmean = 0; + double gstddev = 0; double mean = 0; double stddev = 0; - int m = 0; + unsigned int m = 0; for(; start != end; ++start) { double x = static_cast(*start)/runsPerIteration; @@ -1008,11 +1126,10 @@ performance_metrics& TestBase::calcMetrics() metrics.gmean = exp(gmean); metrics.gstddev = m > 1 ? sqrt(gstddev / (m - 1)) : 0; metrics.stddev = n > 1 ? sqrt(stddev / (n - 1)) : 0; - metrics.median = n % 2 + metrics.median = (n % 2 ? 
(double)times[offset + n / 2] - : 0.5 * (times[offset + n / 2] + times[offset + n / 2 - 1]); - - metrics.median /= runsPerIteration; + : 0.5 * (times[offset + n / 2] + times[offset + n / 2 - 1]) + ) / runsPerIteration; return metrics; } @@ -1026,17 +1143,31 @@ void TestBase::validateMetrics() ASSERT_GE(m.samples, 1u) << " No time measurements was performed.\nstartTimer() and stopTimer() commands are required for performance tests."; - EXPECT_GE(m.samples, param_min_samples) - << " Only a few samples are collected.\nPlease increase number of iterations or/and time limit to get reliable performance measurements."; + if (param_strategy == PERF_STRATEGY_BASE) + { + EXPECT_GE(m.samples, param_min_samples) + << " Only a few samples are collected.\nPlease increase number of iterations or/and time limit to get reliable performance measurements."; + + if (m.gstddev > DBL_EPSILON) + { + EXPECT_GT(/*m.gmean * */1., /*m.gmean * */ 2 * sinh(m.gstddev * param_max_deviation)) + << " Test results are not reliable ((mean-sigma,mean+sigma) deviation interval is greater than measured time interval)."; + } - if (m.gstddev > DBL_EPSILON) + EXPECT_LE(m.outliers, std::max((unsigned int)cvCeil(m.samples * param_max_outliers / 100.), 1u)) + << " Test results are not reliable (too many outliers)."; + } + else if (param_strategy == PERF_STRATEGY_SIMPLE) { - EXPECT_GT(/*m.gmean * */1., /*m.gmean * */ 2 * sinh(m.gstddev * param_max_deviation)) - << " Test results are not reliable ((mean-sigma,mean+sigma) deviation interval is greater than measured time interval)."; + double mean = metrics.mean * 1000.0f / metrics.frequency; + double stddev = metrics.stddev * 1000.0f / metrics.frequency; + double percents = stddev / mean * 100.f; + printf(" samples = %d, mean = %.2f, stddev = %.2f (%.1f%%)\n", (int)metrics.samples, mean, stddev, percents); + } + else + { + assert(false); } - - EXPECT_LE(m.outliers, std::max((unsigned int)cvCeil(m.samples * param_max_outliers / 100.), 1u)) - << " Test results 
are not reliable (too many outliers)."; } void TestBase::reportMetrics(bool toJUnitXML) @@ -1199,12 +1330,12 @@ void TestBase::RunPerfTestBody() { this->PerfTestBody(); } - catch(PerfEarlyExitException) + catch(PerfEarlyExitException&) { metrics.terminationReason = performance_metrics::TERM_INTERRUPT; return;//no additional failure logging } - catch(cv::Exception e) + catch(cv::Exception& e) { metrics.terminationReason = performance_metrics::TERM_EXCEPTION; #ifdef HAVE_CUDA @@ -1213,7 +1344,7 @@ void TestBase::RunPerfTestBody() #endif FAIL() << "Expected: PerfTestBody() doesn't throw an exception.\n Actual: it throws cv::Exception:\n " << e.what(); } - catch(std::exception e) + catch(std::exception& e) { metrics.terminationReason = performance_metrics::TERM_EXCEPTION; FAIL() << "Expected: PerfTestBody() doesn't throw an exception.\n Actual: it throws std::exception:\n " << e.what(); @@ -1234,6 +1365,7 @@ TestBase::_declareHelper& TestBase::_declareHelper::iterations(unsigned int n) test->times.reserve(n); test->nIters = std::min(n, TestBase::iterationsLimitDefault); test->currentIter = (unsigned int)-1; + test->metrics.clear(); return *this; } @@ -1242,6 +1374,7 @@ TestBase::_declareHelper& TestBase::_declareHelper::time(double timeLimitSecs) test->times.clear(); test->currentIter = (unsigned int)-1; test->timeLimit = (int64)(timeLimitSecs * cv::getTickFrequency()); + test->metrics.clear(); return *this; } diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 799f34f100..48a419d75c 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -1,3 +1,10 @@ +# Detect if we want to build samples with library binaries or not +if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_LIST_DIR) +# +# BUILD CASE 1: Build samples with library sources +# + + # ---------------------------------------------------------------------------- # CMake file for samples. 
See root CMakeLists.txt # @@ -11,3 +18,51 @@ add_subdirectory(ocl) if(ANDROID AND BUILD_ANDROID_EXAMPLES) add_subdirectory(android) endif() + + +# +# END OF BUILD CASE 1: Build samples with library sources +# +else() +# +# BUILD CASE 2: Build samples with library binaries +# +cmake_minimum_required(VERSION 2.8) + +project(samples C CXX) +option(BUILD_EXAMPLES "Build samples" ON) + +find_package(OpenCV REQUIRED) + +if(MSVC) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + + if(NOT OpenCV_SHARED) + foreach(flag_var + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif() + if(${flag_var} MATCHES "/MDd") + string(REGEX REPLACE "/MDd" "/MTd" ${flag_var} "${${flag_var}}") + endif() + endforeach(flag_var) + + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:msvcrt.lib /NODEFAULTLIB:msvcrtd.lib") + set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /NODEFAULTLIB:libcmt.lib") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /NODEFAULTLIB:libcmtd.lib") + endif() +endif() + +add_subdirectory(c) +add_subdirectory(cpp) +add_subdirectory(ocl) +# FIXIT: can't use cvconfig.h in samples: add_subdirectory(gpu) + +# +# END OF BUILD CASE 2: Build samples with library binaries +# +endif() \ No newline at end of file diff --git a/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp b/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp index 33a8334c6a..cf91daa45c 100644 --- a/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp +++ b/samples/android/face-detection/jni/DetectionBasedTracker_jni.cpp @@ -100,7 +100,7 @@ JNIEXPORT jlong JNICALL 
Java_org_opencv_samples_facedetect_DetectionBasedTracker { LOGD("nativeCreateObject caught unknown exception"); jclass je = jenv->FindClass("java/lang/Exception"); - jenv->ThrowNew(je, "Unknown exception in JNI code {Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeCreateObject(...)}"); + jenv->ThrowNew(je, "Unknown exception in JNI code of DetectionBasedTracker.nativeCreateObject()"); return 0; } @@ -133,7 +133,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_ { LOGD("nativeDestroyObject caught unknown exception"); jclass je = jenv->FindClass("java/lang/Exception"); - jenv->ThrowNew(je, "Unknown exception in JNI code {Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeDestroyObject(...)}"); + jenv->ThrowNew(je, "Unknown exception in JNI code of DetectionBasedTracker.nativeDestroyObject()"); } LOGD("Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeDestroyObject exit"); } @@ -159,7 +159,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_ { LOGD("nativeStart caught unknown exception"); jclass je = jenv->FindClass("java/lang/Exception"); - jenv->ThrowNew(je, "Unknown exception in JNI code {Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeStart(...)}"); + jenv->ThrowNew(je, "Unknown exception in JNI code of DetectionBasedTracker.nativeStart()"); } LOGD("Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeStart exit"); } @@ -185,7 +185,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_ { LOGD("nativeStop caught unknown exception"); jclass je = jenv->FindClass("java/lang/Exception"); - jenv->ThrowNew(je, "Unknown exception in JNI code {Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeStop(...)}"); + jenv->ThrowNew(je, "Unknown exception in JNI code of DetectionBasedTracker.nativeStop()"); } LOGD("Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeStop exit"); } @@ -215,7 +215,7 
@@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_ { LOGD("nativeSetFaceSize caught unknown exception"); jclass je = jenv->FindClass("java/lang/Exception"); - jenv->ThrowNew(je, "Unknown exception in JNI code {Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeSetFaceSize(...)}"); + jenv->ThrowNew(je, "Unknown exception in JNI code of DetectionBasedTracker.nativeSetFaceSize()"); } LOGD("Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeSetFaceSize -- END"); } @@ -245,7 +245,7 @@ JNIEXPORT void JNICALL Java_org_opencv_samples_facedetect_DetectionBasedTracker_ { LOGD("nativeDetect caught unknown exception"); jclass je = jenv->FindClass("java/lang/Exception"); - jenv->ThrowNew(je, "Unknown exception in JNI code {Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeDetect(...)}"); + jenv->ThrowNew(je, "Unknown exception in JNI code DetectionBasedTracker.nativeDetect()"); } LOGD("Java_org_opencv_samples_facedetect_DetectionBasedTracker_nativeDetect END"); } diff --git a/samples/c/CMakeLists.txt b/samples/c/CMakeLists.txt index 7ea20b9b2f..77a42949d0 100644 --- a/samples/c/CMakeLists.txt +++ b/samples/c/CMakeLists.txt @@ -39,7 +39,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") endif() install(TARGETS ${the_target} - RUNTIME DESTINATION "samples/c" COMPONENT main) + RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/c" COMPONENT main) endif() ENDMACRO() diff --git a/samples/c/adaptiveskindetector.cpp b/samples/c/adaptiveskindetector.cpp index f94c7a871c..21c9ffe9b2 100644 --- a/samples/c/adaptiveskindetector.cpp +++ b/samples/c/adaptiveskindetector.cpp @@ -34,7 +34,6 @@ // //M*/ - #include #include #include @@ -42,6 +41,10 @@ #include "opencv2/contrib/compat.hpp" #include "opencv2/highgui/highgui_c.h" +#ifndef _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_WARNINGS +#endif + 
static void help(char **argv) { std::cout << "\nThis program demonstrates the contributed flesh detector CvAdaptiveSkinDetector which can be found in contrib.cpp\n" diff --git a/samples/c/blobtrack_sample.cpp b/samples/c/blobtrack_sample.cpp index 55f72eb209..90f670d06e 100644 --- a/samples/c/blobtrack_sample.cpp +++ b/samples/c/blobtrack_sample.cpp @@ -8,11 +8,13 @@ /* Select appropriate case insensitive string comparison function: */ #if defined WIN32 || defined _MSC_VER - #define MY_STRNICMP strnicmp - #define MY_STRICMP stricmp +# define MY_STRNICMP _strnicmp +# define MY_STRICMP _stricmp +# define MY_STRDUP _strdup #else - #define MY_STRNICMP strncasecmp - #define MY_STRICMP strcasecmp +# define MY_STRNICMP strncasecmp +# define MY_STRICMP strcasecmp +# define MY_STRDUP strdup #endif /* List of foreground (FG) DETECTION modules: */ @@ -239,7 +241,7 @@ static int RunBlobTrackingAuto( CvCapture* pCap, CvBlobTrackerAuto* pTracker,cha if(pS) { - char* pStr = strdup(pS); + char* pStr = MY_STRDUP(pS); char* pStrFree = pStr; while (pStr && strlen(pStr) > 0) diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt index c3477e3fb7..2aa71f047c 100644 --- a/samples/cpp/CMakeLists.txt +++ b/samples/cpp/CMakeLists.txt @@ -41,11 +41,11 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) if("${srcs}" MATCHES "tutorial_code") set(sample_kind tutorial) set(sample_KIND TUTORIAL) - set(sample_folder "samples//tutorials") + set(sample_subfolder "tutorials") else() set(sample_kind example) set(sample_KIND EXAMPLE) - set(sample_folder "samples//cpp") + set(sample_subfolder "cpp") endif() set(the_target "${sample_kind}_${name}") @@ -61,7 +61,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) PROJECT_LABEL "(${sample_KIND}) ${name}") if(ENABLE_SOLUTION_FOLDERS) - set_target_properties(${the_target} PROPERTIES FOLDER "${sample_folder}") + set_target_properties(${the_target} PROPERTIES FOLDER "samples/${sample_subfolder}") endif() if(WIN32) @@ -69,7 +69,7 @@ if(BUILD_EXAMPLES 
AND OCV_DEPENDENCIES_FOUND) set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") endif() install(TARGETS ${the_target} - RUNTIME DESTINATION "${sample_folder}" COMPONENT main) + RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${sample_subfolder}" COMPONENT main) endif() ENDMACRO() diff --git a/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp b/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp index 9890feee97..a81e12170c 100644 --- a/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp +++ b/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp @@ -18,6 +18,10 @@ #include "opencv2/imgproc.hpp" // cvCvtcolor function #include "opencv2/highgui.hpp" // display +#ifndef _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_WARNINGS +#endif + static void help(std::string errorMessage) { std::cout<<"Program init error : "< #include +#ifndef _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_WARNINGS +#endif + using namespace cv; using namespace std; diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index 7aaaf6fb5c..2591d329dc 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -76,7 +76,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) if(MSVC AND NOT BUILD_SHARED_LIBS) set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") endif() - install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${project}" COMPONENT main) + install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${project}" COMPONENT main) endif() ENDMACRO() diff --git a/samples/gpu/performance/CMakeLists.txt b/samples/gpu/performance/CMakeLists.txt index 28409c0258..22657b56e9 100644 --- a/samples/gpu/performance/CMakeLists.txt +++ b/samples/gpu/performance/CMakeLists.txt @@ -23,7 +23,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() if(WIN32) - install(TARGETS ${the_target} RUNTIME 
DESTINATION "samples/gpu" COMPONENT main) + install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/gpu" COMPONENT main) endif() if(INSTALL_C_EXAMPLES AND NOT WIN32) diff --git a/samples/ocl/CMakeLists.txt b/samples/ocl/CMakeLists.txt index 9b04dc3976..8db77d52c8 100644 --- a/samples/ocl/CMakeLists.txt +++ b/samples/ocl/CMakeLists.txt @@ -38,7 +38,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) if(MSVC AND NOT BUILD_SHARED_LIBS) set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") endif() - install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${project}" COMPONENT main) + install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${project}" COMPONENT main) endif() ENDMACRO()