diff --git a/CMakeLists.txt b/CMakeLists.txt index e6639a962e..03bf4f3c08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,7 @@ endif() project(OpenCV CXX C) -include(cmake/OpenCVUtils.cmake REQUIRED) +include(cmake/OpenCVUtils.cmake) # ---------------------------------------------------------------------------- # Break in case of popular CMake configuration mistakes @@ -73,7 +73,7 @@ endif() # ---------------------------------------------------------------------------- # Detect compiler and target platform architecture # ---------------------------------------------------------------------------- -include(cmake/OpenCVDetectCXXCompiler.cmake REQUIRED) +include(cmake/OpenCVDetectCXXCompiler.cmake) # Add these standard paths to the search paths for FIND_LIBRARY # to find libraries from these locations first @@ -159,7 +159,6 @@ OCV_OPTION(BUILD_FAT_JAVA_LIB "Create fat java wrapper containing the whol OCV_OPTION(BUILD_ANDROID_SERVICE "Build OpenCV Manager for Google Play" OFF IF ANDROID AND ANDROID_USE_STLPORT AND ANDROID_SOURCE_TREE ) OCV_OPTION(BUILD_ANDROID_PACKAGE "Build platform-specific package for Google Play" OFF IF ANDROID ) - # 3rd party libs OCV_OPTION(BUILD_ZLIB "Build zlib from source" WIN32 OR APPLE ) OCV_OPTION(BUILD_TIFF "Build libtiff from source" WIN32 OR ANDROID OR APPLE ) @@ -168,24 +167,19 @@ OCV_OPTION(BUILD_JPEG "Build libjpeg from source" WIN32 OR AN OCV_OPTION(BUILD_PNG "Build libpng from source" WIN32 OR ANDROID OR APPLE ) OCV_OPTION(BUILD_OPENEXR "Build openexr from source" WIN32 OR ANDROID OR APPLE ) + # OpenCV installation options # =================================================== OCV_OPTION(INSTALL_C_EXAMPLES "Install C examples" OFF ) OCV_OPTION(INSTALL_PYTHON_EXAMPLES "Install Python examples" OFF ) -OCV_OPTION(INSTALL_ANDROID_EXAMPLES "Install Android examples" OFF IF ANDROID ) +OCV_OPTION(INSTALL_ANDROID_EXAMPLES "Install Android examples" OFF IF ANDROID ) OCV_OPTION(INSTALL_TO_MANGLED_PATHS "Enables mangled install paths, that help with side by side installs." OFF IF (UNIX AND NOT ANDROID AND NOT IOS AND BUILD_SHARED_LIBS) ) -if(MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) - set(ENABLE_SOLUTION_FOLDERS0 ON) -else() - set(ENABLE_SOLUTION_FOLDERS0 OFF) -endif() - # OpenCV build options # =================================================== OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers" ON IF (NOT IOS) ) -OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" ${ENABLE_SOLUTION_FOLDERS0} IF (CMAKE_VERSION VERSION_GREATER "2.8.0") ) +OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) IF (CMAKE_VERSION VERSION_GREATER "2.8.0") ) OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF IF CMAKE_COMPILER_IS_GNUCXX ) OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) @@ -196,6 +190,7 @@ OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) +OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF ) OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF ) @@ -212,7 +207,7 @@ include(cmake/OpenCVLegacyOptions.cmake OPTIONAL) # ---------------------------------------------------------------------------- # Get actual OpenCV version number from sources # ---------------------------------------------------------------------------- -include(cmake/OpenCVVersion.cmake REQUIRED) +include(cmake/OpenCVVersion.cmake) # ---------------------------------------------------------------------------- @@ -223,33 +218,33 @@ include(cmake/OpenCVVersion.cmake REQUIRED) set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" ) if(ANDROID OR WIN32) - set(OPENCV_DOC_INSTALL_PATH doc) + set(OPENCV_DOC_INSTALL_PATH doc) elseif(INSTALL_TO_MANGLED_PATHS) - set(OPENCV_DOC_INSTALL_PATH share/OpenCV-${OPENCV_VERSION}/doc) + set(OPENCV_DOC_INSTALL_PATH share/OpenCV-${OPENCV_VERSION}/doc) else() - set(OPENCV_DOC_INSTALL_PATH share/OpenCV/doc) + set(OPENCV_DOC_INSTALL_PATH share/OpenCV/doc) endif() if(ANDROID) - set(LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/lib/${ANDROID_NDK_ABI_NAME}") - set(3P_LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/3rdparty/lib/${ANDROID_NDK_ABI_NAME}") - set(OPENCV_LIB_INSTALL_PATH sdk/native/libs/${ANDROID_NDK_ABI_NAME}) - set(OPENCV_3P_LIB_INSTALL_PATH sdk/native/3rdparty/libs/${ANDROID_NDK_ABI_NAME}) - set(OPENCV_CONFIG_INSTALL_PATH sdk/native/jni) - set(OPENCV_INCLUDE_INSTALL_PATH sdk/native/jni/include) + set(LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/lib/${ANDROID_NDK_ABI_NAME}") + set(3P_LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/3rdparty/lib/${ANDROID_NDK_ABI_NAME}") + set(OPENCV_LIB_INSTALL_PATH sdk/native/libs/${ANDROID_NDK_ABI_NAME}) + set(OPENCV_3P_LIB_INSTALL_PATH sdk/native/3rdparty/libs/${ANDROID_NDK_ABI_NAME}) + set(OPENCV_CONFIG_INSTALL_PATH sdk/native/jni) + set(OPENCV_INCLUDE_INSTALL_PATH sdk/native/jni/include) else() - set(LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/lib") - set(3P_LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/3rdparty/lib${LIB_SUFFIX}") - set(OPENCV_LIB_INSTALL_PATH lib${LIB_SUFFIX}) - set(OPENCV_3P_LIB_INSTALL_PATH share/OpenCV/3rdparty/${OPENCV_LIB_INSTALL_PATH}) - set(OPENCV_INCLUDE_INSTALL_PATH include) - - math(EXPR SIZEOF_VOID_P_BITS "8 * ${CMAKE_SIZEOF_VOID_P}") - if(LIB_SUFFIX AND NOT SIZEOF_VOID_P_BITS EQUAL LIB_SUFFIX) - set(OPENCV_CONFIG_INSTALL_PATH lib${LIB_SUFFIX}/cmake/opencv) - else() - set(OPENCV_CONFIG_INSTALL_PATH share/OpenCV) - endif() + set(LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/lib") + set(3P_LIBRARY_OUTPUT_PATH "${OpenCV_BINARY_DIR}/3rdparty/lib${LIB_SUFFIX}") + set(OPENCV_LIB_INSTALL_PATH lib${LIB_SUFFIX}) + set(OPENCV_3P_LIB_INSTALL_PATH share/OpenCV/3rdparty/${OPENCV_LIB_INSTALL_PATH}) + set(OPENCV_INCLUDE_INSTALL_PATH include) + + math(EXPR SIZEOF_VOID_P_BITS "8 * ${CMAKE_SIZEOF_VOID_P}") + if(LIB_SUFFIX AND NOT SIZEOF_VOID_P_BITS EQUAL LIB_SUFFIX) + set(OPENCV_CONFIG_INSTALL_PATH lib${LIB_SUFFIX}/cmake/opencv) + else() + set(OPENCV_CONFIG_INSTALL_PATH share/OpenCV) + endif() endif() set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${OPENCV_LIB_INSTALL_PATH}") @@ -329,7 +324,7 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL set(CMAKE_BUILD_TYPE Release) endif() -include(cmake/OpenCVCompilerOptions.cmake REQUIRED) +include(cmake/OpenCVCompilerOptions.cmake) # ---------------------------------------------------------------------------- @@ -337,7 +332,7 @@ include(cmake/OpenCVCompilerOptions.cmake REQUIRED) # Default: dynamic # ---------------------------------------------------------------------------- if(MSVC) - include(cmake/OpenCVCRTLinkage.cmake REQUIRED) + include(cmake/OpenCVCRTLinkage.cmake) endif(MSVC) if(WIN32 AND NOT MINGW) @@ -370,17 +365,17 @@ if(UNIX) endif() endif() -include(cmake/OpenCVPCHSupport.cmake REQUIRED) -include(cmake/OpenCVModule.cmake REQUIRED) +include(cmake/OpenCVPCHSupport.cmake) +include(cmake/OpenCVModule.cmake) # ---------------------------------------------------------------------------- # Detect 3rd-party libraries # ---------------------------------------------------------------------------- -include(cmake/OpenCVFindLibsGrfmt.cmake REQUIRED) -include(cmake/OpenCVFindLibsGUI.cmake REQUIRED) -include(cmake/OpenCVFindLibsVideo.cmake REQUIRED) -include(cmake/OpenCVFindLibsPerf.cmake REQUIRED) +include(cmake/OpenCVFindLibsGrfmt.cmake) +include(cmake/OpenCVFindLibsGUI.cmake) +include(cmake/OpenCVFindLibsVideo.cmake) +include(cmake/OpenCVFindLibsPerf.cmake) # ---------------------------------------------------------------------------- @@ -389,21 +384,23 @@ include(cmake/OpenCVFindLibsPerf.cmake REQUIRED) # --- LATEX for pdf documentation --- if(BUILD_DOCS) - include(cmake/OpenCVFindLATEX.cmake REQUIRED) + include(cmake/OpenCVFindLATEX.cmake) endif(BUILD_DOCS) # --- Python Support --- -include(cmake/OpenCVDetectPython.cmake REQUIRED) +include(cmake/OpenCVDetectPython.cmake) # --- Java Support --- +include(cmake/OpenCVDetectApacheAnt.cmake) if(ANDROID) - include(cmake/OpenCVDetectApacheAnt.cmake REQUIRED) - include(cmake/OpenCVDetectAndroidSDK.cmake REQUIRED) + include(cmake/OpenCVDetectAndroidSDK.cmake) if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13) message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will no be compiled.") endif() -endif(ANDROID) +elseif(ANT_EXECUTABLE) + find_package(JNI) +endif() if(ANDROID AND ANDROID_EXECUTABLE AND ANT_EXECUTABLE AND (ANT_VERSION VERSION_GREATER 1.7) AND (ANDROID_TOOLS_Pkg_Revision GREATER 13)) SET(CAN_BUILD_ANDROID_PROJECTS TRUE) @@ -413,7 +410,7 @@ endif() # --- OpenCL --- if(WITH_OPENCL) - include(cmake/OpenCVDetectOpenCL.cmake REQUIRED) + include(cmake/OpenCVDetectOpenCL.cmake) if(OPENCL_FOUND) set(HAVE_OPENCL 1) endif() @@ -434,7 +431,7 @@ if(ENABLE_SOLUTION_FOLDERS) endif() # Extra OpenCV targets: uninstall, package_source, perf, etc. -include(cmake/OpenCVExtraTargets.cmake REQUIRED) +include(cmake/OpenCVExtraTargets.cmake) # ---------------------------------------------------------------------------- @@ -475,16 +472,16 @@ endif() ocv_track_build_dependencies() # Generate platform-dependent and configuration-dependent headers -include(cmake/OpenCVGenHeaders.cmake REQUIRED) +include(cmake/OpenCVGenHeaders.cmake) # Generate opencv.pc for pkg-config command -include(cmake/OpenCVGenPkgconfig.cmake REQUIRED) +include(cmake/OpenCVGenPkgconfig.cmake) # Generate OpenCV.mk for ndk-build (Android build tool) -include(cmake/OpenCVGenAndroidMK.cmake REQUIRED) +include(cmake/OpenCVGenAndroidMK.cmake) # Generate OpenCVŠ”onfig.cmake and OpenCVConfig-version.cmake for cmake projects -include(cmake/OpenCVGenConfig.cmake REQUIRED) +include(cmake/OpenCVGenConfig.cmake) # ---------------------------------------------------------------------------- @@ -760,7 +757,7 @@ status(" Use Clp:" HAVE_CLP THEN YES ELSE NO) if(HAVE_CUDA) status("") - status(" NVIDIA CUDA:" "(ver ${CUDA_VERSION_STRING})") + status(" NVIDIA CUDA") status(" Use CUFFT:" HAVE_CUFFT THEN YES ELSE NO) status(" Use CUBLAS:" HAVE_CUBLAS THEN YES ELSE NO) @@ -774,9 +771,9 @@ status(" Python:") status(" Interpreter:" PYTHON_EXECUTABLE THEN "${PYTHON_EXECUTABLE} (ver ${PYTHON_VERSION_FULL})" ELSE NO) if(BUILD_opencv_python) if(PYTHONLIBS_VERSION_STRING) - status(" Libraries:" HAVE_opencv_python THEN "${PYTHON_LIBRARIES} (ver ${PYTHONLIBS_VERSION_STRING})" ELSE NO) + status(" Libraries:" HAVE_opencv_python THEN "${PYTHON_LIBRARIES} (ver ${PYTHONLIBS_VERSION_STRING})" ELSE NO) else() - status(" Libraries:" HAVE_opencv_python THEN ${PYTHON_LIBRARIES} ELSE NO) + status(" Libraries:" HAVE_opencv_python THEN ${PYTHON_LIBRARIES} ELSE NO) endif() status(" numpy:" PYTHON_USE_NUMPY THEN "${PYTHON_NUMPY_INCLUDE_DIR} (ver ${PYTHON_NUMPY_VERSION})" ELSE "NO (Python wrappers can not be generated)") status(" packages path:" PYTHON_EXECUTABLE THEN "${PYTHON_PACKAGES_PATH}" ELSE "-") diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 1913527451..c2e45b30d8 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -124,35 +124,33 @@ if(CMAKE_COMPILER_IS_GNUCXX) # SSE3 and further should be disabled under MingW because it generates compiler errors if(NOT MINGW) - if(ENABLE_SSE3) - add_extra_compiler_option(-msse3) + if(ENABLE_AVX) + add_extra_compiler_option(-mavx) endif() - if(${CMAKE_OPENCV_GCC_VERSION_NUM} GREATER 402) - set(HAVE_GCC43_OR_NEWER 1) - endif() - if(${CMAKE_OPENCV_GCC_VERSION_NUM} GREATER 401) - set(HAVE_GCC42_OR_NEWER 1) - endif() + # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed. + if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx") + if(ENABLE_SSE3) + add_extra_compiler_option(-msse3) + endif() - if(HAVE_GCC42_OR_NEWER OR APPLE) if(ENABLE_SSSE3) add_extra_compiler_option(-mssse3) endif() - if(HAVE_GCC43_OR_NEWER OR APPLE) - if(ENABLE_SSE41) - add_extra_compiler_option(-msse4.1) - endif() - if(ENABLE_SSE42) - add_extra_compiler_option(-msse4.2) - endif() + + if(ENABLE_SSE41) + add_extra_compiler_option(-msse4.1) + endif() + + if(ENABLE_SSE42) + add_extra_compiler_option(-msse4.2) endif() endif() endif(NOT MINGW) if(X86 OR X86_64) if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) - if(ENABLE_SSE2) + if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers else() add_extra_compiler_option(-mfpmath=387) @@ -201,30 +199,35 @@ if(MSVC) set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi") endif() - if(NOT MSVC64) - # 64-bit MSVC compiler uses SSE/SSE2 by default - if(ENABLE_SSE) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE") - endif() - if(ENABLE_SSE2) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2") - endif() + if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600) + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX") endif() - if(ENABLE_SSE3) + if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1") + endif() + + if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3") endif() - if(ENABLE_SSE4_1) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1") + + if(NOT MSVC64) + # 64-bit MSVC compiler uses SSE/SSE2 by default + if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2") + endif() + if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE") + endif() endif() - if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1) + if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX) set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") endif() if(X86 OR X86_64) if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2) - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast")# !! important - be on the same wave with x64 compilers + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers endif() endif() endif() @@ -238,34 +241,34 @@ endif() # Add user supplied extra options (optimization, etc...) # ========================================================== -set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options") -set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS}" CACHE INTERNAL "Extra compiler options for C sources") -set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS}" CACHE INTERNAL "Extra compiler options for C++ sources") +set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options") +set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS}" CACHE INTERNAL "Extra compiler options for C sources") +set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS}" CACHE INTERNAL "Extra compiler options for C++ sources") set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE}" CACHE INTERNAL "Extra compiler options for Release build") -set(OPENCV_EXTRA_FLAGS_DEBUG "${OPENCV_EXTRA_FLAGS_DEBUG}" CACHE INTERNAL "Extra compiler options for Debug build") -set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS}" CACHE INTERNAL "Extra linker flags") +set(OPENCV_EXTRA_FLAGS_DEBUG "${OPENCV_EXTRA_FLAGS_DEBUG}" CACHE INTERNAL "Extra compiler options for Debug build") +set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS}" CACHE INTERNAL "Extra linker flags") set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "${OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE}" CACHE INTERNAL "Extra linker flags for Release build") -set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "${OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG}" CACHE INTERNAL "Extra linker flags for Debug build") +set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "${OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG}" CACHE INTERNAL "Extra linker flags for Debug build") #combine all "extra" options -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OPENCV_EXTRA_FLAGS_RELEASE}") -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OPENCV_EXTRA_FLAGS_RELEASE}") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${OPENCV_EXTRA_FLAGS_DEBUG}") -set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${OPENCV_EXTRA_FLAGS_DEBUG}") -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENCV_EXTRA_EXE_LINKER_FLAGS}") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OPENCV_EXTRA_FLAGS_RELEASE}") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${OPENCV_EXTRA_FLAGS_DEBUG}") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${OPENCV_EXTRA_FLAGS_DEBUG}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENCV_EXTRA_EXE_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE}") -set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG}") +set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG}") if(MSVC) # avoid warnings from MSVC about overriding the /W* option # we replace /W3 with /W4 only for C++ files, # since all the 3rd-party libraries OpenCV uses are in C, # and we do not care about their warnings. - string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") + string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") if(NOT ENABLE_NOISY_WARNINGS AND MSVC_VERSION EQUAL 1400) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267) @@ -274,7 +277,7 @@ if(MSVC) # allow extern "C" functions throw exceptions foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) string(REPLACE "/EHsc-" "/EHs" ${flags} "${${flags}}") - string(REPLACE "/EHsc" "/EHs" ${flags} "${${flags}}") + string(REPLACE "/EHsc" "/EHs" ${flags} "${${flags}}") string(REPLACE "/Zm1000" "" ${flags} "${${flags}}") endforeach() diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake index 5ce90ce6f1..65edf72ede 100644 --- a/cmake/OpenCVDetectCXXCompiler.cmake +++ b/cmake/OpenCVDetectCXXCompiler.cmake @@ -89,8 +89,8 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif() endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES amd64.*|x86_64.* OR CMAKE_GENERATOR MATCHES "Visual Studio.*Win64") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" OR CMAKE_GENERATOR MATCHES "Visual Studio.*Win64") set(X86_64 1) -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES i686.*|i386.*|x86.*) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") set(X86 1) endif() diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index 5ab16ada2d..a43b30d1ea 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -4,7 +4,7 @@ # --- TBB --- if(WITH_TBB) - include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectTBB.cmake" REQUIRED) + include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectTBB.cmake") endif(WITH_TBB) # --- IPP --- @@ -21,7 +21,7 @@ endif(WITH_IPP) # --- CUDA --- if(WITH_CUDA) - include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDA.cmake" REQUIRED) + include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDA.cmake") endif(WITH_CUDA) # --- Eigen --- diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index 91fb6a7eca..5eeff178f1 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -98,7 +98,7 @@ endif(WITH_XIMEA) ocv_clear_vars(HAVE_FFMPEG HAVE_FFMPEG_CODEC HAVE_FFMPEG_FORMAT HAVE_FFMPEG_UTIL HAVE_FFMPEG_SWSCALE HAVE_GENTOO_FFMPEG HAVE_FFMPEG_FFMPEG) if(WITH_FFMPEG) if(WIN32) - include("${OpenCV_SOURCE_DIR}/3rdparty/ffmpeg/ffmpeg_version.cmake" REQUIRED) + include("${OpenCV_SOURCE_DIR}/3rdparty/ffmpeg/ffmpeg_version.cmake") elseif(UNIX) CHECK_MODULE(libavcodec HAVE_FFMPEG_CODEC) CHECK_MODULE(libavformat HAVE_FFMPEG_FORMAT) diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index ee4053aafb..413e50a738 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -104,69 +104,72 @@ CV_INLINE IppiSize ippiSize(int width, int height) } #endif -#if defined __SSE2__ || (defined _MSC_VER && _MSC_VER >= 1300) +#ifndef IPPI_CALL +# define IPPI_CALL(func) CV_Assert((func) >= 0) +#endif + +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) # include "emmintrin.h" # define CV_SSE 1 # define CV_SSE2 1 # if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) # include "pmmintrin.h" # define CV_SSE3 1 -# else -# define CV_SSE3 0 # endif -# if defined __SSSE3__ +# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) # include "tmmintrin.h" # define CV_SSSE3 1 -# else -# define CV_SSSE3 0 # endif # if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include -# define CV_SSE4_1 1 -# else -# define CV_SSE4_1 0 +# include +# define CV_SSE4_1 1 # endif # if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) -# include +# include # define CV_SSE4_2 1 -# else -# define CV_SSE4_2 0 # endif # if defined __AVX__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219) -# include +// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX +// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 +# include # define CV_AVX 1 # if defined(_XCR_XFEATURE_ENABLED_MASK) # define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) # else # define __xgetbv() 0 # endif -# else -# define CV_AVX 0 # endif -# else +#endif + +#ifdef __ARM_NEON__ +# include +# define CV_NEON 1 +# define CPU_HAS_NEON_FEATURE (true) +#endif + +#ifndef CV_SSE # define CV_SSE 0 +#endif +#ifndef CV_SSE2 # define CV_SSE2 0 +#endif +#ifndef CV_SSE3 # define CV_SSE3 0 +#endif +#ifndef CV_SSSE3 # define CV_SSSE3 0 +#endif +#ifndef CV_SSE4_1 # define CV_SSE4_1 0 +#endif +#ifndef CV_SSE4_2 # define CV_SSE4_2 0 +#endif +#ifndef CV_AVX # define CV_AVX 0 -# endif - -#if defined ANDROID && defined __ARM_NEON__ -# include "arm_neon.h" -# define CV_NEON 1 - -# define CPU_HAS_NEON_FEATURE (true) -//TODO: make real check using stuff from "cpu-features.h" -//((bool)android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) -#else -# define CV_NEON 0 -# define CPU_HAS_NEON_FEATURE (false) #endif - -#ifndef IPPI_CALL -# define IPPI_CALL(func) CV_Assert((func) >= 0) +#ifndef CV_NEON +# define CV_NEON 0 #endif #ifdef HAVE_TBB diff --git a/modules/core/test/test_operations.cpp b/modules/core/test/test_operations.cpp index a5d91a402f..77d05fe4e1 100644 --- a/modules/core/test/test_operations.cpp +++ b/modules/core/test/test_operations.cpp @@ -774,7 +774,7 @@ bool CV_OperationsTest::TestTemplateMat() Mat m2 = Mat::zeros(10, 10, CV_8UC3); m1.copyTo(m2.row(1)); } - catch(const Exception& e) + catch(const Exception&) { badarg_catched = true; } diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index 4c1190a011..04f7730dc8 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1739,7 +1739,7 @@ PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, Combine( for (int i = 0; i < objCount; ++i) { double scale = rng.uniform(0.7, 1.3); - bool rotate = rng.uniform(0, 2); + bool rotate = 1 == rng.uniform(0, 2); cv::Mat obj; cv::resize(templ, obj, cv::Size(), scale, scale); diff --git a/modules/imgproc/test/test_imgwarp_strict.cpp b/modules/imgproc/test/test_imgwarp_strict.cpp index a8142095b1..6a110fe1f8 100644 --- a/modules/imgproc/test/test_imgwarp_strict.cpp +++ b/modules/imgproc/test/test_imgwarp_strict.cpp @@ -433,7 +433,7 @@ void CV_Resize_Test::run_reference_func() double CV_Resize_Test::getWeight(double a, double b, int x) { - float w = std::min(static_cast(x + 1), b) - std::max(static_cast(x), a); + double w = std::min(static_cast(x + 1), b) - std::max(static_cast(x), a); CV_Assert(w >= 0); return w; } @@ -514,7 +514,7 @@ void CV_Resize_Test::resize_1d(const Mat& _src, Mat& _dst, int dy, const dim& _d else if (interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4) { internal::interpolate_method inter_func = internal::inter_array[interpolation - (interpolation == INTER_LANCZOS4 ? 2 : 1)]; - int elemsize = _src.elemSize(); + size_t elemsize = _src.elemSize(); int ofs = 0, ksize = 2; if (interpolation == INTER_CUBIC) @@ -557,10 +557,10 @@ void CV_Resize_Test::resize_1d(const Mat& _src, Mat& _dst, int dy, const dim& _d void CV_Resize_Test::generate_buffer(double scale, dim& _dim) { - int length = _dim.size(); - for (int dx = 0; dx < length; ++dx) + size_t length = _dim.size(); + for (size_t dx = 0; dx < length; ++dx) { - double fsx = scale * (dx + 0.5f) - 0.5f; + double fsx = scale * (dx + 0.5) - 0.5; int isx = cvFloor(fsx); _dim[dx] = std::make_pair(isx, fsx - isx); } diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt index 50592ea223..4b23effbc5 100644 --- a/modules/java/CMakeLists.txt +++ b/modules/java/CMakeLists.txt @@ -1,14 +1,23 @@ # ---------------------------------------------------------------------------- # CMake file for java support # ---------------------------------------------------------------------------- -if(NOT ANDROID OR NOT PYTHON_EXECUTABLE OR ANDROID_NATIVE_API_LEVEL LESS 8) +if(IOS OR NOT PYTHON_EXECUTABLE OR NOT (JNI_FOUND OR (ANDROID AND ANDROID_NATIVE_API_LEVEL GREATER 7))) ocv_module_disable(java) endif() +if(NOT ANDROID) + # disable java by default because java support on desktop is experimental + set(BUILD_opencv_java_INIT OFF) +endif() + set(the_description "The java bindings") ocv_add_module(java BINDINGS opencv_core opencv_imgproc OPTIONAL opencv_objdetect opencv_features2d opencv_video opencv_highgui opencv_ml opencv_calib3d opencv_photo opencv_nonfree opencv_contrib) ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/generator/src/cpp") +if(NOT ANDROID) + include_directories(${JNI_INCLUDE_DIRS}) +endif() + # get list of modules to wrap string(REPLACE "opencv_" "" OPENCV_JAVA_MODULES "${OPENCV_MODULE_${the_module}_REQ_DEPS};${OPENCV_MODULE_${the_module}_OPT_DEPS}") foreach(module ${OPENCV_JAVA_MODULES}) @@ -123,7 +132,12 @@ set(JNI_INSTALL_ROOT "sdk/native") # copy each documented header to the final destination set(java_files "") -foreach(java_file ${documented_java_files} ${handwrittren_aidl_sources}) +set(source_java_files ${documented_java_files} ${handwrittren_aidl_sources}) +if(NOT ANDROID) + ocv_list_filterout(source_java_files "/(engine|android)\\\\+") +endif() + +foreach(java_file ${source_java_files}) get_filename_component(java_file_name "${java_file}" NAME) string(REPLACE "-jdoc.java" ".java" java_file_name "${java_file_name}") string(REPLACE "+" "/" java_file_name "${java_file_name}") @@ -173,8 +187,11 @@ set_target_properties(${the_module} PROPERTIES LINK_INTERFACE_LIBRARIES "" ) -install(TARGETS ${the_module} LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main) +install(TARGETS ${the_module} + LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main + ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main) +set(lib_target ${the_module}_library) if(ANDROID) target_link_libraries(${the_module} jnigraphics) # for Mat <=> Bitmap converters @@ -223,8 +240,6 @@ if(ANDROID) # create Android library project in build folder if(ANDROID_EXECUTABLE) - set(lib_target ${the_module}_android_library) - set(lib_target_files ${ANDROID_LIB_PROJECT_FILES}) ocv_list_add_prefix(lib_target_files "${OpenCV_BINARY_DIR}/") @@ -263,12 +278,26 @@ if(ANDROID) endif() add_custom_target(${lib_target} SOURCES ${lib_proj_files} ${lib_target_files} "${OpenCV_BINARY_DIR}/${ANDROID_MANIFEST_FILE}") - - add_dependencies(${lib_target} ${api_target}) - add_dependencies(${the_module} ${lib_target}) endif() +else(ANDROID) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/jar/build.xml" "${OpenCV_BINARY_DIR}/build.xml" IMMEDIATE @ONLY) + set(JAR_NAME opencv-${OPENCV_VERSION_MAJOR}.${OPENCV_VERSION_MINOR}.${OPENCV_VERSION_PATCH}.jar) + + add_custom_command( + OUTPUT "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}" "${OpenCV_BINARY_DIR}/bin/.${JAR_NAME}.dephelper" + COMMAND ${ANT_EXECUTABLE} -q -noinput -k jar + COMMAND ${CMAKE_COMMAND} -E touch "${OpenCV_BINARY_DIR}/bin/.${JAR_NAME}.dephelper" + WORKING_DIRECTORY "${OpenCV_BINARY_DIR}" + DEPENDS "${OpenCV_BINARY_DIR}/build.xml" ${java_files} + COMMENT "Generating ${JAR_NAME}" + ) + + add_custom_target(${lib_target} SOURCES "${OpenCV_BINARY_DIR}/bin/${JAR_NAME}") endif(ANDROID) +add_dependencies(${lib_target} ${api_target}) +add_dependencies(${the_module} ${lib_target}) + #android test project if(ANDROID AND BUILD_TESTS) add_subdirectory(android_test) diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py index 0471b494e0..7d9dc8f07a 100644 --- a/modules/java/generator/gen_java.py +++ b/modules/java/generator/gen_java.py @@ -462,8 +462,10 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Core_n_1getTextSize env->SetDoubleArrayRegion(result, 0, 2, fill); - if (baseLine != NULL) - env->SetIntArrayRegion(baseLine, 0, 1, pbaseLine); + if (baseLine != NULL) { + jint jbaseLine = (jint)(*pbaseLine); + env->SetIntArrayRegion(baseLine, 0, 1, &jbaseLine); + } return result; @@ -871,14 +873,18 @@ public class %(jc)s { #include "converters.h" -#ifdef DEBUG -#include -#define MODULE_LOG_TAG "OpenCV.%(m)s" -#define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, MODULE_LOG_TAG, __VA_ARGS__)) +#if defined DEBUG && defined ANDROID +# include +# define MODULE_LOG_TAG "OpenCV.%(m)s" +# define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, MODULE_LOG_TAG, __VA_ARGS__)) #else //DEBUG -#define LOGD(...) +# define LOGD(...) #endif //DEBUG +#ifdef _MSC_VER +# pragma warning(disable:4800 4244) +#endif + #include "opencv2/%(m)s/%(m)s.hpp" using namespace cv; diff --git a/modules/java/generator/src/cpp/Mat.cpp b/modules/java/generator/src/cpp/Mat.cpp index 344c38d3c8..66e41b5f19 100644 --- a/modules/java/generator/src/cpp/Mat.cpp +++ b/modules/java/generator/src/cpp/Mat.cpp @@ -2,6 +2,8 @@ #include "converters.h" +#ifdef ANDROID + #include #define LOG_TAG "org.opencv.core.Mat" #define LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)) @@ -11,6 +13,15 @@ #define LOGD(...) #endif //DEBUG +#else +#define LOGE(...) +#define LOGD(...) +#endif + +#ifdef _MSC_VER +# pragma warning(disable:4800) +#endif + #include "opencv2/core/core.hpp" using namespace cv; @@ -2163,7 +2174,7 @@ template static int mat_put(cv::Mat* m, int row, int col, int count, if(! buff) return 0; count *= sizeof(T); - int rest = ((m->rows - row) * m->cols - col) * m->elemSize(); + int rest = ((m->rows - row) * m->cols - col) * (int)m->elemSize(); if(count>rest) count = rest; int res = count; @@ -2172,14 +2183,14 @@ template static int mat_put(cv::Mat* m, int row, int col, int count, memcpy(m->ptr(row, col), buff, count); } else { // row by row - int num = (m->cols - col) * m->elemSize(); // 1st partial row + int num = (m->cols - col) * (int)m->elemSize(); // 1st partial row if(countptr(row++, col); while(count>0){ memcpy(data, buff, num); count -= num; buff += num; - num = m->cols * m->elemSize(); + num = m->cols * (int)m->elemSize(); if(countptr(row++, 0); } @@ -2323,7 +2334,7 @@ template int mat_get(cv::Mat* m, int row, int col, int count, char* if(! buff) return 0; int bytesToCopy = count * sizeof(T); - int bytesRestInMat = ((m->rows - row) * m->cols - col) * m->elemSize(); + int bytesRestInMat = ((m->rows - row) * m->cols - col) * (int)m->elemSize(); if(bytesToCopy > bytesRestInMat) bytesToCopy = bytesRestInMat; int res = bytesToCopy; @@ -2332,7 +2343,7 @@ template int mat_get(cv::Mat* m, int row, int col, int count, char* memcpy(buff, m->ptr(row, col), bytesToCopy); } else { // row by row - int bytesInRow = (m->cols - col) * m->elemSize(); // 1st partial row + int bytesInRow = (m->cols - col) * (int)m->elemSize(); // 1st partial row while(bytesToCopy > 0) { int len = std::min(bytesToCopy, bytesInRow); @@ -2341,7 +2352,7 @@ template int mat_get(cv::Mat* m, int row, int col, int count, char* buff += len; row++; col = 0; - bytesInRow = m->cols * m->elemSize(); + bytesInRow = m->cols * (int)m->elemSize(); } } return res; @@ -2518,7 +2529,7 @@ JNIEXPORT jdoubleArray JNICALL Java_org_opencv_core_Mat_nGet jdoubleArray res = env->NewDoubleArray(me->channels()); if(res){ - jdouble buff[me->channels()]; + jdouble buff[CV_CN_MAX];//me->channels() int i; switch(me->depth()){ case CV_8U: for(i=0; ichannels(); i++) buff[i] = *((unsigned char*) me->ptr(row, col) + i); break; diff --git a/modules/java/generator/src/cpp/converters.cpp b/modules/java/generator/src/cpp/converters.cpp index 380ed3810c..9153dde11a 100644 --- a/modules/java/generator/src/cpp/converters.cpp +++ b/modules/java/generator/src/cpp/converters.cpp @@ -198,12 +198,12 @@ void Mat_to_vector_KeyPoint(Mat& mat, vector& v_kp) void vector_KeyPoint_to_Mat(vector& v_kp, Mat& mat) { - int count = v_kp.size(); + int count = (int)v_kp.size(); mat.create(count, 1, CV_32FC(7)); for(int i=0; i >(i, 0) = Vec(kp.pt.x, kp.pt.y, kp.size, kp.angle, kp.response, kp.octave, kp.class_id); + mat.at< Vec >(i, 0) = Vec(kp.pt.x, kp.pt.y, kp.size, kp.angle, kp.response, (float)kp.octave, (float)kp.class_id); } } #endif @@ -231,7 +231,7 @@ void Mat_to_vector_Mat(cv::Mat& mat, std::vector& v_mat) void vector_Mat_to_Mat(std::vector& v_mat, cv::Mat& mat) { - int count = v_mat.size(); + int count = (int)v_mat.size(); mat.create(count, 1, CV_32SC2); for(int i=0; i& v_dm) void vector_DMatch_to_Mat(vector& v_dm, Mat& mat) { - int count = v_dm.size(); + int count = (int)v_dm.size(); mat.create(count, 1, CV_32FC4); for(int i=0; i >(i, 0) = Vec(dm.queryIdx, dm.trainIdx, dm.imgIdx, dm.distance); + mat.at< Vec >(i, 0) = Vec((float)dm.queryIdx, (float)dm.trainIdx, (float)dm.imgIdx, dm.distance); } } #endif diff --git a/modules/java/generator/src/cpp/features2d_manual.hpp b/modules/java/generator/src/cpp/features2d_manual.hpp index 32bdc26414..38331c6686 100644 --- a/modules/java/generator/src/cpp/features2d_manual.hpp +++ b/modules/java/generator/src/cpp/features2d_manual.hpp @@ -6,6 +6,8 @@ #ifdef HAVE_OPENCV_FEATURES2D #include "opencv2/features2d/features2d.hpp" +#undef SIMPLEBLOB // to solve conflict with wincrypt.h on windows + namespace cv { diff --git a/modules/java/generator/src/cpp/utils.cpp b/modules/java/generator/src/cpp/utils.cpp index 3193d6be22..9dbc093dcd 100644 --- a/modules/java/generator/src/cpp/utils.cpp +++ b/modules/java/generator/src/cpp/utils.cpp @@ -3,6 +3,8 @@ #include "opencv2/core/core.hpp" #include "opencv2/imgproc/imgproc.hpp" +#ifdef ANDROID + #include #include @@ -165,3 +167,5 @@ JNIEXPORT void JNICALL Java_org_opencv_android_Utils_nMatToBitmap } } // extern "C" + +#endif //ANDROID \ No newline at end of file diff --git a/modules/java/jar/build.xml b/modules/java/jar/build.xml new file mode 100644 index 0000000000..71c1b1fefd --- /dev/null +++ b/modules/java/jar/build.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp index 5e2d5324dc..e9eda9dc89 100644 --- a/modules/objdetect/src/haar.cpp +++ b/modules/objdetect/src/haar.cpp @@ -45,7 +45,6 @@ #include #include "opencv2/core/internal.hpp" - #if CV_SSE2 || CV_SSE3 # if !CV_SSE4_1 && !CV_SSE4_2 # define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m)) @@ -53,13 +52,13 @@ # endif #endif -# if CV_AVX -# define CV_HAAR_USE_AVX 1 -# else -# if CV_SSE2 || CV_SSE3 -# define CV_HAAR_USE_SSE 1 -# endif -# endif +#if CV_AVX +# define CV_HAAR_USE_AVX 1 +#else +# if CV_SSE2 || CV_SSE3 +# define CV_HAAR_USE_SSE 1 +# endif +#endif /* these settings affect the quality of detection: change with care */ #define CV_ADJUST_FEATURES 1 @@ -76,8 +75,7 @@ typedef struct CvHidHaarFeature float weight; } rect[CV_HAAR_FEATURE_MAX]; -} -CvHidHaarFeature; +} CvHidHaarFeature; typedef struct CvHidHaarTreeNode @@ -86,8 +84,7 @@ typedef struct CvHidHaarTreeNode float threshold; int left; int right; -} -CvHidHaarTreeNode; +} CvHidHaarTreeNode; typedef struct CvHidHaarClassifier @@ -96,8 +93,7 @@ typedef struct CvHidHaarClassifier //CvHaarFeature* orig_feature; CvHidHaarTreeNode* node; float* alpha; -} -CvHidHaarClassifier; +} CvHidHaarClassifier; typedef struct CvHidHaarStageClassifier @@ -110,11 +106,10 @@ typedef struct CvHidHaarStageClassifier struct CvHidHaarStageClassifier* next; struct CvHidHaarStageClassifier* child; struct CvHidHaarStageClassifier* parent; -} -CvHidHaarStageClassifier; +} CvHidHaarStageClassifier; -struct CvHidHaarClassifierCascade +typedef struct CvHidHaarClassifierCascade { int count; int isStumpBased; @@ -127,7 +122,7 @@ struct CvHidHaarClassifierCascade sumtype *p0, *p1, *p2, *p3; void** ipp_stages; -}; +} CvHidHaarClassifierCascade; const int icv_object_win_border = 1; @@ -634,21 +629,21 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade, } -//AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!! +// AVX version icvEvalHidHaarClassifier. Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!! #ifdef CV_HAAR_USE_AVX CV_INLINE double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, double variance_norm_factor, size_t p_offset ) { int CV_DECL_ALIGNED(32) idxV[8] = {0,0,0,0,0,0,0,0}; - char flags[8] = {0,0,0,0,0,0,0,0}; + uchar flags[8] = {0,0,0,0,0,0,0,0}; CvHidHaarTreeNode* nodes[8]; double res = 0; - char exitConditionFlag = 0; + uchar exitConditionFlag = 0; for(;;) { - float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0}; - nodes[0] = classifier ->node + idxV[0]; + float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0}; + nodes[0] = (classifier+0)->node + idxV[0]; nodes[1] = (classifier+1)->node + idxV[1]; nodes[2] = (classifier+2)->node + idxV[2]; nodes[3] = (classifier+3)->node + idxV[3]; @@ -658,46 +653,79 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, nodes[7] = (classifier+7)->node + idxV[7]; __m256 t = _mm256_set1_ps(variance_norm_factor); - t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold)); - __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset), - calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0], - p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); - __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, - nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); - __m256 sum = _mm256_mul_ps(offset, weight); + t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, + nodes[6]->threshold, + nodes[5]->threshold, + nodes[4]->threshold, + nodes[3]->threshold, + nodes[2]->threshold, + nodes[1]->threshold, + nodes[0]->threshold)); + + __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), + calc_sum(nodes[6]->feature.rect[0], p_offset), + calc_sum(nodes[5]->feature.rect[0], p_offset), + calc_sum(nodes[4]->feature.rect[0], p_offset), + calc_sum(nodes[3]->feature.rect[0], p_offset), + calc_sum(nodes[2]->feature.rect[0], p_offset), + calc_sum(nodes[1]->feature.rect[0], p_offset), + calc_sum(nodes[0]->feature.rect[0], p_offset)); + + __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, + nodes[6]->feature.rect[0].weight, + nodes[5]->feature.rect[0].weight, + nodes[4]->feature.rect[0].weight, + nodes[3]->feature.rect[0].weight, + nodes[2]->feature.rect[0].weight, + nodes[1]->feature.rect[0].weight, + nodes[0]->feature.rect[0].weight); - offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset), - calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset), - calc_sum(nodes[0]->feature.rect[1],p_offset)); - weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, - nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); + __m256 sum = _mm256_mul_ps(offset, weight); - sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); + offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), + calc_sum(nodes[6]->feature.rect[1], p_offset), + calc_sum(nodes[5]->feature.rect[1], p_offset), + calc_sum(nodes[4]->feature.rect[1], p_offset), + calc_sum(nodes[3]->feature.rect[1], p_offset), + calc_sum(nodes[2]->feature.rect[1], p_offset), + calc_sum(nodes[1]->feature.rect[1], p_offset), + calc_sum(nodes[0]->feature.rect[1], p_offset)); + + weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, + nodes[6]->feature.rect[1].weight, + nodes[5]->feature.rect[1].weight, + nodes[4]->feature.rect[1].weight, + nodes[3]->feature.rect[1].weight, + nodes[2]->feature.rect[1].weight, + nodes[1]->feature.rect[1].weight, + nodes[0]->feature.rect[1].weight); + + sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); if( nodes[0]->feature.rect[2].p0 ) - tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; + tmp[0] = calc_sum(nodes[0]->feature.rect[2], p_offset) * nodes[0]->feature.rect[2].weight; if( nodes[1]->feature.rect[2].p0 ) - tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight; + tmp[1] = calc_sum(nodes[1]->feature.rect[2], p_offset) * nodes[1]->feature.rect[2].weight; if( nodes[2]->feature.rect[2].p0 ) - tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight; + tmp[2] = calc_sum(nodes[2]->feature.rect[2], p_offset) * nodes[2]->feature.rect[2].weight; if( nodes[3]->feature.rect[2].p0 ) - tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight; + tmp[3] = calc_sum(nodes[3]->feature.rect[2], p_offset) * nodes[3]->feature.rect[2].weight; if( nodes[4]->feature.rect[2].p0 ) - tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight; + tmp[4] = calc_sum(nodes[4]->feature.rect[2], p_offset) * nodes[4]->feature.rect[2].weight; if( nodes[5]->feature.rect[2].p0 ) - tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight; + tmp[5] = calc_sum(nodes[5]->feature.rect[2], p_offset) * nodes[5]->feature.rect[2].weight; if( nodes[6]->feature.rect[2].p0 ) - tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight; + tmp[6] = calc_sum(nodes[6]->feature.rect[2], p_offset) * nodes[6]->feature.rect[2].weight; if( nodes[7]->feature.rect[2].p0 ) - tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight; + tmp[7] = calc_sum(nodes[7]->feature.rect[2], p_offset) * nodes[7]->feature.rect[2].weight; sum = _mm256_add_ps(sum,_mm256_load_ps(tmp)); - __m256 left = _mm256_set_ps(nodes[7]->left,nodes[6]->left,nodes[5]->left,nodes[4]->left,nodes[3]->left,nodes[2]->left,nodes[1]->left,nodes[0]->left); + __m256 left = _mm256_set_ps(nodes[7]->left, nodes[6]->left, nodes[5]->left, nodes[4]->left, nodes[3]->left, nodes[2]->left, nodes[1]->left, nodes[0]->left ); __m256 right = _mm256_set_ps(nodes[7]->right,nodes[6]->right,nodes[5]->right,nodes[4]->right,nodes[3]->right,nodes[2]->right,nodes[1]->right,nodes[0]->right); - _mm256_store_si256((__m256i*)idxV,_mm256_cvttps_epi32(_mm256_blendv_ps(right, left,_mm256_cmp_ps(sum, t, _CMP_LT_OQ )))); + _mm256_store_si256((__m256i*)idxV, _mm256_cvttps_epi32(_mm256_blendv_ps(right, left, _mm256_cmp_ps(sum, t, _CMP_LT_OQ)))); for(int i = 0; i < 8; i++) { @@ -706,17 +734,17 @@ double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier, if(!flags[i]) { exitConditionFlag++; - flags[i]=1; - res+=((classifier+i)->alpha[-idxV[i]]); + flags[i] = 1; + res += (classifier+i)->alpha[-idxV[i]]; } idxV[i]=0; } } - if(exitConditionFlag==8) + if(exitConditionFlag == 8) return res; } } -#endif +#endif //CV_HAAR_USE_AVX CV_INLINE double icvEvalHidHaarClassifier( CvHidHaarClassifier* classifier, @@ -778,18 +806,16 @@ static int cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, CvPoint pt, double& stage_sum, int start_stage ) { - #ifdef CV_HAAR_USE_AVX - bool haveAVX = false; - if(cv::checkHardwareSupport(CV_CPU_AVX)) - if(__xgetbv()&0x6)// Check if the OS will save the YMM registers - { - haveAVX = true; - } - #else - #ifdef CV_HAAR_USE_SSE - bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); - #endif - #endif +#ifdef CV_HAAR_USE_AVX + bool haveAVX = false; + if(cv::checkHardwareSupport(CV_CPU_AVX)) + if(__xgetbv()&0x6)// Check if the OS will save the YMM registers + haveAVX = true; +#else +# ifdef CV_HAAR_USE_SSE + bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); +# endif +#endif int p_offset, pq_offset; int i, j; @@ -828,19 +854,20 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, while( ptr ) { stage_sum = 0.0; + j = 0; - #ifdef CV_HAAR_USE_AVX +#ifdef CV_HAAR_USE_AVX if(haveAVX) { - for( ; j < cascade->stage_classifier[i].count-8; j+=8 ) + for( ; j <= ptr->count - 8; j += 8 ) { stage_sum += icvEvalHidHaarClassifierAVX( - cascade->stage_classifier[i].classifier+j, + ptr->classifier + j, variance_norm_factor, p_offset ); } } - #endif - for( j = 0; j < ptr->count; j++ ) +#endif + for( ; j < ptr->count; j++ ) { stage_sum += icvEvalHidHaarClassifier( ptr->classifier + j, variance_norm_factor, p_offset ); } @@ -860,283 +887,369 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, } else if( cascade->isStumpBased ) { - #ifdef CV_HAAR_USE_AVX - if(haveAVX) +#ifdef CV_HAAR_USE_AVX + if(haveAVX) + { + CvHidHaarClassifier* classifiers[8]; + CvHidHaarTreeNode* nodes[8]; + for( i = start_stage; i < cascade->count; i++ ) { - CvHidHaarClassifier* classifiers[8]; - CvHidHaarTreeNode* nodes[8]; - for( i = start_stage; i < cascade->count; i++ ) + stage_sum = 0.0; + j = 0; + float CV_DECL_ALIGNED(32) buf[8]; + if( cascade->stage_classifier[i].two_rects ) { - stage_sum = 0.0; - j = 0; - float CV_DECL_ALIGNED(32) buf[8]; - if( cascade->stage_classifier[i].two_rects ) + for( ; j <= cascade->stage_classifier[i].count - 8; j += 8 ) { - for( ; j <= cascade->stage_classifier[i].count-8; j+=8 ) - { - //__m256 stage_sumPart = _mm256_setzero_ps(); - classifiers[0] = cascade->stage_classifier[i].classifier + j; - nodes[0] = classifiers[0]->node; - classifiers[1] = cascade->stage_classifier[i].classifier + j + 1; - nodes[1] = classifiers[1]->node; - classifiers[2] = cascade->stage_classifier[i].classifier + j + 2; - nodes[2]= classifiers[2]->node; - classifiers[3] = cascade->stage_classifier[i].classifier + j + 3; - nodes[3] = classifiers[3]->node; - classifiers[4] = cascade->stage_classifier[i].classifier + j + 4; - nodes[4] = classifiers[4]->node; - classifiers[5] = cascade->stage_classifier[i].classifier + j + 5; - nodes[5] = classifiers[5]->node; - classifiers[6] = cascade->stage_classifier[i].classifier + j + 6; - nodes[6] = classifiers[6]->node; - classifiers[7] = cascade->stage_classifier[i].classifier + j + 7; - nodes[7] = classifiers[7]->node; - - __m256 t = _mm256_set1_ps(variance_norm_factor); - t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold)); - - __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset), - calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0], - p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); - __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, - nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); - __m256 sum = _mm256_mul_ps(offset, weight); - - offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset), - calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset), - calc_sum(nodes[0]->feature.rect[1],p_offset)); - weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, - nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); - sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); - - __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0], - classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]); - __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1], - classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]); - - _mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ ))); - stage_sum+=(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); - - } + classifiers[0] = cascade->stage_classifier[i].classifier + j; + nodes[0] = classifiers[0]->node; + classifiers[1] = cascade->stage_classifier[i].classifier + j + 1; + nodes[1] = classifiers[1]->node; + classifiers[2] = cascade->stage_classifier[i].classifier + j + 2; + nodes[2] = classifiers[2]->node; + classifiers[3] = cascade->stage_classifier[i].classifier + j + 3; + nodes[3] = classifiers[3]->node; + classifiers[4] = cascade->stage_classifier[i].classifier + j + 4; + nodes[4] = classifiers[4]->node; + classifiers[5] = cascade->stage_classifier[i].classifier + j + 5; + nodes[5] = classifiers[5]->node; + classifiers[6] = cascade->stage_classifier[i].classifier + j + 6; + nodes[6] = classifiers[6]->node; + classifiers[7] = cascade->stage_classifier[i].classifier + j + 7; + nodes[7] = classifiers[7]->node; + + __m256 t = _mm256_set1_ps(variance_norm_factor); + t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, + nodes[6]->threshold, + nodes[5]->threshold, + nodes[4]->threshold, + nodes[3]->threshold, + nodes[2]->threshold, + nodes[1]->threshold, + nodes[0]->threshold)); + + __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), + calc_sum(nodes[6]->feature.rect[0], p_offset), + calc_sum(nodes[5]->feature.rect[0], p_offset), + calc_sum(nodes[4]->feature.rect[0], p_offset), + calc_sum(nodes[3]->feature.rect[0], p_offset), + calc_sum(nodes[2]->feature.rect[0], p_offset), + calc_sum(nodes[1]->feature.rect[0], p_offset), + calc_sum(nodes[0]->feature.rect[0], p_offset)); + + __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, + nodes[6]->feature.rect[0].weight, + nodes[5]->feature.rect[0].weight, + nodes[4]->feature.rect[0].weight, + nodes[3]->feature.rect[0].weight, + nodes[2]->feature.rect[0].weight, + nodes[1]->feature.rect[0].weight, + nodes[0]->feature.rect[0].weight); + + __m256 sum = _mm256_mul_ps(offset, weight); + + offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), + calc_sum(nodes[6]->feature.rect[1], p_offset), + calc_sum(nodes[5]->feature.rect[1], p_offset), + calc_sum(nodes[4]->feature.rect[1], p_offset), + calc_sum(nodes[3]->feature.rect[1], p_offset), + calc_sum(nodes[2]->feature.rect[1], p_offset), + calc_sum(nodes[1]->feature.rect[1], p_offset), + calc_sum(nodes[0]->feature.rect[1], p_offset)); + + weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, + nodes[6]->feature.rect[1].weight, + nodes[5]->feature.rect[1].weight, + nodes[4]->feature.rect[1].weight, + nodes[3]->feature.rect[1].weight, + nodes[2]->feature.rect[1].weight, + nodes[1]->feature.rect[1].weight, + nodes[0]->feature.rect[1].weight); + + sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); + + __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0], + classifiers[6]->alpha[0], + classifiers[5]->alpha[0], + classifiers[4]->alpha[0], + classifiers[3]->alpha[0], + classifiers[2]->alpha[0], + classifiers[1]->alpha[0], + classifiers[0]->alpha[0]); + __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1], + classifiers[6]->alpha[1], + classifiers[5]->alpha[1], + classifiers[4]->alpha[1], + classifiers[3]->alpha[1], + classifiers[2]->alpha[1], + classifiers[1]->alpha[1], + classifiers[0]->alpha[1]); + + _mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ))); + stage_sum += (buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); + } - for( ; j < cascade->stage_classifier[i].count; j++ ) - { - CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - CvHidHaarTreeNode* node = classifier->node; + for( ; j < cascade->stage_classifier[i].count; j++ ) + { + CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; + CvHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - stage_sum += classifier->alpha[sum >= t]; - } + double t = node->threshold*variance_norm_factor; + double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; + sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; + stage_sum += classifier->alpha[sum >= t]; } - else + } + else + { + for( ; j <= (cascade->stage_classifier[i].count)-8; j+=8 ) { - for( ; j <= (cascade->stage_classifier[i].count)-8; j+=8 ) - { - float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0}; - - classifiers[0] = cascade->stage_classifier[i].classifier + j; - nodes[0] = classifiers[0]->node; - classifiers[1] = cascade->stage_classifier[i].classifier + j + 1; - nodes[1] = classifiers[1]->node; - classifiers[2] = cascade->stage_classifier[i].classifier + j + 2; - nodes[2]= classifiers[2]->node; - classifiers[3] = cascade->stage_classifier[i].classifier + j + 3; - nodes[3] = classifiers[3]->node; - classifiers[4] = cascade->stage_classifier[i].classifier + j + 4; - nodes[4] = classifiers[4]->node; - classifiers[5] = cascade->stage_classifier[i].classifier + j + 5; - nodes[5] = classifiers[5]->node; - classifiers[6] = cascade->stage_classifier[i].classifier + j + 6; - nodes[6] = classifiers[6]->node; - classifiers[7] = cascade->stage_classifier[i].classifier + j + 7; - nodes[7] = classifiers[7]->node; - - __m256 t = _mm256_set1_ps(variance_norm_factor); - t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold)); - - __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset), - calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0], - p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); - __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, - nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); - __m256 sum = _mm256_mul_ps(offset, weight); - - offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset), - calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset), - calc_sum(nodes[0]->feature.rect[1],p_offset)); - weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, - nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); - - sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight)); - - if( nodes[0]->feature.rect[2].p0 ) - tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; - if( nodes[1]->feature.rect[2].p0 ) - tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight; - if( nodes[2]->feature.rect[2].p0 ) - tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight; - if( nodes[3]->feature.rect[2].p0 ) - tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight; - if( nodes[4]->feature.rect[2].p0 ) - tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight; - if( nodes[5]->feature.rect[2].p0 ) - tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight; - if( nodes[6]->feature.rect[2].p0 ) - tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight; - if( nodes[7]->feature.rect[2].p0 ) - tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight; - - sum = _mm256_add_ps(sum, _mm256_load_ps(tmp)); - - __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0], - classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]); - __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1], - classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]); - - __m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )); - outBuf = _mm256_hadd_ps(outBuf, outBuf); - outBuf = _mm256_hadd_ps(outBuf, outBuf); - _mm256_store_ps(buf, outBuf); - stage_sum+=(buf[0]+buf[4]);//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); - } + float CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0}; + + classifiers[0] = cascade->stage_classifier[i].classifier + j; + nodes[0] = classifiers[0]->node; + classifiers[1] = cascade->stage_classifier[i].classifier + j + 1; + nodes[1] = classifiers[1]->node; + classifiers[2] = cascade->stage_classifier[i].classifier + j + 2; + nodes[2] = classifiers[2]->node; + classifiers[3] = cascade->stage_classifier[i].classifier + j + 3; + nodes[3] = classifiers[3]->node; + classifiers[4] = cascade->stage_classifier[i].classifier + j + 4; + nodes[4] = classifiers[4]->node; + classifiers[5] = cascade->stage_classifier[i].classifier + j + 5; + nodes[5] = classifiers[5]->node; + classifiers[6] = cascade->stage_classifier[i].classifier + j + 6; + nodes[6] = classifiers[6]->node; + classifiers[7] = cascade->stage_classifier[i].classifier + j + 7; + nodes[7] = classifiers[7]->node; + + __m256 t = _mm256_set1_ps(variance_norm_factor); + + t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold, + nodes[6]->threshold, + nodes[5]->threshold, + nodes[4]->threshold, + nodes[3]->threshold, + nodes[2]->threshold, + nodes[1]->threshold, + nodes[0]->threshold)); + + __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0], p_offset), + calc_sum(nodes[6]->feature.rect[0], p_offset), + calc_sum(nodes[5]->feature.rect[0], p_offset), + calc_sum(nodes[4]->feature.rect[0], p_offset), + calc_sum(nodes[3]->feature.rect[0], p_offset), + calc_sum(nodes[2]->feature.rect[0], p_offset), + calc_sum(nodes[1]->feature.rect[0], p_offset), + calc_sum(nodes[0]->feature.rect[0], p_offset)); + + __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, + nodes[6]->feature.rect[0].weight, + nodes[5]->feature.rect[0].weight, + nodes[4]->feature.rect[0].weight, + nodes[3]->feature.rect[0].weight, + nodes[2]->feature.rect[0].weight, + nodes[1]->feature.rect[0].weight, + nodes[0]->feature.rect[0].weight); + + __m256 sum = _mm256_mul_ps(offset, weight); + + offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1], p_offset), + calc_sum(nodes[6]->feature.rect[1], p_offset), + calc_sum(nodes[5]->feature.rect[1], p_offset), + calc_sum(nodes[4]->feature.rect[1], p_offset), + calc_sum(nodes[3]->feature.rect[1], p_offset), + calc_sum(nodes[2]->feature.rect[1], p_offset), + calc_sum(nodes[1]->feature.rect[1], p_offset), + calc_sum(nodes[0]->feature.rect[1], p_offset)); + + weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, + nodes[6]->feature.rect[1].weight, + nodes[5]->feature.rect[1].weight, + nodes[4]->feature.rect[1].weight, + nodes[3]->feature.rect[1].weight, + nodes[2]->feature.rect[1].weight, + nodes[1]->feature.rect[1].weight, + nodes[0]->feature.rect[1].weight); + + sum = _mm256_add_ps(sum, _mm256_mul_ps(offset, weight)); + + if( nodes[0]->feature.rect[2].p0 ) + tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight; + if( nodes[1]->feature.rect[2].p0 ) + tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight; + if( nodes[2]->feature.rect[2].p0 ) + tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight; + if( nodes[3]->feature.rect[2].p0 ) + tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight; + if( nodes[4]->feature.rect[2].p0 ) + tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight; + if( nodes[5]->feature.rect[2].p0 ) + tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight; + if( nodes[6]->feature.rect[2].p0 ) + tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight; + if( nodes[7]->feature.rect[2].p0 ) + tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight; + + sum = _mm256_add_ps(sum, _mm256_load_ps(tmp)); + + __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0], + classifiers[6]->alpha[0], + classifiers[5]->alpha[0], + classifiers[4]->alpha[0], + classifiers[3]->alpha[0], + classifiers[2]->alpha[0], + classifiers[1]->alpha[0], + classifiers[0]->alpha[0]); + __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1], + classifiers[6]->alpha[1], + classifiers[5]->alpha[1], + classifiers[4]->alpha[1], + classifiers[3]->alpha[1], + classifiers[2]->alpha[1], + classifiers[1]->alpha[1], + classifiers[0]->alpha[1]); + + __m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )); + outBuf = _mm256_hadd_ps(outBuf, outBuf); + outBuf = _mm256_hadd_ps(outBuf, outBuf); + _mm256_store_ps(buf, outBuf); + stage_sum += (buf[0] + buf[4]); + } - for( ; j < cascade->stage_classifier[i].count; j++ ) - { - CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - CvHidHaarTreeNode* node = classifier->node; - - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - if( node->feature.rect[2].p0 ) - sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - stage_sum += classifier->alpha[sum >= t]; - } + for( ; j < cascade->stage_classifier[i].count; j++ ) + { + CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; + CvHidHaarTreeNode* node = classifier->node; + + double t = node->threshold*variance_norm_factor; + double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; + sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; + if( node->feature.rect[2].p0 ) + sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; + stage_sum += classifier->alpha[sum >= t]; } - if( stage_sum < cascade->stage_classifier[i].threshold ) - return -i; } + if( stage_sum < cascade->stage_classifier[i].threshold ) + return -i; } - else - #endif - #if defined CV_HAAR_USE_SSE && CV_HAAR_USE_SSE && (!defined CV_HAAR_USE_AVX || !CV_HAAR_USE_AVX) //old SSE optimization - if(haveSSE2) + } + else +#elif defined CV_HAAR_USE_SSE //old SSE optimization + if(haveSSE2) + { + for( i = start_stage; i < cascade->count; i++ ) { - for( i = start_stage; i < cascade->count; i++ ) + __m128d vstage_sum = _mm_setzero_pd(); + if( cascade->stage_classifier[i].two_rects ) { - __m128d vstage_sum = _mm_setzero_pd(); - if( cascade->stage_classifier[i].two_rects ) + for( j = 0; j < cascade->stage_classifier[i].count; j++ ) { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - CvHidHaarTreeNode* node = classifier->node; - - // ayasin - NHM perf optim. Avoid use of costly flaky jcc - __m128d t = _mm_set_sd(node->threshold*variance_norm_factor); - __m128d a = _mm_set_sd(classifier->alpha[0]); - __m128d b = _mm_set_sd(classifier->alpha[1]); - __m128d sum = _mm_set_sd(calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight + - calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight); - t = _mm_cmpgt_sd(t, sum); - vstage_sum = _mm_add_sd(vstage_sum, _mm_blendv_pd(b, a, t)); - } + CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; + CvHidHaarTreeNode* node = classifier->node; + + // ayasin - NHM perf optim. Avoid use of costly flaky jcc + __m128d t = _mm_set_sd(node->threshold*variance_norm_factor); + __m128d a = _mm_set_sd(classifier->alpha[0]); + __m128d b = _mm_set_sd(classifier->alpha[1]); + __m128d sum = _mm_set_sd(calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight + + calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight); + t = _mm_cmpgt_sd(t, sum); + vstage_sum = _mm_add_sd(vstage_sum, _mm_blendv_pd(b, a, t)); } - else + } + else + { + for( j = 0; j < cascade->stage_classifier[i].count; j++ ) { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - CvHidHaarTreeNode* node = classifier->node; - // ayasin - NHM perf optim. Avoid use of costly flaky jcc - __m128d t = _mm_set_sd(node->threshold*variance_norm_factor); - __m128d a = _mm_set_sd(classifier->alpha[0]); - __m128d b = _mm_set_sd(classifier->alpha[1]); - double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - _sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - if( node->feature.rect[2].p0 ) - _sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - __m128d sum = _mm_set_sd(_sum); - - t = _mm_cmpgt_sd(t, sum); - vstage_sum = _mm_add_sd(vstage_sum, _mm_blendv_pd(b, a, t)); - } + CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; + CvHidHaarTreeNode* node = classifier->node; + // ayasin - NHM perf optim. Avoid use of costly flaky jcc + __m128d t = _mm_set_sd(node->threshold*variance_norm_factor); + __m128d a = _mm_set_sd(classifier->alpha[0]); + __m128d b = _mm_set_sd(classifier->alpha[1]); + double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; + _sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; + if( node->feature.rect[2].p0 ) + _sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; + __m128d sum = _mm_set_sd(_sum); + + t = _mm_cmpgt_sd(t, sum); + vstage_sum = _mm_add_sd(vstage_sum, _mm_blendv_pd(b, a, t)); } - __m128d i_threshold = _mm_set1_pd(cascade->stage_classifier[i].threshold); - if( _mm_comilt_sd(vstage_sum, i_threshold) ) - return -i; } + __m128d i_threshold = _mm_set1_pd(cascade->stage_classifier[i].threshold); + if( _mm_comilt_sd(vstage_sum, i_threshold) ) + return -i; } - else - #endif + } + else +#endif // AVX or SSE + { + for( i = start_stage; i < cascade->count; i++ ) { - for( i = start_stage; i < cascade->count; i++ ) + stage_sum = 0.0; + if( cascade->stage_classifier[i].two_rects ) { - stage_sum = 0.0; - if( cascade->stage_classifier[i].two_rects ) + for( j = 0; j < cascade->stage_classifier[i].count; j++ ) { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - CvHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - stage_sum += classifier->alpha[sum >= t]; - } + CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; + CvHidHaarTreeNode* node = classifier->node; + double t = node->threshold*variance_norm_factor; + double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; + sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; + stage_sum += classifier->alpha[sum >= t]; } - else + } + else + { + for( j = 0; j < cascade->stage_classifier[i].count; j++ ) { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - CvHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - if( node->feature.rect[2].p0 ) - sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - stage_sum += classifier->alpha[sum >= t]; - } + CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; + CvHidHaarTreeNode* node = classifier->node; + double t = node->threshold*variance_norm_factor; + double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; + sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; + if( node->feature.rect[2].p0 ) + sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; + stage_sum += classifier->alpha[sum >= t]; } - if( stage_sum < cascade->stage_classifier[i].threshold ) - return -i; } + if( stage_sum < cascade->stage_classifier[i].threshold ) + return -i; } + } } - else { for( i = start_stage; i < cascade->count; i++ ) { stage_sum = 0.0; int k = 0; - #ifdef CV_HAAR_USE_AVX + +#ifdef CV_HAAR_USE_AVX if(haveAVX) { - for( ; k < cascade->stage_classifier[i].count-8; k+=8 ) + for( ; k < cascade->stage_classifier[i].count - 8; k += 8 ) { stage_sum += icvEvalHidHaarClassifierAVX( - cascade->stage_classifier[i].classifier+k, + cascade->stage_classifier[i].classifier + k, variance_norm_factor, p_offset ); } } - #endif - for(; k < cascade->stage_classifier[i].count; k++ ) - { +#endif + for(; k < cascade->stage_classifier[i].count; k++ ) + { - stage_sum += icvEvalHidHaarClassifier( - cascade->stage_classifier[i].classifier + k, - variance_norm_factor, p_offset ); - } + stage_sum += icvEvalHidHaarClassifier( + cascade->stage_classifier[i].classifier + k, + variance_norm_factor, p_offset ); + } if( stage_sum < cascade->stage_classifier[i].threshold ) return -i; } } - //_mm256_zeroupper(); return 1; } @@ -1186,7 +1299,7 @@ struct HaarDetectObjects_ScaleImage_Invoker Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1); int x, y, ystep = factor > 2 ? 1 : 2; - #ifdef HAVE_IPP +#ifdef HAVE_IPP if( cascade->hid_cascade->ipp_stages ) { IppiRect iequRect = {equRect.x, equRect.y, equRect.width, equRect.height}; @@ -1241,7 +1354,7 @@ struct HaarDetectObjects_ScaleImage_Invoker } } else -#endif +#endif // IPP for( y = y1; y < y2; y += ystep ) for( x = 0; x < ssz.width; x += ystep ) { @@ -1880,18 +1993,18 @@ cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** _cascade ) #define ICV_HAAR_SIZE_NAME "size" #define ICV_HAAR_STAGES_NAME "stages" -#define ICV_HAAR_TREES_NAME "trees" -#define ICV_HAAR_FEATURE_NAME "feature" -#define ICV_HAAR_RECTS_NAME "rects" -#define ICV_HAAR_TILTED_NAME "tilted" -#define ICV_HAAR_THRESHOLD_NAME "threshold" -#define ICV_HAAR_LEFT_NODE_NAME "left_node" -#define ICV_HAAR_LEFT_VAL_NAME "left_val" -#define ICV_HAAR_RIGHT_NODE_NAME "right_node" -#define ICV_HAAR_RIGHT_VAL_NAME "right_val" -#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold" -#define ICV_HAAR_PARENT_NAME "parent" -#define ICV_HAAR_NEXT_NAME "next" +#define ICV_HAAR_TREES_NAME "trees" +#define ICV_HAAR_FEATURE_NAME "feature" +#define ICV_HAAR_RECTS_NAME "rects" +#define ICV_HAAR_TILTED_NAME "tilted" +#define ICV_HAAR_THRESHOLD_NAME "threshold" +#define ICV_HAAR_LEFT_NODE_NAME "left_node" +#define ICV_HAAR_LEFT_VAL_NAME "left_val" +#define ICV_HAAR_RIGHT_NODE_NAME "right_node" +#define ICV_HAAR_RIGHT_VAL_NAME "right_val" +#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold" +#define ICV_HAAR_PARENT_NAME "parent" +#define ICV_HAAR_NEXT_NAME "next" static int icvIsHaarClassifier( const void* struct_ptr ) @@ -2418,45 +2531,4 @@ CvType haar_type( CV_TYPE_NAME_HAAR, icvIsHaarClassifier, icvReadHaarClassifier, icvWriteHaarClassifier, icvCloneHaarClassifier ); -#if 0 -namespace cv -{ - -HaarClassifierCascade::HaarClassifierCascade() {} -HaarClassifierCascade::HaarClassifierCascade(const String& filename) -{ load(filename); } - -bool HaarClassifierCascade::load(const String& filename) -{ - cascade = Ptr((CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0)); - return (CvHaarClassifierCascade*)cascade != 0; -} - -void HaarClassifierCascade::detectMultiScale( const Mat& image, - Vector& objects, double scaleFactor, - int minNeighbors, int flags, - Size minSize ) -{ - MemStorage storage(cvCreateMemStorage(0)); - CvMat _image = image; - CvSeq* _objects = cvHaarDetectObjects( &_image, cascade, storage, scaleFactor, - minNeighbors, flags, minSize ); - Seq(_objects).copyTo(objects); -} - -int HaarClassifierCascade::runAt(Point pt, int startStage, int) const -{ - return cvRunHaarClassifierCascade(cascade, pt, startStage); -} - -void HaarClassifierCascade::setImages( const Mat& sum, const Mat& sqsum, - const Mat& tilted, double scale ) -{ - CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; - cvSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); -} - -} -#endif - /* End of file. */ diff --git a/samples/cpp/pca.cpp b/samples/cpp/pca.cpp index 7ab86eaf04..eb891be589 100644 --- a/samples/cpp/pca.cpp +++ b/samples/cpp/pca.cpp @@ -176,7 +176,7 @@ int main(int argc, char** argv) // display until user presses q imshow(winName, reconstruction); - char key = 0; + int key = 0; while(key != 'q') key = waitKey(); diff --git a/samples/cpp/simpleflow_demo.cpp b/samples/cpp/simpleflow_demo.cpp index a864ebc22e..c9eaba8efe 100644 --- a/samples/cpp/simpleflow_demo.cpp +++ b/samples/cpp/simpleflow_demo.cpp @@ -86,7 +86,7 @@ static void run(int argc, char** argv) { Mat flow; - float start = getTickCount(); + float start = (float)getTickCount(); calcOpticalFlowSF(frame1, frame2, flow, 3, 2, 4, 4.1, 25.5, 18, 55.0, 25.5, 0.35, 18, 55.0, 25.5, 10); @@ -156,7 +156,7 @@ static float calc_rmse(Mat flow1, Mat flow2) { } } } - return sqrt(sum / (1e-9 + counter)); + return (float)sqrt(sum / (1e-9 + counter)); } static void eval(int argc, char** argv) {