Fix AVX and other SIMD support detection

pull/32/head
Andrey Kamaev 12 years ago
parent cd05d9aaad
commit f32eb05ea1
  1. 13
      CMakeLists.txt
  2. 49
      cmake/OpenCVCompilerOptions.cmake
  3. 4
      cmake/OpenCVDetectCXXCompiler.cmake
  4. 59
      modules/core/include/opencv2/core/internal.hpp

@ -159,7 +159,6 @@ OCV_OPTION(BUILD_FAT_JAVA_LIB "Create fat java wrapper containing the whol
OCV_OPTION(BUILD_ANDROID_SERVICE "Build OpenCV Manager for Google Play" OFF IF ANDROID AND ANDROID_USE_STLPORT AND ANDROID_SOURCE_TREE )
OCV_OPTION(BUILD_ANDROID_PACKAGE "Build platform-specific package for Google Play" OFF IF ANDROID )
# 3rd party libs
OCV_OPTION(BUILD_ZLIB "Build zlib from source" WIN32 OR APPLE )
OCV_OPTION(BUILD_TIFF "Build libtiff from source" WIN32 OR ANDROID OR APPLE )
@ -168,6 +167,7 @@ OCV_OPTION(BUILD_JPEG "Build libjpeg from source" WIN32 OR AN
OCV_OPTION(BUILD_PNG "Build libpng from source" WIN32 OR ANDROID OR APPLE )
OCV_OPTION(BUILD_OPENEXR "Build openexr from source" WIN32 OR ANDROID OR APPLE )
# OpenCV installation options
# ===================================================
OCV_OPTION(INSTALL_C_EXAMPLES "Install C examples" OFF )
@ -176,16 +176,10 @@ OCV_OPTION(INSTALL_ANDROID_EXAMPLES "Install Android examples" OFF IF ANDROID
OCV_OPTION(INSTALL_TO_MANGLED_PATHS "Enables mangled install paths, that help with side by side installs." OFF IF (UNIX AND NOT ANDROID AND NOT IOS AND BUILD_SHARED_LIBS) )
if(MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode)
set(ENABLE_SOLUTION_FOLDERS0 ON)
else()
set(ENABLE_SOLUTION_FOLDERS0 OFF)
endif()
# OpenCV build options
# ===================================================
OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers" ON IF (NOT IOS) )
OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" ${ENABLE_SOLUTION_FOLDERS0} IF (CMAKE_VERSION VERSION_GREATER "2.8.0") )
OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) IF (CMAKE_VERSION VERSION_GREATER "2.8.0") )
OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF IF CMAKE_COMPILER_IS_GNUCXX )
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
@ -196,6 +190,7 @@ OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions"
OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
@ -762,7 +757,7 @@ status(" Use Clp:" HAVE_CLP THEN YES ELSE NO)
if(HAVE_CUDA)
status("")
status(" NVIDIA CUDA:" "(ver ${CUDA_VERSION_STRING})")
status(" NVIDIA CUDA")
status(" Use CUFFT:" HAVE_CUFFT THEN YES ELSE NO)
status(" Use CUBLAS:" HAVE_CUBLAS THEN YES ELSE NO)

@ -124,35 +124,33 @@ if(CMAKE_COMPILER_IS_GNUCXX)
# SSE3 and later instruction sets should be disabled under MinGW because enabling them causes compiler errors
if(NOT MINGW)
if(ENABLE_SSE3)
add_extra_compiler_option(-msse3)
if(ENABLE_AVX)
add_extra_compiler_option(-mavx)
endif()
if(${CMAKE_OPENCV_GCC_VERSION_NUM} GREATER 402)
set(HAVE_GCC43_OR_NEWER 1)
endif()
if(${CMAKE_OPENCV_GCC_VERSION_NUM} GREATER 401)
set(HAVE_GCC42_OR_NEWER 1)
# GCC suppresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or the AVX equivalents of SSEx instructions when needed.
if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx")
if(ENABLE_SSE3)
add_extra_compiler_option(-msse3)
endif()
if(HAVE_GCC42_OR_NEWER OR APPLE)
if(ENABLE_SSSE3)
add_extra_compiler_option(-mssse3)
endif()
if(HAVE_GCC43_OR_NEWER OR APPLE)
if(ENABLE_SSE41)
add_extra_compiler_option(-msse4.1)
endif()
if(ENABLE_SSE42)
add_extra_compiler_option(-msse4.2)
endif()
endif()
endif()
endif(NOT MINGW)
if(X86 OR X86_64)
if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
if(ENABLE_SSE2)
if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers
else()
add_extra_compiler_option(-mfpmath=387)
@ -201,30 +199,35 @@ if(MSVC)
set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
endif()
if(NOT MSVC64)
# 64-bit MSVC compiler uses SSE/SSE2 by default
if(ENABLE_SSE)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE")
endif()
if(ENABLE_SSE2)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2")
if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX")
endif()
if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
endif()
if(ENABLE_SSE3)
if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3")
endif()
if(ENABLE_SSE4_1)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
if(NOT MSVC64)
# 64-bit MSVC compiler uses SSE/SSE2 by default
if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2")
endif()
if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE")
endif()
endif()
if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1)
if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
endif()
if(X86 OR X86_64)
if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast")# !! important - be on the same wave with x64 compilers
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
endif()
endif()
endif()

@ -89,8 +89,8 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif()
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES amd64.*|x86_64.* OR CMAKE_GENERATOR MATCHES "Visual Studio.*Win64")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" OR CMAKE_GENERATOR MATCHES "Visual Studio.*Win64")
set(X86_64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES i686.*|i386.*|x86.*)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(X86 1)
endif()

@ -104,35 +104,33 @@ CV_INLINE IppiSize ippiSize(int width, int height)
}
#endif
#if defined __SSE2__ || (defined _MSC_VER && _MSC_VER >= 1300)
#ifndef IPPI_CALL
# define IPPI_CALL(func) CV_Assert((func) >= 0)
#endif
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include "emmintrin.h"
# define CV_SSE 1
# define CV_SSE2 1
# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include "pmmintrin.h"
# define CV_SSE3 1
# else
# define CV_SSE3 0
# endif
# if defined __SSSE3__
# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include "tmmintrin.h"
# define CV_SSSE3 1
# else
# define CV_SSSE3 0
# endif
# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <smmintrin.h>
# define CV_SSE4_1 1
# else
# define CV_SSE4_1 0
# endif
# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <nmmintrin.h>
# define CV_SSE4_2 1
# else
# define CV_SSE4_2 0
# endif
# if defined __AVX__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
// MS Visual Studio 2010 (2012?) has no predefined macro that identifies the use of /arch:AVX
// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
# include <immintrin.h>
# define CV_AVX 1
# if defined(_XCR_XFEATURE_ENABLED_MASK)
@ -140,33 +138,38 @@ CV_INLINE IppiSize ippiSize(int width, int height)
# else
# define __xgetbv() 0
# endif
# else
# define CV_AVX 0
# endif
# else
#endif
#ifdef __ARM_NEON__
# include <arm_neon.h>
# define CV_NEON 1
# define CPU_HAS_NEON_FEATURE (true)
#endif
#ifndef CV_SSE
# define CV_SSE 0
#endif
#ifndef CV_SSE2
# define CV_SSE2 0
#endif
#ifndef CV_SSE3
# define CV_SSE3 0
#endif
#ifndef CV_SSSE3
# define CV_SSSE3 0
#endif
#ifndef CV_SSE4_1
# define CV_SSE4_1 0
#endif
#ifndef CV_SSE4_2
# define CV_SSE4_2 0
#endif
#ifndef CV_AVX
# define CV_AVX 0
# endif
#if defined ANDROID && defined __ARM_NEON__
# include "arm_neon.h"
# define CV_NEON 1
# define CPU_HAS_NEON_FEATURE (true)
//TODO: make real check using stuff from "cpu-features.h"
//((bool)android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON)
#else
# define CV_NEON 0
# define CPU_HAS_NEON_FEATURE (false)
#endif
#ifndef IPPI_CALL
# define IPPI_CALL(func) CV_Assert((func) >= 0)
#ifndef CV_NEON
# define CV_NEON 0
#endif
#ifdef HAVE_TBB

Loading…
Cancel
Save