From bc394e7516194be88816cbc8cf7603d394f84433 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 12 Jan 2015 10:59:30 +0300
Subject: [PATCH] detection of other CPU features

---
 CMakeLists.txt                                |  2 +
 cmake/OpenCVCompilerOptions.cmake             | 11 ++-
 modules/core/include/opencv2/core/cvdef.h     | 91 ++++++++++++++++---
 modules/core/include/opencv2/core/utility.hpp | 34 +++++--
 modules/core/src/system.cpp                   | 10 ++
 modules/ts/src/ts_func.cpp                    | 36 ++++++++
 6 files changed, 157 insertions(+), 27 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b5648efd4..d9bb04081c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,12 +214,14 @@ OCV_OPTION(ENABLE_COVERAGE            "Enable coverage collection with  GCov"
 OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"                      ON   IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
 OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"                                   ON   IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
 OCV_OPTION(ENABLE_FAST_MATH           "Enable -ffast-math (not recommended for GCC 4.6.x)"       OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
+OCV_OPTION(ENABLE_POPCNT              "Enable POPCNT instructions"                               ON   IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE                 "Enable SSE instructions"                                  ON   IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE2                "Enable SSE2 instructions"                                 ON   IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE3                "Enable SSE3 instructions"                                 ON   IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSSE3               "Enable SSSE3 instructions"                                OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE41               "Enable SSE4.1 instructions"                               OFF  IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_SSE42               "Enable SSE4.2 instructions"                               OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
+OCV_OPTION(ENABLE_FMA3                "Enable FMA3 instructions"                                 ON   IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_AVX                 "Enable AVX instructions"                                  OFF  IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_AVX2                "Enable AVX2 instructions"                                 OFF  IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_NEON                "Enable NEON instructions"                                 OFF  IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR IOS) )
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 831026fb50..3d1155c872 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -122,16 +122,19 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   if(ENABLE_POWERPC)
     add_extra_compiler_option("-mcpu=G3 -mtune=G5")
   endif()
+  if(ENABLE_POPCNT)
+    add_extra_compiler_option(-mpopcnt)
+  endif()
   if(ENABLE_SSE)
     add_extra_compiler_option(-msse)
   endif()
   if(ENABLE_SSE2)
     add_extra_compiler_option(-msse2)
   endif()
-  if (ENABLE_NEON)
+  if(ENABLE_NEON)
     add_extra_compiler_option("-mfpu=neon")
   endif()
-  if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
+  if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
     add_extra_compiler_option("-mfpu=vfpv3")
   endif()
 
@@ -162,6 +165,10 @@ if(CMAKE_COMPILER_IS_GNUCXX)
         add_extra_compiler_option(-msse4.2)
       endif()
     endif()
+
+    if(ENABLE_FMA3)
+      add_extra_compiler_option(-mfma)
+    endif()
   endif(NOT MINGW)
 
   if(X86 OR X86_64)
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index c52cb021cb..45146a39c2 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -104,18 +104,32 @@
 #endif
 
 /* CPU features and intrinsics support */
-#define CV_CPU_NONE    0
-#define CV_CPU_MMX     1
-#define CV_CPU_SSE     2
-#define CV_CPU_SSE2    3
-#define CV_CPU_SSE3    4
-#define CV_CPU_SSSE3   5
-#define CV_CPU_SSE4_1  6
-#define CV_CPU_SSE4_2  7
-#define CV_CPU_POPCNT  8
-#define CV_CPU_AVX    10
-#define CV_CPU_AVX2   11
-#define CV_CPU_NEON   12
+#define CV_CPU_NONE             0
+#define CV_CPU_MMX              1
+#define CV_CPU_SSE              2
+#define CV_CPU_SSE2             3
+#define CV_CPU_SSE3             4
+#define CV_CPU_SSSE3            5
+#define CV_CPU_SSE4_1           6
+#define CV_CPU_SSE4_2           7
+#define CV_CPU_POPCNT           8
+
+#define CV_CPU_AVX              10
+#define CV_CPU_AVX2             11
+#define CV_CPU_FMA3             12
+
+#define CV_CPU_AVX_512F         13
+#define CV_CPU_AVX_512BW        14
+#define CV_CPU_AVX_512CD        15
+#define CV_CPU_AVX_512DQ        16
+#define CV_CPU_AVX_512ER        17
+#define CV_CPU_AVX_512IFMA512   18
+#define CV_CPU_AVX_512PF        19
+#define CV_CPU_AVX_512VBMI      20
+#define CV_CPU_AVX_512VL        21
+
+#define CV_CPU_NEON   100
+
 // when adding to this list remember to update the enum in core/utility.cpp
 #define CV_HARDWARE_MAX_FEATURE 255
 
@@ -124,6 +138,7 @@
 
 #if defined __SSE2__ || defined _M_X64  || (defined _M_IX86_FP && _M_IX86_FP >= 2)
 #  include <emmintrin.h>
+#  define CV_MMX
 #  define CV_SSE 1
 #  define CV_SSE2 1
 #  if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
@@ -142,6 +157,14 @@
 #    include <nmmintrin.h>
 #    define CV_SSE4_2 1
 #  endif
+#  if defined __FMA__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <immintrin.h>
+#    define CV_FMA3 1
+#  endif
+#  if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
+#    include <popcntintrin.h>
+#    define CV_POPCNT 1
+#  endif
 #  if defined __AVX__ || defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
 // MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
 // See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
@@ -151,11 +174,12 @@
 #      define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
 #    else
 #      define __xgetbv() 0
-#    ifdef __AVX2__
-#      define CV_AVX2 1
-#    endif
 #    endif
 #  endif
+#  if defined __AVX2__ || (defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219)
+#    include <immintrin.h>
+#    define CV_AVX2 1
+#  endif
 #endif
 
 #if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
@@ -170,6 +194,9 @@
 
 #endif // __CUDACC__
 
+#ifndef CV_MMX
+#  define CV_MMX 0
+#endif
 #ifndef CV_SSE
 #  define CV_SSE 0
 #endif
@@ -194,6 +221,40 @@
 #ifndef CV_AVX2
 #  define CV_AVX2 0
 #endif
+#ifndef CV_POPCNT
+#define CV_POPCNT 0
+#endif
+#ifndef CV_FMA3
+#  define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+#  define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+#  define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+#  define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+#  define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+#  define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA512
+#  define CV_AVX_512IFMA512 0
+#endif
+#ifndef CV_AVX_512PF
+#  define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+#  define CV_AVX_512VL 0
+#endif
+
 #ifndef CV_NEON
 #  define CV_NEON 0
 #endif
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index 88989ef5cb..fb8ccd88d7 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -281,16 +281,30 @@ CV_EXPORTS_W int64 getCPUTickCount();
 remember to keep this list identical to the one in cvdef.h
 */
 enum CpuFeatures {
-    CPU_MMX       = 1,
-    CPU_SSE       = 2,
-    CPU_SSE2      = 3,
-    CPU_SSE3      = 4,
-    CPU_SSSE3     = 5,
-    CPU_SSE4_1    = 6,
-    CPU_SSE4_2    = 7,
-    CPU_POPCNT    = 8,
-    CPU_AVX       = 10,
-    CPU_NEON      = 11
+    CPU_MMX             = 1,
+    CPU_SSE             = 2,
+    CPU_SSE2            = 3,
+    CPU_SSE3            = 4,
+    CPU_SSSE3           = 5,
+    CPU_SSE4_1          = 6,
+    CPU_SSE4_2          = 7,
+    CPU_POPCNT          = 8,
+
+    CPU_AVX             = 10,
+    CPU_AVX2            = 11,
+    CPU_FMA3            = 12,
+
+    CPU_AVX_512F        = 13,
+    CPU_AVX_512BW       = 14,
+    CPU_AVX_512CD       = 15,
+    CPU_AVX_512DQ       = 16,
+    CPU_AVX_512ER       = 17,
+    CPU_AVX_512IFMA512  = 18,
+    CPU_AVX_512PF       = 19,
+    CPU_AVX_512VBMI     = 20,
+    CPU_AVX_512VL       = 21,
+
+    CPU_NEON            = 100
 };
 
 /** @brief Returns true if the specified feature is supported by the host hardware.
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index 11bbab3a25..a7a6e98d4d 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -263,6 +263,7 @@ struct HWFeatures
             f.have[CV_CPU_SSE2]   = (cpuid_data[3] & (1<<26)) != 0;
             f.have[CV_CPU_SSE3]   = (cpuid_data[2] & (1<<0)) != 0;
             f.have[CV_CPU_SSSE3]  = (cpuid_data[2] & (1<<9)) != 0;
+            f.have[CV_CPU_FMA3]  = (cpuid_data[2] & (1<<12)) != 0;
             f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
             f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
             f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
@@ -301,6 +302,15 @@ struct HWFeatures
         #endif
             f.have[CV_CPU_AVX2]   = (cpuid_data[1] & (1<<5)) != 0;
 
+            f.have[CV_CPU_AVX_512F]       = (cpuid_data[1] & (1<<16)) != 0;
+            f.have[CV_CPU_AVX_512DQ]      = (cpuid_data[1] & (1<<17)) != 0;
+            f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
+            f.have[CV_CPU_AVX_512PF]      = (cpuid_data[1] & (1<<26)) != 0;
+            f.have[CV_CPU_AVX_512ER]      = (cpuid_data[1] & (1<<27)) != 0;
+            f.have[CV_CPU_AVX_512CD]      = (cpuid_data[1] & (1<<28)) != 0;
+            f.have[CV_CPU_AVX_512BW]      = (cpuid_data[1] & (1<<30)) != 0;
+            f.have[CV_CPU_AVX_512VL]      = (cpuid_data[1] & (1<<31)) != 0;
+            f.have[CV_CPU_AVX_512VBMI]    = (cpuid_data[2] &  (1<<1)) != 0;
         }
 
         return f;
diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp
index 53b62e74d7..84a9233dd2 100644
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@ -2998,6 +2998,12 @@ void printVersionInfo(bool useStdOut)
 
     std::string cpu_features;
 
+#if CV_MMX
+    if (checkHardwareSupport(CV_CPU_MMX)) cpu_features += " mmx";
+#endif
+#if CV_POPCNT
+    if (checkHardwareSupport(CV_CPU_POPCNT)) cpu_features += " popcnt";
+#endif
 #if CV_SSE
     if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
 #endif
@@ -3022,6 +3028,36 @@ void printVersionInfo(bool useStdOut)
 #if CV_AVX2
     if (checkHardwareSupport(CV_CPU_AVX2)) cpu_features += " avx2";
 #endif
+#if CV_FMA3
+    if (checkHardwareSupport(CV_CPU_FMA3)) cpu_features += " fma3";
+#endif
+#if CV_AVX_512F
+    if (checkHardwareSupport(CV_CPU_AVX_512F) cpu_features += " avx-512f";
+#endif
+#if CV_AVX_512BW
+    if (checkHardwareSupport(CV_CPU_AVX_512BW) cpu_features += " avx-512bw";
+#endif
+#if CV_AVX_512CD
+    if (checkHardwareSupport(CV_CPU_AVX_512CD) cpu_features += " avx-512cd";
+#endif
+#if CV_AVX_512DQ
+    if (checkHardwareSupport(CV_CPU_AVX_512DQ) cpu_features += " avx-512dq";
+#endif
+#if CV_AVX_512ER
+    if (checkHardwareSupport(CV_CPU_AVX_512ER) cpu_features += " avx-512er";
+#endif
+#if CV_AVX_512IFMA512
+    if (checkHardwareSupport(CV_CPU_AVX_512IFMA512) cpu_features += " avx-512ifma512";
+#endif
+#if CV_AVX_512PF
+    if (checkHardwareSupport(CV_CPU_AVX_512PF) cpu_features += " avx-512pf";
+#endif
+#if CV_AVX_512VBMI
+    if (checkHardwareSupport(CV_CPU_AVX_512VBMI) cpu_features += " avx-512vbmi";
+#endif
+#if CV_AVX_512VL
+    if (checkHardwareSupport(CV_CPU_AVX_512VL) cpu_features += " avx-512vl";
+#endif
 #if CV_NEON
     cpu_features += " neon"; // NEON is currently not checked at runtime
 #endif