Merge pull request #15646 from alalek:fix_avx512_detection

pull/15651/head^2
Alexander Alekhin 5 years ago
commit 98fc098216
  1. 20
      cmake/OpenCVCompilerOptimizations.cmake
  2. 6
      cmake/checks/cpu_avx512clx.cpp
  3. 7
      cmake/checks/cpu_avx512icl.cpp
  4. 14
      modules/core/include/opencv2/core/cv_cpu_dispatch.h
  5. 36
      modules/core/include/opencv2/core/cv_cpu_helper.h
  6. 4
      modules/core/include/opencv2/core/cvdef.h
  7. 27
      modules/core/src/system.cpp

@ -5,13 +5,15 @@
# AVX / AVX2 / AVX_512F
# FMA3
#
# AVX512 details: https://en.wikipedia.org/wiki/AVX-512#CPUs_with_AVX-512
#
# CPU features groups:
# AVX512_COMMON (Common instructions AVX-512F/CD for all CPUs that support AVX-512)
# AVX512_KNL (Knights Landing with AVX-512F/CD/ER/PF)
# AVX512_KNM (Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ)
# AVX512_SKX (Skylake-X with AVX-512F/CD/BW/DQ/VL)
# AVX512_CNL (Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI)
# AVX512_CEL (Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI)
# AVX512_CLX (Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI)
# AVX512_ICL (Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ/VPCLMULQDQ*/GFNI*/VAES*)
# ppc64le arch:
@ -43,7 +45,7 @@
# CPU_{opt}_ENABLED_DEFAULT=ON/OFF - has compiler support without additional flag (CPU_BASELINE_DETECT=ON only)
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F")
list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL")
list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
list(APPEND CPU_ALL_OPTIMIZATIONS MSA)
list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
@ -163,15 +165,15 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ")
endif()
if(X86 OR X86_64)
ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL")
ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")
ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD")
ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA")
ocv_update(CPU_AVX512_KNM_GROUP "AVX512_KNL;AVX512_KNM_EXTRA;AVX_512VPOPCNTDQ")
ocv_update(CPU_AVX512_SKX_GROUP "AVX512_COMMON;AVX_512VL;AVX_512BW;AVX_512DQ")
ocv_update(CPU_AVX512_CNL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI")
ocv_update(CPU_AVX512_CEL_GROUP "AVX512_CNL;AVX_512VNNI")
ocv_update(CPU_AVX512_ICL_GROUP "AVX512_CEL;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? VPCLMULQDQ, GFNI, VAES
ocv_update(CPU_AVX512_CLX_GROUP "AVX512_SKX;AVX_512VNNI")
ocv_update(CPU_AVX512_ICL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI;AVX_512VNNI;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? VPCLMULQDQ, GFNI, VAES
ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp")
ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp")
@ -189,12 +191,12 @@ if(X86 OR X86_64)
ocv_update(CPU_AVX512_KNM_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512knm.cpp")
ocv_update(CPU_AVX512_SKX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512skx.cpp")
ocv_update(CPU_AVX512_CNL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cnl.cpp")
ocv_update(CPU_AVX512_CEL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cel.cpp")
ocv_update(CPU_AVX512_CLX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512clx.cpp")
ocv_update(CPU_AVX512_ICL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512icl.cpp")
if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)
ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_CEL")
ocv_update(CPU_AVX512_CEL_IMPLIES "AVX512_CNL")
ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_SKX")
ocv_update(CPU_AVX512_CLX_IMPLIES "AVX512_SKX")
ocv_update(CPU_AVX512_CNL_IMPLIES "AVX512_SKX")
ocv_update(CPU_AVX512_SKX_IMPLIES "AVX512_COMMON")
ocv_update(CPU_AVX512_KNM_IMPLIES "AVX512_KNL")
@ -251,7 +253,7 @@ if(X86 OR X86_64)
ocv_intel_compiler_optimization_option(AVX512_KNM "-xKNM" "/Qx:KNM")
ocv_intel_compiler_optimization_option(AVX512_SKX "-xSKYLAKE-AVX512" "/Qx:SKYLAKE-AVX512")
ocv_intel_compiler_optimization_option(AVX512_CNL "-xCANNONLAKE" "/Qx:CANNONLAKE")
ocv_intel_compiler_optimization_option(AVX512_CEL "-xCASCADELAKE" "/Qx:CASCADELAKE")
ocv_intel_compiler_optimization_option(AVX512_CLX "-xCASCADELAKE" "/Qx:CASCADELAKE")
ocv_intel_compiler_optimization_option(AVX512_ICL "-xICELAKE-CLIENT" "/Qx:ICELAKE-CLIENT")
elseif(CV_GCC OR CV_CLANG)
ocv_update(CPU_AVX2_FLAGS_ON "-mavx2")

@ -3,9 +3,9 @@
void test()
{
__m512i a, b, c;
a = _mm512_dpwssd_epi32(a, b, c);
a = _mm512_dpwssd_epi32(a, b, c); // VNNI
}
#else
#error "AVX512-CEL is not supported"
#error "AVX512-CLX is not supported"
#endif
int main() { return 0; }
int main() { return 0; }

@ -3,9 +3,10 @@
void test()
{
__m512i a, b, c;
a = _mm512_popcnt_epi8(a);
a = _mm512_shrdv_epi64(a, b, c);
a = _mm512_popcnt_epi64(a);
a = _mm512_popcnt_epi8(a); // BITALG
a = _mm512_shrdv_epi64(a, b, c); // VBMI2
a = _mm512_popcnt_epi64(a); // VPOPCNTDQ
a = _mm512_dpwssd_epi32(a, b, c); // VNNI
}
#else
#error "AVX512-ICL is not supported"

@ -113,12 +113,18 @@
# define CV_AVX_512IFMA 1
# define CV_AVX_512VBMI 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CEL
# define CV_AVX512_CEL 1
#ifdef CV_CPU_COMPILE_AVX512_CLX
# define CV_AVX512_CLX 1
# define CV_AVX_512VNNI 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_ICL
# define CV_AVX512_ICL 1
# undef CV_AVX_512IFMA
# define CV_AVX_512IFMA 1
# undef CV_AVX_512VBMI
# define CV_AVX_512VBMI 1
# undef CV_AVX_512VNNI
# define CV_AVX_512VNNI 1
# define CV_AVX_512VBMI2 1
# define CV_AVX_512BITALG 1
# define CV_AVX_512VPOPCNTDQ 1
@ -311,8 +317,8 @@ struct VZeroUpperGuard {
#ifndef CV_AVX512_CNL
# define CV_AVX512_CNL 0
#endif
#ifndef CV_AVX512_CEL
# define CV_AVX512_CEL 0
#ifndef CV_AVX512_CLX
# define CV_AVX512_CLX 0
#endif
#ifndef CV_AVX512_ICL
# define CV_AVX512_ICL 0

@ -357,26 +357,26 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CEL
# define CV_TRY_AVX512_CEL 1
# define CV_CPU_FORCE_AVX512_CEL 1
# define CV_CPU_HAS_SUPPORT_AVX512_CEL 1
# define CV_CPU_CALL_AVX512_CEL(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX512_CEL_(fn, args) return (opt_AVX512_CEL::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CEL
# define CV_TRY_AVX512_CEL 1
# define CV_CPU_FORCE_AVX512_CEL 0
# define CV_CPU_HAS_SUPPORT_AVX512_CEL (cv::checkHardwareSupport(CV_CPU_AVX512_CEL))
# define CV_CPU_CALL_AVX512_CEL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args)
# define CV_CPU_CALL_AVX512_CEL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args)
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX
# define CV_TRY_AVX512_CLX 1
# define CV_CPU_FORCE_AVX512_CLX 1
# define CV_CPU_HAS_SUPPORT_AVX512_CLX 1
# define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX
# define CV_TRY_AVX512_CLX 1
# define CV_CPU_FORCE_AVX512_CLX 0
# define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX))
# define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
# define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
#else
# define CV_TRY_AVX512_CEL 0
# define CV_CPU_FORCE_AVX512_CEL 0
# define CV_CPU_HAS_SUPPORT_AVX512_CEL 0
# define CV_CPU_CALL_AVX512_CEL(fn, args)
# define CV_CPU_CALL_AVX512_CEL_(fn, args)
# define CV_TRY_AVX512_CLX 0
# define CV_CPU_FORCE_AVX512_CLX 0
# define CV_CPU_HAS_SUPPORT_AVX512_CLX 0
# define CV_CPU_CALL_AVX512_CLX(fn, args)
# define CV_CPU_CALL_AVX512_CLX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CEL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CEL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL
# define CV_TRY_AVX512_ICL 1

@ -269,7 +269,7 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_AVX512_KNL 258
#define CV_CPU_AVX512_KNM 259
#define CV_CPU_AVX512_CNL 260
#define CV_CPU_AVX512_CEL 261
#define CV_CPU_AVX512_CLX 261
#define CV_CPU_AVX512_ICL 262
// when adding to this list remember to update the following enum
@ -320,7 +320,7 @@ enum CpuFeatures {
CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ
CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI
CPU_AVX512_CEL = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI
CPU_AVX512_CLX = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI
CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ
CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE

@ -370,11 +370,12 @@ struct HWFeatures
g_hwFeatureNames[CPU_MSA] = "CPU_MSA";
g_hwFeatureNames[CPU_AVX512_COMMON] = "AVX512-COMMON";
g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX";
g_hwFeatureNames[CPU_AVX512_KNL] = "AVX512-KNL";
g_hwFeatureNames[CPU_AVX512_KNM] = "AVX512-KNM";
g_hwFeatureNames[CPU_AVX512_CNL] = "AVX512-CNL";
g_hwFeatureNames[CPU_AVX512_CEL] = "AVX512-CEL";
g_hwFeatureNames[CPU_AVX512_CLX] = "AVX512-CLX";
g_hwFeatureNames[CPU_AVX512_ICL] = "AVX512-ICL";
}
@ -485,9 +486,11 @@ struct HWFeatures
have[CV_CPU_AVX_5124VNNIW] && have[CV_CPU_AVX_512VPOPCNTDQ];
have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512BW] && have[CV_CPU_AVX_512DQ] && have[CV_CPU_AVX_512VL];
have[CV_CPU_AVX512_CNL] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI];
have[CV_CPU_AVX512_CEL] = have[CV_CPU_AVX512_CNL] && have[CV_CPU_AVX_512VNNI];
have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_CEL] && have[CV_CPU_AVX_512VBMI2] &&
have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ];
have[CV_CPU_AVX512_CLX] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512VNNI];
have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_SKX] &&
have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI] &&
have[CV_CPU_AVX_512VNNI] &&
have[CV_CPU_AVX_512VBMI2] && have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ];
}
else
{
@ -495,7 +498,7 @@ struct HWFeatures
have[CV_CPU_AVX512_KNM] = false;
have[CV_CPU_AVX512_SKX] = false;
have[CV_CPU_AVX512_CNL] = false;
have[CV_CPU_AVX512_CEL] = false;
have[CV_CPU_AVX512_CLX] = false;
have[CV_CPU_AVX512_ICL] = false;
}
}
@ -572,8 +575,16 @@ struct HWFeatures
have[CV_CPU_VSX3] = (CV_VSX3);
#endif
bool skip_baseline_check = false;
#ifndef NO_GETENV
if (getenv("OPENCV_SKIP_CPU_BASELINE_CHECK"))
{
skip_baseline_check = true;
}
#endif
int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]))
&& !skip_baseline_check)
{
fprintf(stderr, "\n"
"******************************************************************\n"
@ -600,12 +611,12 @@ struct HWFeatures
{
if (have[feature])
{
if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature));
if (dump) fprintf(stderr, " ID=%3d (%s) - OK\n", feature, getHWFeatureNameSafe(feature));
}
else
{
result = false;
if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature));
if (dump) fprintf(stderr, " ID=%3d (%s) - NOT AVAILABLE\n", feature, getHWFeatureNameSafe(feature));
}
}
}

Loading…
Cancel
Save