diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 28867f8608..6389b19894 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -5,13 +5,15 @@ # AVX / AVX2 / AVX_512F # FMA3 # +# AVX512 details: https://en.wikipedia.org/wiki/AVX-512#CPUs_with_AVX-512 +# # CPU features groups: # AVX512_COMMON (Common instructions AVX-512F/CD for all CPUs that support AVX-512) # AVX512_KNL (Knights Landing with AVX-512F/CD/ER/PF) # AVX512_KNM (Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ) # AVX512_SKX (Skylake-X with AVX-512F/CD/BW/DQ/VL) # AVX512_CNL (Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI) -# AVX512_CEL (Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI) +# AVX512_CLX (Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI) # AVX512_ICL (Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ/VPCLMULQDQ*/GFNI*/VAES*) # ppc64le arch: @@ -43,7 +45,7 @@ # CPU_{opt}_ENABLED_DEFAULT=ON/OFF - has compiler support without additional flag (CPU_BASELINE_DETECT=ON only) set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F") -list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL") +list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL") list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) list(APPEND CPU_ALL_OPTIMIZATIONS MSA) list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3) @@ -163,15 +165,15 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") endif() if(X86 OR X86_64) - ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL") + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL") ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD") ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA") ocv_update(CPU_AVX512_KNM_GROUP "AVX512_KNL;AVX512_KNM_EXTRA;AVX_512VPOPCNTDQ") ocv_update(CPU_AVX512_SKX_GROUP "AVX512_COMMON;AVX_512VL;AVX_512BW;AVX_512DQ") ocv_update(CPU_AVX512_CNL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI") - ocv_update(CPU_AVX512_CEL_GROUP "AVX512_CNL;AVX_512VNNI") - ocv_update(CPU_AVX512_ICL_GROUP "AVX512_CEL;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? VPCLMULQDQ, GFNI, VAES + ocv_update(CPU_AVX512_CLX_GROUP "AVX512_SKX;AVX_512VNNI") + ocv_update(CPU_AVX512_ICL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI;AVX_512VNNI;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? VPCLMULQDQ, GFNI, VAES ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") @@ -189,12 +191,12 @@ if(X86 OR X86_64) ocv_update(CPU_AVX512_KNM_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512knm.cpp") ocv_update(CPU_AVX512_SKX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512skx.cpp") ocv_update(CPU_AVX512_CNL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cnl.cpp") - ocv_update(CPU_AVX512_CEL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cel.cpp") + ocv_update(CPU_AVX512_CLX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512clx.cpp") ocv_update(CPU_AVX512_ICL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512icl.cpp") if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) - ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_CEL") - ocv_update(CPU_AVX512_CEL_IMPLIES "AVX512_CNL") + ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_SKX") + ocv_update(CPU_AVX512_CLX_IMPLIES "AVX512_SKX") ocv_update(CPU_AVX512_CNL_IMPLIES "AVX512_SKX") ocv_update(CPU_AVX512_SKX_IMPLIES "AVX512_COMMON") ocv_update(CPU_AVX512_KNM_IMPLIES "AVX512_KNL") @@ -251,7 +253,7 @@ if(X86 OR X86_64) ocv_intel_compiler_optimization_option(AVX512_KNM "-xKNM" "/Qx:KNM") ocv_intel_compiler_optimization_option(AVX512_SKX "-xSKYLAKE-AVX512" "/Qx:SKYLAKE-AVX512") ocv_intel_compiler_optimization_option(AVX512_CNL "-xCANNONLAKE" "/Qx:CANNONLAKE") - ocv_intel_compiler_optimization_option(AVX512_CEL "-xCASCADELAKE" "/Qx:CASCADELAKE") + ocv_intel_compiler_optimization_option(AVX512_CLX "-xCASCADELAKE" "/Qx:CASCADELAKE") ocv_intel_compiler_optimization_option(AVX512_ICL "-xICELAKE-CLIENT" "/Qx:ICELAKE-CLIENT") elseif(CV_GCC OR CV_CLANG) ocv_update(CPU_AVX2_FLAGS_ON "-mavx2") diff --git a/cmake/checks/cpu_avx512cel.cpp b/cmake/checks/cpu_avx512clx.cpp similarity index 50% rename from cmake/checks/cpu_avx512cel.cpp rename to cmake/checks/cpu_avx512clx.cpp index e372cf9a45..0d0b2aaee0 100644 --- a/cmake/checks/cpu_avx512cel.cpp +++ b/cmake/checks/cpu_avx512clx.cpp @@ -3,9 +3,9 @@ void test() { __m512i a, b, c; - a = _mm512_dpwssd_epi32(a, b, c); + a = _mm512_dpwssd_epi32(a, b, c); // VNNI } #else -#error "AVX512-CEL is not supported" +#error "AVX512-CLX is not supported" #endif -int main() { return 0; } \ No newline at end of file +int main() { return 0; } diff --git a/cmake/checks/cpu_avx512icl.cpp b/cmake/checks/cpu_avx512icl.cpp index a67f5f35d4..551f624d08 100644 --- a/cmake/checks/cpu_avx512icl.cpp +++ b/cmake/checks/cpu_avx512icl.cpp @@ -3,9 +3,10 @@ void test() { __m512i a, b, c; - a = _mm512_popcnt_epi8(a); - a = _mm512_shrdv_epi64(a, b, c); - a = _mm512_popcnt_epi64(a); + a = _mm512_popcnt_epi8(a); // BITALG + a = _mm512_shrdv_epi64(a, b, c); // VBMI2 + a = _mm512_popcnt_epi64(a); // VPOPCNTDQ + a = _mm512_dpwssd_epi32(a, b, c); // VNNI } #else #error "AVX512-ICL is not supported" diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 0248d5f98f..c00f569f12 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -113,12 +113,18 @@ # define CV_AVX_512IFMA 1 # define CV_AVX_512VBMI 1 #endif -#ifdef CV_CPU_COMPILE_AVX512_CEL -# define CV_AVX512_CEL 1 +#ifdef CV_CPU_COMPILE_AVX512_CLX +# define CV_AVX512_CLX 1 # define CV_AVX_512VNNI 1 #endif #ifdef CV_CPU_COMPILE_AVX512_ICL # define CV_AVX512_ICL 1 +# undef CV_AVX_512IFMA +# define CV_AVX_512IFMA 1 +# undef CV_AVX_512VBMI +# define CV_AVX_512VBMI 1 +# undef CV_AVX_512VNNI +# define CV_AVX_512VNNI 1 # define CV_AVX_512VBMI2 1 # define CV_AVX_512BITALG 1 # define CV_AVX_512VPOPCNTDQ 1 @@ -311,8 +317,8 @@ struct VZeroUpperGuard { #ifndef CV_AVX512_CNL # define CV_AVX512_CNL 0 #endif -#ifndef CV_AVX512_CEL -# define CV_AVX512_CEL 0 +#ifndef CV_AVX512_CLX +# define CV_AVX512_CLX 0 #endif #ifndef CV_AVX512_ICL # define CV_AVX512_ICL 0 diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index 2c82282b40..aaa89ed415 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -357,26 +357,26 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) -#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CEL -# define CV_TRY_AVX512_CEL 1 -# define CV_CPU_FORCE_AVX512_CEL 1 -# define CV_CPU_HAS_SUPPORT_AVX512_CEL 1 -# define CV_CPU_CALL_AVX512_CEL(fn, args) return (cpu_baseline::fn args) -# define CV_CPU_CALL_AVX512_CEL_(fn, args) return (opt_AVX512_CEL::fn args) -#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CEL -# define CV_TRY_AVX512_CEL 1 -# define CV_CPU_FORCE_AVX512_CEL 0 -# define CV_CPU_HAS_SUPPORT_AVX512_CEL (cv::checkHardwareSupport(CV_CPU_AVX512_CEL)) -# define CV_CPU_CALL_AVX512_CEL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args) -# define CV_CPU_CALL_AVX512_CEL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 1 +# define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX +# define CV_TRY_AVX512_CLX 1 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX)) +# define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) #else -# define CV_TRY_AVX512_CEL 0 -# define CV_CPU_FORCE_AVX512_CEL 0 -# define CV_CPU_HAS_SUPPORT_AVX512_CEL 0 -# define CV_CPU_CALL_AVX512_CEL(fn, args) -# define CV_CPU_CALL_AVX512_CEL_(fn, args) +# define CV_TRY_AVX512_CLX 0 +# define CV_CPU_FORCE_AVX512_CLX 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CLX 0 +# define CV_CPU_CALL_AVX512_CLX(fn, args) +# define CV_CPU_CALL_AVX512_CLX_(fn, args) #endif -#define __CV_CPU_DISPATCH_CHAIN_AVX512_CEL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CEL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL # define CV_TRY_AVX512_ICL 1 diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 49390ec8d6..ea446b3fcb 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -269,7 +269,7 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard #define CV_CPU_AVX512_KNL 258 #define CV_CPU_AVX512_KNM 259 #define CV_CPU_AVX512_CNL 260 -#define CV_CPU_AVX512_CEL 261 +#define CV_CPU_AVX512_CLX 261 #define CV_CPU_AVX512_ICL 262 // when adding to this list remember to update the following enum @@ -320,7 +320,7 @@ enum CpuFeatures { CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI - CPU_AVX512_CEL = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI + CPU_AVX512_CLX = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 8d7ccf7119..1b6777561a 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -370,11 +370,12 @@ struct HWFeatures g_hwFeatureNames[CPU_MSA] = "CPU_MSA"; + g_hwFeatureNames[CPU_AVX512_COMMON] = "AVX512-COMMON"; g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX"; g_hwFeatureNames[CPU_AVX512_KNL] = "AVX512-KNL"; g_hwFeatureNames[CPU_AVX512_KNM] = "AVX512-KNM"; g_hwFeatureNames[CPU_AVX512_CNL] = "AVX512-CNL"; - g_hwFeatureNames[CPU_AVX512_CEL] = "AVX512-CEL"; + g_hwFeatureNames[CPU_AVX512_CLX] = "AVX512-CLX"; g_hwFeatureNames[CPU_AVX512_ICL] = "AVX512-ICL"; } @@ -485,9 +486,11 @@ struct HWFeatures have[CV_CPU_AVX_5124VNNIW] && have[CV_CPU_AVX_512VPOPCNTDQ]; have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512BW] && have[CV_CPU_AVX_512DQ] && have[CV_CPU_AVX_512VL]; have[CV_CPU_AVX512_CNL] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI]; - have[CV_CPU_AVX512_CEL] = have[CV_CPU_AVX512_CNL] && have[CV_CPU_AVX_512VNNI]; - have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_CEL] && have[CV_CPU_AVX_512VBMI2] && - have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ]; + have[CV_CPU_AVX512_CLX] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512VNNI]; + have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_SKX] && + have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI] && + have[CV_CPU_AVX_512VNNI] && + have[CV_CPU_AVX_512VBMI2] && have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ]; } else { @@ -495,7 +498,7 @@ struct HWFeatures have[CV_CPU_AVX512_KNM] = false; have[CV_CPU_AVX512_SKX] = false; have[CV_CPU_AVX512_CNL] = false; - have[CV_CPU_AVX512_CEL] = false; + have[CV_CPU_AVX512_CLX] = false; have[CV_CPU_AVX512_ICL] = false; } } @@ -572,8 +575,16 @@ struct HWFeatures have[CV_CPU_VSX3] = (CV_VSX3); #endif + bool skip_baseline_check = false; +#ifndef NO_GETENV + if (getenv("OPENCV_SKIP_CPU_BASELINE_CHECK")) + { + skip_baseline_check = true; + } +#endif int baseline_features[] = { CV_CPU_BASELINE_FEATURES }; - if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]))) + if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])) + && !skip_baseline_check) { fprintf(stderr, "\n" "******************************************************************\n" @@ -600,12 +611,12 @@ struct HWFeatures { if (have[feature]) { - if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature)); + if (dump) fprintf(stderr, " ID=%3d (%s) - OK\n", feature, getHWFeatureNameSafe(feature)); } else { result = false; - if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature)); + if (dump) fprintf(stderr, " ID=%3d (%s) - NOT AVAILABLE\n", feature, getHWFeatureNameSafe(feature)); } } }