core: several improves and fixes on ppc64le infrastructure

- add infrastructure support for Power9/VSX3
  - fix missing VSX flags on GCC4.9 and CLANG4(#13210, #13222)
  - fix disable VSX optimzation on GCC by using flag ENABLE_VSX
  - flag ENABLE_VSX is deprecated now, use CPU_BASELINE, CPU_DISPATCH instead
  - add VSX3 to arithmetic dispatchable flags
pull/13224/head
Sayed Adel 6 years ago
parent 183bc5c281
commit 474a0dac49
  1. 1
      CMakeLists.txt
  2. 20
      cmake/OpenCVCompilerOptimizations.cmake
  3. 14
      cmake/checks/cpu_vsx.cpp
  4. 17
      cmake/checks/cpu_vsx3.cpp
  5. 2
      modules/core/CMakeLists.txt
  6. 10
      modules/core/include/opencv2/core/cv_cpu_dispatch.h
  7. 21
      modules/core/include/opencv2/core/cv_cpu_helper.h
  8. 6
      modules/core/include/opencv2/core/cvdef.h
  9. 30
      modules/core/src/system.cpp

@ -327,7 +327,6 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC ) OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC ) OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
if(NOT IOS) # Use CPU_BASELINE instead if(NOT IOS) # Use CPU_BASELINE instead
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )

@ -5,6 +5,10 @@
# AVX / AVX2 / AVX_512F # AVX / AVX2 / AVX_512F
# FMA3 # FMA3
# ppc64le arch:
# VSX (always available on Power8)
# VSX3 (always available on Power9)
# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag) # CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
# CPU_{opt}_IMPLIES=<list> # CPU_{opt}_IMPLIES=<list>
# CPU_{opt}_FORCE=<list> - subset of "implies" list # CPU_{opt}_FORCE=<list> - subset of "implies" list
@ -29,7 +33,7 @@
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_SKX") set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_SKX")
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
list(APPEND CPU_ALL_OPTIMIZATIONS VSX) list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
ocv_update(CPU_VFPV3_FEATURE_ALIAS "") ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
@ -81,7 +85,7 @@ ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF) ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF) ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX OFF) ocv_optimization_process_obsolete_option(ENABLE_VSX VSX ON)
macro(ocv_is_optimization_in_list resultvar check_opt) macro(ocv_is_optimization_in_list resultvar check_opt)
set(__checked "") set(__checked "")
@ -289,14 +293,24 @@ elseif(ARM OR AARCH64)
set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}") set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
endif() endif()
elseif(PPC64LE) elseif(PPC64LE)
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX") ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3")
ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp") ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
ocv_update(CPU_VSX3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx3.cpp")
if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)
ocv_update(CPU_VSX3_IMPLIES "VSX")
endif()
if(CV_CLANG AND (NOT ${CMAKE_CXX_COMPILER} MATCHES "xlc")) if(CV_CLANG AND (NOT ${CMAKE_CXX_COMPILER} MATCHES "xlc"))
ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec") ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")
ocv_update(CPU_VSX3_FLAGS_ON "-mpower9-vector")
else() else()
ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8") ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")
ocv_update(CPU_VSX3_FLAGS_ON "-mcpu=power9 -mtune=power9")
endif() endif()
set(CPU_DISPATCH "VSX3" CACHE STRING "${HELP_CPU_DISPATCH}")
set(CPU_BASELINE "VSX" CACHE STRING "${HELP_CPU_BASELINE}")
endif() endif()
# Helper values for cmake-gui # Helper values for cmake-gui

@ -1,8 +1,12 @@
# if defined(__VSX__) #if defined(__VSX__)
# include <altivec.h> #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
# else #include <altivec.h>
# error "VSX is not supported" #else
# endif #error "OpenCV only supports little-endian mode"
#endif
#else
#error "VSX is not supported"
#endif
int main() int main()
{ {

@ -0,0 +1,17 @@
#if defined(__VSX__)
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
#include <altivec.h>
#else
#error "OpenCV only supports little-endian mode"
#endif
#else
#error "VSX3 is not supported"
#endif
int main()
{
__vector unsigned char a = vec_splats((unsigned char)1);
__vector unsigned char b = vec_splats((unsigned char)2);
__vector unsigned char r = vec_absd(a, b);
return 0;
}

@ -2,7 +2,7 @@ set(the_description "The Core Functionality")
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2) ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
ocv_add_dispatched_file(stat SSE4_2 AVX2) ocv_add_dispatched_file(stat SSE4_2 AVX2)
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
# dispatching for accuracy tests # dispatching for accuracy tests
ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2) ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2)

@ -107,7 +107,7 @@
# include <arm_neon.h> # include <arm_neon.h>
#endif #endif
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__) #ifdef CV_CPU_COMPILE_VSX
# include <altivec.h> # include <altivec.h>
# undef vector # undef vector
# undef pixel # undef pixel
@ -115,6 +115,10 @@
# define CV_VSX 1 # define CV_VSX 1
#endif #endif
#ifdef CV_CPU_COMPILE_VSX3
# define CV_VSX3 1
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@ -237,3 +241,7 @@ struct VZeroUpperGuard {
#ifndef CV_VSX #ifndef CV_VSX
# define CV_VSX 0 # define CV_VSX 0
#endif #endif
#ifndef CV_VSX3
# define CV_VSX3 0
#endif

@ -315,5 +315,26 @@
#endif #endif
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
# define CV_TRY_VSX3 1
# define CV_CPU_FORCE_VSX3 1
# define CV_CPU_HAS_SUPPORT_VSX3 1
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
# define CV_TRY_VSX3 1
# define CV_CPU_FORCE_VSX3 0
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
#else
# define CV_TRY_VSX3 0
# define CV_CPU_FORCE_VSX3 0
# define CV_CPU_HAS_SUPPORT_VSX3 0
# define CV_CPU_CALL_VSX3(fn, args)
# define CV_CPU_CALL_VSX3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) #define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ #define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */

@ -240,9 +240,10 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_AVX_512VBMI 20 #define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21 #define CV_CPU_AVX_512VL 21
#define CV_CPU_NEON 100 #define CV_CPU_NEON 100
#define CV_CPU_VSX 200 #define CV_CPU_VSX 200
#define CV_CPU_VSX3 201
// CPU features groups // CPU features groups
#define CV_CPU_AVX512_SKX 256 #define CV_CPU_AVX512_SKX 256
@ -280,6 +281,7 @@ enum CpuFeatures {
CPU_NEON = 100, CPU_NEON = 100,
CPU_VSX = 200, CPU_VSX = 200,
CPU_VSX3 = 201,
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL

@ -107,15 +107,14 @@ void* allocSingletonBuffer(size_t size) { return fastMalloc(size); }
# include <cpu-features.h> # include <cpu-features.h>
#endif #endif
#ifndef __VSX__
# if defined __PPC64__ && defined __linux__ #if CV_VSX && defined __linux__
# include "sys/auxv.h" # include "sys/auxv.h"
# ifndef AT_HWCAP2 # ifndef AT_HWCAP2
# define AT_HWCAP2 26 # define AT_HWCAP2 26
# endif # endif
# ifndef PPC_FEATURE2_ARCH_2_07 # ifndef PPC_FEATURE2_ARCH_3_00
# define PPC_FEATURE2_ARCH_2_07 0x80000000 # define PPC_FEATURE2_ARCH_3_00 0x00800000
# endif
# endif # endif
#endif #endif
@ -359,6 +358,7 @@ struct HWFeatures
g_hwFeatureNames[CPU_NEON] = "NEON"; g_hwFeatureNames[CPU_NEON] = "NEON";
g_hwFeatureNames[CPU_VSX] = "VSX"; g_hwFeatureNames[CPU_VSX] = "VSX";
g_hwFeatureNames[CPU_VSX3] = "VSX3";
g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX"; g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX";
} }
@ -513,14 +513,14 @@ struct HWFeatures
#endif #endif
#endif #endif
#ifdef __VSX__ // there's no need to check VSX availability in runtime since it's always available on ppc64le CPUs
have[CV_CPU_VSX] = true; have[CV_CPU_VSX] = (CV_VSX);
#elif (defined __PPC64__ && defined __linux__) // TODO: Check VSX3 availability in runtime for other platforms
uint64 hwcaps = getauxval(AT_HWCAP); #if CV_VSX && defined __linux__
uint64 hwcap2 = getauxval(AT_HWCAP2); uint64 hwcap2 = getauxval(AT_HWCAP2);
have[CV_CPU_VSX] = (hwcaps & PPC_FEATURE_PPC_LE && hwcaps & PPC_FEATURE_HAS_VSX && hwcap2 & PPC_FEATURE2_ARCH_2_07); have[CV_CPU_VSX3] = (hwcap2 & PPC_FEATURE2_ARCH_3_00);
#else #else
have[CV_CPU_VSX] = false; have[CV_CPU_VSX3] = (CV_VSX3);
#endif #endif
int baseline_features[] = { CV_CPU_BASELINE_FEATURES }; int baseline_features[] = { CV_CPU_BASELINE_FEATURES };

Loading…
Cancel
Save