Merge pull request #26091 from asmorkalov:as/arm_version_check

Got rid of CAROTENE_NEON_ARCH and use standard __ARM_ARCH check
pull/26084/head
Alexander Smorkalov 5 months ago committed by GitHub
commit b72d7e3b05
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 8
      3rdparty/carotene/CMakeLists.txt
  2. 4
      3rdparty/carotene/hal/tegra_hal.hpp
  3. 11
      3rdparty/carotene/src/common.hpp
  4. 8
      3rdparty/carotene/src/vround_helper.hpp
  5. 10
      CMakeLists.txt
  6. 1
      doc/tutorials/introduction/config_reference/config_reference.markdown

@ -42,14 +42,6 @@ endif()
if(WITH_NEON) if(WITH_NEON)
target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON") target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON")
if(NOT DEFINED CAROTENE_NEON_ARCH )
elseif(CAROTENE_NEON_ARCH EQUAL 8)
target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=8")
elseif(CAROTENE_NEON_ARCH EQUAL 7)
target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=7")
else()
target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=0")
endif()
endif() endif()
if(MINGW) if(MINGW)

@ -1857,7 +1857,7 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast<size_t>(range.
#endif #endif
// The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8 // The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8
#if defined(DCAROTENE_NEON_ARCH) && (DCAROTENE_NEON_ARCH == 7) #if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType) inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType)
{ {
switch(borderType) switch(borderType)
@ -1928,7 +1928,7 @@ inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uc
#undef cv_hal_gaussianBlurBinomial #undef cv_hal_gaussianBlurBinomial
#define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial #define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial
#endif // DCAROTENE_NEON_ARCH=7 #endif // __ARM_ARCH=7
#endif // OPENCV_IMGPROC_HAL_INTERFACE_H #endif // OPENCV_IMGPROC_HAL_INTERFACE_H

@ -58,17 +58,6 @@
namespace CAROTENE_NS { namespace internal { namespace CAROTENE_NS { namespace internal {
#ifndef CAROTENE_NEON_ARCH
# if defined(__aarch64__) || defined(__aarch32__)
# define CAROTENE_NEON_ARCH 8
# else
# define CAROTENE_NEON_ARCH 7
# endif
#endif
#if ( !defined(__aarch64__) && !defined(__aarch32__) ) && (CAROTENE_NEON_ARCH == 8 )
# error("ARMv7 doen't support A32/A64 Neon instructions")
#endif
inline void prefetch(const void *ptr, size_t offset = 32*10) inline void prefetch(const void *ptr, size_t offset = 32*10)
{ {
#if defined __GNUC__ #if defined __GNUC__

@ -57,7 +57,7 @@ namespace CAROTENE_NS { namespace internal {
inline uint32x4_t vroundq_u32_f32(const float32x4_t val) inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
{ {
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */ #if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtnq_u32_f32(val); return vcvtnq_u32_f32(val);
#else #else
const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA); const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
@ -67,7 +67,7 @@ inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
inline uint32x2_t vround_u32_f32(const float32x2_t val) inline uint32x2_t vround_u32_f32(const float32x2_t val)
{ {
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */ #if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtn_u32_f32(val); return vcvtn_u32_f32(val);
#else #else
const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA); const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
@ -77,7 +77,7 @@ inline uint32x2_t vround_u32_f32(const float32x2_t val)
inline int32x4_t vroundq_s32_f32(const float32x4_t val) inline int32x4_t vroundq_s32_f32(const float32x4_t val)
{ {
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */ #if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtnq_s32_f32(val); return vcvtnq_s32_f32(val);
#else #else
const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA); const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
@ -87,7 +87,7 @@ inline int32x4_t vroundq_s32_f32(const float32x4_t val)
inline int32x2_t vround_s32_f32(const float32x2_t val) inline int32x2_t vround_s32_f32(const float32x2_t val)
{ {
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */ #if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtn_s32_f32(val); return vcvtn_s32_f32(val);
#else #else
const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA); const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);

@ -1018,15 +1018,7 @@ foreach(hal ${OpenCV_HAL})
if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;") if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;")
add_subdirectory(3rdparty/carotene/hal) add_subdirectory(3rdparty/carotene/hal)
ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS) ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS)
list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})")
if( NOT DEFINED CAROTENE_NEON_ARCH)
set(CAROTENE_NEON_MSG "Auto detected")
elseif( CAROTENE_NEON_ARCH GREATER 7)
set(CAROTENE_NEON_MSG "Force ARMv8+")
else()
set(CAROTENE_NEON_MSG "Force ARMv7")
endif()
list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION}, ${CAROTENE_NEON_MSG})")
else() else()
message(STATUS "Carotene: NEON is not available, disabling carotene...") message(STATUS "Carotene: NEON is not available, disabling carotene...")
endif() endif()

@ -588,7 +588,6 @@ Following options can be used to change installation layout for common scenarios
| `BUILD_FAT_JAVA_LIB` | _ON_ (for static Android builds) | Build single _opencv_java_ dynamic library containing all library functionality bundled with Java bindings. | | `BUILD_FAT_JAVA_LIB` | _ON_ (for static Android builds) | Build single _opencv_java_ dynamic library containing all library functionality bundled with Java bindings. |
| `BUILD_opencv_python2` | _ON_ | Build python2 bindings (deprecated). Python with development files and numpy must be installed. | | `BUILD_opencv_python2` | _ON_ | Build python2 bindings (deprecated). Python with development files and numpy must be installed. |
| `BUILD_opencv_python3` | _ON_ | Build python3 bindings. Python with development files and numpy must be installed. | | `BUILD_opencv_python3` | _ON_ | Build python3 bindings. Python with development files and numpy must be installed. |
| `CAROTENE_NEON_ARCH` | '(auto)' | Selects the NEON architecture for Carotene. If it is not set, the architecture is auto-detected. If it is set to 8, ARMv8 (and later) is used. Otherwise, ARMv7 is used. |
TODO: need separate tutorials covering bindings builds TODO: need separate tutorials covering bindings builds

Loading…
Cancel
Save