Merge pull request #21630 from shibayan:arm64-msvc-neon

* Added NEON support in builds for Windows on ARM

* Fixed the broken `HAVE_CPU_NEON_SUPPORT` display during the compiler test

* Fixed a build error with Visual Studio versions prior to 2022
pull/21669/head
Tatsuro Shibamura 3 years ago committed by GitHub
parent 119d8b3aca
commit d354ad1c34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      cmake/OpenCVCompilerOptions.cmake
  2. 1
      cmake/checks/cpu_neon.cpp
  3. 26
      modules/core/include/opencv2/core/hal/intrin_neon.hpp
  4. 3
      modules/core/src/system.cpp

@ -314,6 +314,10 @@ if(MSVC)
set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS")
set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS")
endif()
# ARM64 target with MSVC_VERSION 1930 or newer: append the
# _ARM64_DISTINCT_NEON_TYPES definition to OPENCV_EXTRA_FLAGS.
# `NOT ... LESS` is kept (rather than GREATER_EQUAL) so the test does not
# depend on newer CMake comparison operators.
# NOTE(review): the macro presumably makes <arm_neon.h> declare distinct NEON
# vector types on these toolsets - confirm against the MSVC documentation.
if(AARCH64)
  if(NOT MSVC_VERSION LESS 1930)
    set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES")
  endif()
endif()
endif()
if(PROJECT_NAME STREQUAL "OpenCV")

@ -1,6 +1,7 @@
#include <stdio.h>
#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64))
# define _ARM64_DISTINCT_NEON_TYPES
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1

@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
// Multiplies corresponding signed 16-bit lanes of `a` and `b` into 32-bit
// products and returns, per lane, the product shifted right by 16 and
// narrowed back to 16 bits (i.e. the upper half of each product).
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
    // The widened product of the upper four lanes is computed into a local
    // first, so that no preprocessor directive appears inside the argument
    // list of vshrn_n_s32 (a directive inside the arguments of a function-like
    // macro is undefined behavior, and the intrinsic may be such a macro).
#if CV_NEON_AARCH64
    int32x4_t c = vmull_high_s16(a.val, b.val);
#else // #if CV_NEON_AARCH64
    int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
#endif // #if CV_NEON_AARCH64
    return v_int16x8(vcombine_s16(
        // lower four lanes
        vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
        // upper four lanes
        vshrn_n_s32(c, 16)
    ));
}
// Multiplies corresponding unsigned 16-bit lanes of `a` and `b` into 32-bit
// products and returns, per lane, the product shifted right by 16 and
// narrowed back to 16 bits (i.e. the upper half of each product).
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
    // The widened product of the upper four lanes is computed into a local
    // first, so that no preprocessor directive appears inside the argument
    // list of vshrn_n_u32 (a directive inside the arguments of a function-like
    // macro is undefined behavior, and the intrinsic may be such a macro).
#if CV_NEON_AARCH64
    uint32x4_t c = vmull_high_u16(a.val, b.val);
#else // #if CV_NEON_AARCH64
    uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
#endif // #if CV_NEON_AARCH64
    return v_uint16x8(vcombine_u16(
        // lower four lanes
        vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
        // upper four lanes
        vshrn_n_u32(c, 16)
    ));
}
@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a)
{
// Converts the four float lanes of `a` to signed 32-bit integers with the
// AArch64 FCVTNS operation (round to nearest, ties to even) and wraps the
// result in a v_int32x4.
float32x4_t a_ = a.val;
int32x4_t result;
#if defined _MSC_VER
// MSVC: use the intrinsic form rather than the GCC-style __asm__ statement
// in the other branch.
result = vcvtnq_s32_f32(a_);
#else
// Other compilers: issue the fcvtns instruction directly; both operands are
// constrained to SIMD registers ("w") and nothing else is clobbered.
__asm__ ("fcvtns %0.4s, %1.4s"
: "=w"(result)
: "w"(a_)
: /* No clobbers */);
#endif
return v_int32x4(result);
}
#else

@ -615,6 +615,9 @@ struct HWFeatures
#if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800)
have[CV_CPU_NEON] = true;
#endif
#if defined _M_ARM64
// _M_ARM64 is defined (MSVC's macro for ARM64 targets): NEON is marked as
// available unconditionally, without any runtime probe.
have[CV_CPU_NEON] = true;
#endif
#ifdef __riscv_vector
have[CV_CPU_RISCVV] = true;
#endif

Loading…
Cancel
Save