diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
index 9547a860db..46d347d234 100644
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -875,13 +875,31 @@ OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
 OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
 #endif
 
+#if defined(__clang__) && defined(__aarch64__)
+// avoid LD2 instruction. details: https://github.com/opencv/opencv/issues/14863
+// The 64 bits at ptr are read as one scalar through the unaligned_uint64 typedef
+// (presumably 1-byte aligned via CV_DECL_ALIGNED(1)) and become the first lane.
+// NOTE(review): the second lane is the constant 123456 here but zero in the #else
+// variant, so callers presumably must not rely on the upper half - confirm.
+#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ \
+typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \
+uint64 v = *(const unaligned_uint64*)ptr; \
+return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \
+}
+#else
+#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); }
+#endif
+
 #define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
 inline _Tpvec v_load(const _Tp* ptr) \
 { return _Tpvec(vld1q_##suffix(ptr)); } \
 inline _Tpvec v_load_aligned(const _Tp* ptr) \
 { return _Tpvec(vld1q_##suffix(ptr)); } \
-inline _Tpvec v_load_low(const _Tp* ptr) \
-{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
+OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
 { return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
 inline void v_store(_Tp* ptr, const _Tpvec& a) \