core(intrin): v_load_low() workaround for aarch64+clang

pull/14889/head
Alexander Alekhin 6 years ago
parent e8ff5cac1d
commit e8a703a71d
  1. 18
      modules/core/include/opencv2/core/hal/intrin_neon.hpp

@ -875,13 +875,27 @@ OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
#endif
// OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix)
// Defines `inline _Tpvec v_load_low(const _Tp* ptr)` for one vector type.
//   _Tpvec - vector wrapper type returned by the generated function
//   _Tp    - element type pointed to by `ptr`
//   suffix - type suffix pasted into the intrinsic / reinterpret helper names
// One of two implementations is selected at compile time.
#if defined(__clang__) && defined(__aarch64__)
// clang targeting AArch64: avoid the LD2 instruction.
// Details: https://github.com/opencv/opencv/issues/14863
// Instead of the vld1/vcombine intrinsics used by the generic variant, this
// variant reads the data as a single scalar uint64 through a typedef declared
// with CV_DECL_ALIGNED(1), passes that scalar as the first argument of the
// v_uint64x2 constructor, and reinterprets the result via
// v_reinterpret_as_<suffix>().
// NOTE(review): the second v_uint64x2 argument is the constant 123456, while
// the generic variant below fills the upper half with zeros. Presumably callers
// must not rely on the upper half of the result - confirm against the
// v_load_low() contract before depending on either value.
// NOTE(review): the C-style cast to unaligned_uint64* also drops the const
// qualifier of `ptr`; the pointee is only read here.
#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ \
typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \
uint64 v = *(unaligned_uint64*)ptr; \
return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \
}
#else
// Generic variant: vld1_<suffix>(ptr) supplies the first operand of
// vcombine_<suffix>(), and the second operand is vdup_n_<suffix>((_Tp)0),
// i.e. a vector with every element set to zero.
#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); }
#endif
#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \

Loading…
Cancel
Save