diff --git a/modules/hal/include/opencv2/hal/intrin.hpp b/modules/hal/include/opencv2/hal/intrin.hpp index 3c53d60797..e959a49cff 100644 --- a/modules/hal/include/opencv2/hal/intrin.hpp +++ b/modules/hal/include/opencv2/hal/intrin.hpp @@ -2218,7 +2218,7 @@ inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8) OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8) -OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_u8) +OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8) OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16) OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16) OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16) @@ -2342,11 +2342,11 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ { return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); } OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8) -OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, OPENCV_HAL_NOP, s8, u8) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16) -OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, OPENCV_HAL_NOP, s16, u16) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32) -OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, OPENCV_HAL_NOP, s32, u32) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8) @@ -2381,18 +2381,18 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_ } // trade efficiency for convenience -#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, _Tp, suffix) \ +#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \ inline _Tpvec operator << (const _Tpvec& a, int n) \ -{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##suffix((_Tp)n))); } \ +{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \ inline _Tpvec operator >> (const _Tpvec& a, int n) \ -{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##suffix((_Tp)-n))); } +{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } -OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, short, s16) -OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32) +OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mullo, vmulq_u16) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mullo, vmulq_s16) @@ -2444,8 +2444,8 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, min, std::min) inline int v_signmask(const v_uint8x16& a) { - uint8x8_t m0 = vcreate_u8(CV_BIG_UINT(0x0706050403020100)); - uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_u8(m0, m0)); + int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100)); + uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0)); uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0))); return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8); } @@ -2454,8 +2454,8 @@ inline int v_signmask(const v_int8x16& a) inline int v_signmask(const v_uint16x8& a) { - uint16x4_t m0 = vcreate_u16(CV_BIG_UINT(0x0003000200010000)); - uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_u16(m0, m0)); + int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000)); + uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0)); uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0)); return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4); } @@ -2464,8 +2464,8 @@ inline int v_signmask(const v_int16x8& a) inline int v_signmask(const v_uint32x4& a) { - uint32x2_t m0 = vcreate_u32(CV_BIG_UINT(0x0000000100000000)); - uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_u32(m0, m0)); + int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000)); + uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0)); uint64x2_t v1 = vpaddlq_u32(v0); return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2); }