continuing fixing universal intrinsics

pull/3935/head
Vadim Pisarevsky 10 years ago
parent bfbf864a73
commit ce7590bd24
  1. 38
      modules/hal/include/opencv2/hal/intrin.hpp

@ -2218,7 +2218,7 @@ inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_u8)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
@ -2342,11 +2342,11 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); }
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, OPENCV_HAL_NOP, s8, u8)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, OPENCV_HAL_NOP, s16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, OPENCV_HAL_NOP, s32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
@ -2381,18 +2381,18 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_
}
// trade efficiency for convenience
#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, _Tp, suffix) \
#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
inline _Tpvec operator << (const _Tpvec& a, int n) \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##suffix((_Tp)n))); } \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \
inline _Tpvec operator >> (const _Tpvec& a, int n) \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##suffix((_Tp)-n))); }
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); }
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_mullo, vmulq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_mullo, vmulq_s16)
@ -2444,8 +2444,8 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float, min, std::min)
inline int v_signmask(const v_uint8x16& a)
{
uint8x8_t m0 = vcreate_u8(CV_BIG_UINT(0x0706050403020100));
uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_u8(m0, m0));
int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
}
@ -2454,8 +2454,8 @@ inline int v_signmask(const v_int8x16& a)
inline int v_signmask(const v_uint16x8& a)
{
uint16x4_t m0 = vcreate_u16(CV_BIG_UINT(0x0003000200010000));
uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_u16(m0, m0));
int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
}
@ -2464,8 +2464,8 @@ inline int v_signmask(const v_int16x8& a)
inline int v_signmask(const v_uint32x4& a)
{
uint32x2_t m0 = vcreate_u32(CV_BIG_UINT(0x0000000100000000));
uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_u32(m0, m0));
int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
uint64x2_t v1 = vpaddlq_u32(v0);
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
}

Loading…
Cancel
Save