|
|
|
@ -438,10 +438,14 @@ void v_rshr_pack_store(schar* ptr, const v_int16x8& a) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// bit-wise "mask ? a : b"
|
|
|
|
|
// Raw-register "mask ? a : b".
//
// Returns a value built from `a` where `mask` is set and from `b` elsewhere.
// The two build variants agree only when every byte of `mask` is 0x00 or 0xFF:
//  - with CV_SSE4_1, _mm_blendv_epi8 takes a whole byte from `a` or `b`
//    depending on the most significant bit of the matching mask byte;
//  - otherwise the selection is done bit by bit as b ^ ((a ^ b) & mask).
inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
{
#if CV_SSE4_1
    // Blend operand order is (value-if-clear, value-if-set, mask).
    return _mm_blendv_epi8(b, a, mask);
#else
    // Bits of (a ^ b) that survive the mask flip b into a; the rest leave b untouched.
    return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask));
#endif
}
|
|
|
|
|
|
|
|
|
inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) |
|
|
|
@ -1403,6 +1407,26 @@ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, |
|
|
|
|
// Floating-point instantiations of the sign-check helpers.
// NOTE(review): the trailing constants (15 for 4 lanes, 3 for 2 lanes) look like
// all-lanes-set bit masks - confirm against the macro definition.
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
|
|
|
|
|
|
|
|
|
#if CV_SSE4_1 |
|
|
|
|
// Generates v_select(mask, a, b) for one vector type using an SSE4.1 blend.
//   _Tpvec   - vector wrapper type; the raw register is read from its .val member
//   cast_ret - applied to the blend result before it is wrapped back into _Tpvec
//   cast     - applied to each .val operand before it is handed to the blend
//   suffix   - selects the intrinsic: _mm_blendv_epi8, _mm_blendv_ps or _mm_blendv_pd
// Operands are passed as (b, a, mask) so that `a` is taken where the mask is set.
// Note that the blend instructions look only at the top bit of each mask
// byte (epi8) or lane (ps / pd), not at every mask bit.
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
    return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \
}
|
|
|
|
|
|
|
|
|
// 8- and 16-bit integer vectors all use the byte blend, so a 16-bit lane is
// selected as two independent bytes.
// NOTE(review): presumably callers pass masks whose lanes are all-ones or all-zeros - confirm.
OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
// 32-bit integer vectors go through the float blend; registers are cast on the way in and out.
OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
// 64-bit integer vectors are not instantiated; the casts are still to be decided.
// NOTE(review): the v_int64x2 line names `ps` while v_uint64x2 names `pd` - confirm
// the intended suffix before enabling either.
// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd)
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps)
// Floating-point vectors use the matching blend directly, with no casts.
OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps)
OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd)
|
|
|
|
|
|
|
|
|
#else // CV_SSE4_1
|
|
|
|
|
|
|
|
|
|
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \ |
|
|
|
|
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
|
|
|
|
|
{ \
|
|
|
|
@ -1419,6 +1443,7 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128) |
|
|
|
|
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
|
|
|
|
|
OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps) |
|
|
|
|
OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \ |
|
|
|
|
inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
|
|
|
|
|