|
|
@ -255,7 +255,7 @@ struct v_float64x2 |
|
|
|
__m128d val; |
|
|
|
__m128d val; |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
#if defined(HAVE_FP16) |
|
|
|
#if CV_FP16 |
|
|
|
struct v_float16x4 |
|
|
|
struct v_float16x4 |
|
|
|
{ |
|
|
|
{ |
|
|
|
typedef short lane_type; |
|
|
|
typedef short lane_type; |
|
|
@ -1056,7 +1056,7 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ |
|
|
|
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) |
|
|
|
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) |
|
|
|
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) |
|
|
|
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) |
|
|
|
|
|
|
|
|
|
|
|
#if defined(HAVE_FP16) |
|
|
|
#if CV_FP16 |
|
|
|
inline v_float16x4 v_load_f16(const short* ptr) |
|
|
|
inline v_float16x4 v_load_f16(const short* ptr) |
|
|
|
{ return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); } |
|
|
|
{ return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); } |
|
|
|
inline void v_store_f16(short* ptr, v_float16x4& a) |
|
|
|
inline void v_store_f16(short* ptr, v_float16x4& a) |
|
|
@ -1776,7 +1776,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) |
|
|
|
return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8)))); |
|
|
|
return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8)))); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#if defined(HAVE_FP16) |
|
|
|
#if CV_FP16 |
|
|
|
inline v_float32x4 v_cvt_f32(const v_float16x4& a) |
|
|
|
inline v_float32x4 v_cvt_f32(const v_float16x4& a) |
|
|
|
{ |
|
|
|
{ |
|
|
|
return v_float32x4(_mm_cvtph_ps(a.val)); |
|
|
|
return v_float32x4(_mm_cvtph_ps(a.val)); |
|
|
|