|
|
|
@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s |
|
|
|
|
// Reduces four float vectors at once: lane 0 of the result is the sum of the
// four lanes of `a`, lane 1 the sum of `b`, lane 2 of `c`, lane 3 of `d`.
// The two code paths associate the additions differently
// ((x0 + x1) + (x2 + x3) with SSE3, (x0 + x2) + (x1 + x3) otherwise).
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
    // Pairwise horizontal adds: (a0+a1, a2+a3, b0+b1, b2+b3) and the same for c/d.
    const __m128 pairs_ab = _mm_hadd_ps(a.val, b.val);
    const __m128 pairs_cd = _mm_hadd_ps(c.val, d.val);

    // A second horizontal add collapses each pair of partial sums into one lane.
    return v_float32x4(_mm_hadd_ps(pairs_ab, pairs_cd));
#else
    // Interleave a with c and b with d, then add the low and high halves:
    //   part_ac = (a0+a2, c0+c2, a1+a3, c1+c3)
    //   part_bd = (b0+b2, d0+d2, b1+b3, d1+d3)
    const __m128 ac_lo = _mm_unpacklo_ps(a.val, c.val);
    const __m128 ac_hi = _mm_unpackhi_ps(a.val, c.val);
    const __m128 part_ac = _mm_add_ps(ac_lo, ac_hi);

    const __m128 bd_lo = _mm_unpacklo_ps(b.val, d.val);
    const __m128 bd_hi = _mm_unpackhi_ps(b.val, d.val);
    const __m128 part_bd = _mm_add_ps(bd_lo, bd_hi);

    // Interleaving the partials once more lines the remaining halves up in
    // a, b, c, d order, so one final add yields the four totals.
    const __m128 even_halves = _mm_unpacklo_ps(part_ac, part_bd);
    const __m128 odd_halves = _mm_unpackhi_ps(part_ac, part_bd);
    return v_float32x4(_mm_add_ps(even_halves, odd_halves));
#endif
}
|
|
|
|
|
|
|
|
|
// Expands OPENCV_HAL_IMPL_SSE_REDUCE_OP_4 with (v_uint32x4, unsigned, max, std::max).
// NOTE(review): the macro body is not visible in this chunk — presumably this
// generates a max-reduction over the lanes of v_uint32x4 using std::max;
// confirm against the macro definition.
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max) |
|
|
|
|