build: fix v_reduce_sum4 (requires SSE3)

pull/8911/head
Alexander Alekhin 8 years ago
parent f49f056d29
commit e23b59da5c
  1. 6
      modules/core/include/opencv2/core/hal/intrin_sse.hpp
  2. 18
      modules/core/test/test_intrin.cpp

@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s
/** @brief Sums the lanes of four float vectors in one call.
 *
 * Lane 0 of the result is a[0]+a[1]+a[2]+a[3], lane 1 is the same for b,
 * lane 2 for c and lane 3 for d.
 *
 * The horizontal-add version is only compiled when CV_SSE3 is set; otherwise
 * the same result is built from interleave + vertical add instructions.
 * Note that the two versions pair the lanes differently, so for inexact
 * inputs the results may differ in the last bit.
 */
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
    // First round: (a0+a1, a2+a3, b0+b1, b2+b3) and the same for c/d.
    const __m128 pairs_ab = _mm_hadd_ps(a.val, b.val);
    const __m128 pairs_cd = _mm_hadd_ps(c.val, d.val);
    // Second round folds each pair of partial sums into a single lane.
    const __m128 totals = _mm_hadd_ps(pairs_ab, pairs_cd);
    return v_float32x4(totals);
#else
    // Interleave a with c (and b with d) so that a vertical add produces
    // (x0+x2, y0+y2, x1+x3, y1+y3) for each interleaved pair x/y.
    const __m128 ac_lo = _mm_unpacklo_ps(a.val, c.val);   // (a0, c0, a1, c1)
    const __m128 ac_hi = _mm_unpackhi_ps(a.val, c.val);   // (a2, c2, a3, c3)
    const __m128 bd_lo = _mm_unpacklo_ps(b.val, d.val);   // (b0, d0, b1, d1)
    const __m128 bd_hi = _mm_unpackhi_ps(b.val, d.val);   // (b2, d2, b3, d3)
    const __m128 half_ac = _mm_add_ps(ac_lo, ac_hi);
    const __m128 half_bd = _mm_add_ps(bd_lo, bd_hi);

    // A second interleave lines up the two partial sums of every input, in
    // a, b, c, d order, ready for the final vertical add.
    const __m128 even_parts = _mm_unpacklo_ps(half_ac, half_bd);
    const __m128 odd_parts = _mm_unpackhi_ps(half_ac, half_bd);
    return v_float32x4(_mm_add_ps(even_parts, odd_parts));
#endif
}
// NOTE(review): presumably generates the std::max-based reduction over the
// lanes of v_uint32x4; the macro body is defined outside this excerpt - confirm.
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)

@ -741,6 +741,23 @@ template<typename R> struct TheTest
return *this;
}
/** @brief Checks that v_reduce_sum4 puts the lane sum of each argument into
 *  the matching output lane (a -> 0, b -> 1, c -> 2, d -> 3).
 *
 *  @return *this, so that test calls can be chained.
 */
TheTest & test_reduce_sum4()
{
    // Each input uses a distinct digit pattern so that a sum landing in the
    // wrong output lane is immediately visible in the failure message.
    R a(0.1f, 0.02f, 0.003f, 0.0004f);
    R b(1, 20, 300, 4000);
    R c(10, 2, 0.3f, 0.04f);
    R d(1, 2, 3, 4);

    R sum = v_reduce_sum4(a, b, c, d);
    Data<R> res = sum;

    // The lanes of a and c are not exactly representable in binary floating
    // point, and the order in which v_reduce_sum4 pairs the lanes depends on
    // the instruction set it was built for. Compare with a ULP tolerance so
    // that a last-bit rounding difference is not reported as a failure.
    EXPECT_FLOAT_EQ(0.1234f, res[0]);
    EXPECT_FLOAT_EQ(12.34f, res[2]);

    // The lanes of b and d are small integers: every partial sum is exact
    // whatever the summation order, so an exact comparison is kept here.
    EXPECT_EQ(4321.0f, res[1]);
    EXPECT_EQ(10.0f, res[3]);
    return *this;
}
TheTest & test_loadstore_fp16()
{
#if CV_FP16 && CV_SIMD128
@ -986,6 +1003,7 @@ TEST(hal_intrin, float32x4) {
.test_float_cvt64()
.test_matmul()
.test_transpose()
.test_reduce_sum4()
;
}

Loading…
Cancel
Save