fix for cornerHarris

pull/3326/head
Ilya Lavrenov 10 years ago
parent bbc161e1cb
commit 1c491c42cd
  1. 11
      modules/core/src/stat.cpp
  2. 4
      modules/imgproc/src/corner.cpp

@@ -2051,17 +2051,6 @@ float normL2Sqr_(const float* a, const float* b, int n)
d = buf[0] + buf[1] + buf[2] + buf[3];
}
else
-#elif CV_NEON
-float32x4_t v_sum = vdupq_n_f32(0.0f);
-for ( ; j <= n - 4; j += 4)
-{
-float32x4_t v_diff = vmulq_f32(vld1q_f32(a + j), vld1q_f32(b + j));
-v_sum = vaddq_f32(v_sum, vmulq_f32(v_diff, v_diff));
-}
-float CV_DECL_ALIGNED(16) buf[4];
-vst1q_f32(buf, v_sum);
-d = buf[0] + buf[1] + buf[2] + buf[3];
-#endif
{
for( ; j <= n - 4; j += 4 )

@@ -126,7 +126,7 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
if( simd )
{
__m128 k4 = _mm_set1_ps((float)k);
-for( ; j <= size.width - 5; j += 4 )
+for( ; j <= size.width - 4; j += 4 )
{
__m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
__m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
@@ -151,7 +151,7 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
for( ; j <= size.width - 4; j += 4 )
{
-float32x4x3_t v_src = vld3q_f32(cov + j + 3);
+float32x4x3_t v_src = vld3q_f32(cov + j * 3);
float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2];
float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b);
float32x4_t v_ac = vaddq_f32(v_a, v_c);

Loading…
Cancel
Save