video/lkpyramid (NEON): accumulate A11/A12/A22 and b1/b2 partial sums in float

The per-lane accumulators nA11/nA12/nA22 and nB1/nB2 change from int to float
(presumably to keep the running sums from overflowing int32 - confirm against
the originating report). The int32 temporary previously named nq0 becomes q14
so that nq0 can hold the float accumulator.

The int32 products are value-converted with vcvtq_f32_s32 before being added
to the float accumulators. vreinterpretq_f32_s32 must not be used here: it
only relabels the register bits, so the integer bit pattern would be summed
as if it were an IEEE-754 float.

NOTE(review): blank context lines and indentation were reconstructed from the
hunk line counts; verify against the target file before applying.

diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp
index d775383af3..b1ad94ddfa 100644
--- a/modules/video/src/lkpyramid.cpp
+++ b/modules/video/src/lkpyramid.cpp
@@ -294,7 +294,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 
 #if CV_NEON
 
-        int CV_DECL_ALIGNED(16) nA11[] = {0, 0, 0, 0}, nA12[] = {0, 0, 0, 0}, nA22[] = {0, 0, 0, 0};
+        float CV_DECL_ALIGNED(16) nA11[] = { 0, 0, 0, 0 }, nA12[] = { 0, 0, 0, 0 }, nA22[] = { 0, 0, 0, 0 };
         const int shifter1 = -(W_BITS - 5); //negative so it shifts right
         const int shifter2 = -(W_BITS);
 
@@ -406,19 +406,19 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
                 q6 = vaddq_s32(q6, q8);
 
                 q7 = vmull_s16(d4d5.val[0], d28);
-                int32x4_t nq0 = vmull_s16(d4d5.val[1], d28);
+                int32x4_t q14 = vmull_s16(d4d5.val[1], d28);
                 q8 = vmull_s16(d6d7.val[0], d29);
                 int32x4_t q15 = vmull_s16(d6d7.val[1], d29);
 
                 q7 = vaddq_s32(q7, q8);
-                nq0 = vaddq_s32(nq0, q15);
+                q14 = vaddq_s32(q14, q15);
 
                 q4 = vaddq_s32(q4, q7);
-                q6 = vaddq_s32(q6, nq0);
+                q6 = vaddq_s32(q6, q14);
 
-                int32x4_t nq1 = vld1q_s32(nA12);
-                int32x4_t nq2 = vld1q_s32(nA22);
-                nq0 = vld1q_s32(nA11);
+                float32x4_t nq0 = vld1q_f32(nA11);
+                float32x4_t nq1 = vld1q_f32(nA12);
+                float32x4_t nq2 = vld1q_f32(nA22);
 
                 q4 = vqrshlq_s32(q4, q12);
                 q6 = vqrshlq_s32(q6, q12);
@@ -427,13 +427,13 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
                 q8 = vmulq_s32(q4, q6);
                 q15 = vmulq_s32(q6, q6);
 
-                nq0 = vaddq_s32(nq0, q7);
-                nq1 = vaddq_s32(nq1, q8);
-                nq2 = vaddq_s32(nq2, q15);
+                nq0 = vaddq_f32(nq0, vcvtq_f32_s32(q7));
+                nq1 = vaddq_f32(nq1, vcvtq_f32_s32(q8));
+                nq2 = vaddq_f32(nq2, vcvtq_f32_s32(q15));
 
-                vst1q_s32(nA11, nq0);
-                vst1q_s32(nA12, nq1);
-                vst1q_s32(nA22, nq2);
+                vst1q_f32(nA11, nq0);
+                vst1q_f32(nA12, nq1);
+                vst1q_f32(nA22, nq2);
 
                 int16x4_t d8 = vmovn_s32(q4);
                 int16x4_t d12 = vmovn_s32(q6);
@@ -474,9 +474,9 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 #endif
 
 #if CV_NEON
-        iA11 += (float)(nA11[0] + nA11[1] + nA11[2] + nA11[3]);
-        iA12 += (float)(nA12[0] + nA12[1] + nA12[2] + nA12[3]);
-        iA22 += (float)(nA22[0] + nA22[1] + nA22[2] + nA22[3]);
+        iA11 += nA11[0] + nA11[1] + nA11[2] + nA11[3];
+        iA12 += nA12[0] + nA12[1] + nA12[2] + nA12[3];
+        iA22 += nA22[0] + nA22[1] + nA22[2] + nA22[3];
 #endif
 
         A11 = iA11*FLT_SCALE;
@@ -530,7 +530,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 #endif
 
 #if CV_NEON
-            int CV_DECL_ALIGNED(16) nB1[] = {0,0,0,0}, nB2[] = {0,0,0,0};
+            float CV_DECL_ALIGNED(16) nB1[] = { 0,0,0,0 }, nB2[] = { 0,0,0,0 };
 
             const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00);
             const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01);
@@ -621,8 +621,8 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
                     nq5 = vqrshlq_s32(nq5, q11);
 
                     int16x8x2_t q0q1 = vld2q_s16(dIptr);
-                    nq11 = vld1q_s32(nB1);
-                    int32x4_t nq15 = vld1q_s32(nB2);
+                    float32x4_t nB1v = vld1q_f32(nB1);
+                    float32x4_t nB2v = vld1q_f32(nB2);
 
                     nq4 = vsubq_s32(nq4, nq6);
                     nq5 = vsubq_s32(nq5, nq8);
@@ -642,11 +642,11 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
                     nq9 = vaddq_s32(nq9, nq10);
                     nq4 = vaddq_s32(nq4, nq5);
 
-                    nq11 = vaddq_s32(nq11, nq9);
-                    nq15 = vaddq_s32(nq15, nq4);
+                    nB1v = vaddq_f32(nB1v, vcvtq_f32_s32(nq9));
+                    nB2v = vaddq_f32(nB2v, vcvtq_f32_s32(nq4));
 
-                    vst1q_s32(nB1, nq11);
-                    vst1q_s32(nB2, nq15);
+                    vst1q_f32(nB1, nB1v);
+                    vst1q_f32(nB2, nB2v);
                 }
 #endif
 