|
|
|
@ -294,7 +294,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
|
|
|
|
|
#if CV_NEON |
|
|
|
|
|
|
|
|
|
int CV_DECL_ALIGNED(16) nA11[] = {0, 0, 0, 0}, nA12[] = {0, 0, 0, 0}, nA22[] = {0, 0, 0, 0}; |
|
|
|
|
float CV_DECL_ALIGNED(16) nA11[] = { 0, 0, 0, 0 }, nA12[] = { 0, 0, 0, 0 }, nA22[] = { 0, 0, 0, 0 }; |
|
|
|
|
const int shifter1 = -(W_BITS - 5); //negative so it shifts right
|
|
|
|
|
const int shifter2 = -(W_BITS); |
|
|
|
|
|
|
|
|
@ -406,19 +406,19 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
q6 = vaddq_s32(q6, q8); |
|
|
|
|
|
|
|
|
|
q7 = vmull_s16(d4d5.val[0], d28); |
|
|
|
|
int32x4_t nq0 = vmull_s16(d4d5.val[1], d28); |
|
|
|
|
int32x4_t q14 = vmull_s16(d4d5.val[1], d28); |
|
|
|
|
q8 = vmull_s16(d6d7.val[0], d29); |
|
|
|
|
int32x4_t q15 = vmull_s16(d6d7.val[1], d29); |
|
|
|
|
|
|
|
|
|
q7 = vaddq_s32(q7, q8); |
|
|
|
|
nq0 = vaddq_s32(nq0, q15); |
|
|
|
|
q14 = vaddq_s32(q14, q15); |
|
|
|
|
|
|
|
|
|
q4 = vaddq_s32(q4, q7); |
|
|
|
|
q6 = vaddq_s32(q6, nq0); |
|
|
|
|
q6 = vaddq_s32(q6, q14); |
|
|
|
|
|
|
|
|
|
int32x4_t nq1 = vld1q_s32(nA12); |
|
|
|
|
int32x4_t nq2 = vld1q_s32(nA22); |
|
|
|
|
nq0 = vld1q_s32(nA11); |
|
|
|
|
float32x4_t nq0 = vld1q_f32(nA11); |
|
|
|
|
float32x4_t nq1 = vld1q_f32(nA12); |
|
|
|
|
float32x4_t nq2 = vld1q_f32(nA22); |
|
|
|
|
|
|
|
|
|
q4 = vqrshlq_s32(q4, q12); |
|
|
|
|
q6 = vqrshlq_s32(q6, q12); |
|
|
|
@ -427,13 +427,13 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
q8 = vmulq_s32(q4, q6); |
|
|
|
|
q15 = vmulq_s32(q6, q6); |
|
|
|
|
|
|
|
|
|
nq0 = vaddq_s32(nq0, q7); |
|
|
|
|
nq1 = vaddq_s32(nq1, q8); |
|
|
|
|
nq2 = vaddq_s32(nq2, q15); |
|
|
|
|
// Accumulate by value: q7/q8/q15 hold int32 products, so they must be converted lane-wise to float (vcvtq_f32_s32); a bit-level reinterpret would add meaningless float bit patterns. |
nq0 = vaddq_f32(nq0, vcvtq_f32_s32(q7)); |
|
|
|
|
nq1 = vaddq_f32(nq1, vcvtq_f32_s32(q8)); |
|
|
|
|
nq2 = vaddq_f32(nq2, vcvtq_f32_s32(q15)); |
|
|
|
|
|
|
|
|
|
vst1q_s32(nA11, nq0); |
|
|
|
|
vst1q_s32(nA12, nq1); |
|
|
|
|
vst1q_s32(nA22, nq2); |
|
|
|
|
vst1q_f32(nA11, nq0); |
|
|
|
|
vst1q_f32(nA12, nq1); |
|
|
|
|
vst1q_f32(nA22, nq2); |
|
|
|
|
|
|
|
|
|
int16x4_t d8 = vmovn_s32(q4); |
|
|
|
|
int16x4_t d12 = vmovn_s32(q6); |
|
|
|
@ -474,9 +474,9 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if CV_NEON |
|
|
|
|
iA11 += (float)(nA11[0] + nA11[1] + nA11[2] + nA11[3]); |
|
|
|
|
iA12 += (float)(nA12[0] + nA12[1] + nA12[2] + nA12[3]); |
|
|
|
|
iA22 += (float)(nA22[0] + nA22[1] + nA22[2] + nA22[3]); |
|
|
|
|
iA11 += nA11[0] + nA11[1] + nA11[2] + nA11[3]; |
|
|
|
|
iA12 += nA12[0] + nA12[1] + nA12[2] + nA12[3]; |
|
|
|
|
iA22 += nA22[0] + nA22[1] + nA22[2] + nA22[3]; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
A11 = iA11*FLT_SCALE; |
|
|
|
@ -530,7 +530,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if CV_NEON |
|
|
|
|
int CV_DECL_ALIGNED(16) nB1[] = {0,0,0,0}, nB2[] = {0,0,0,0}; |
|
|
|
|
float CV_DECL_ALIGNED(16) nB1[] = { 0,0,0,0 }, nB2[] = { 0,0,0,0 }; |
|
|
|
|
|
|
|
|
|
const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00); |
|
|
|
|
const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01); |
|
|
|
@ -621,8 +621,8 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
nq5 = vqrshlq_s32(nq5, q11); |
|
|
|
|
|
|
|
|
|
int16x8x2_t q0q1 = vld2q_s16(dIptr); |
|
|
|
|
nq11 = vld1q_s32(nB1); |
|
|
|
|
int32x4_t nq15 = vld1q_s32(nB2); |
|
|
|
|
float32x4_t nB1v = vld1q_f32(nB1); |
|
|
|
|
float32x4_t nB2v = vld1q_f32(nB2); |
|
|
|
|
|
|
|
|
|
nq4 = vsubq_s32(nq4, nq6); |
|
|
|
|
nq5 = vsubq_s32(nq5, nq8); |
|
|
|
@ -642,11 +642,11 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const |
|
|
|
|
nq9 = vaddq_s32(nq9, nq10); |
|
|
|
|
nq4 = vaddq_s32(nq4, nq5); |
|
|
|
|
|
|
|
|
|
nq11 = vaddq_s32(nq11, nq9); |
|
|
|
|
nq15 = vaddq_s32(nq15, nq4); |
|
|
|
|
// Accumulate by value: nq9/nq4 are int32 sums, so convert each lane to float (vcvtq_f32_s32) before adding; reinterpreting the bits would corrupt the float accumulators. |
nB1v = vaddq_f32(nB1v, vcvtq_f32_s32(nq9)); |
|
|
|
|
nB2v = vaddq_f32(nB2v, vcvtq_f32_s32(nq4)); |
|
|
|
|
|
|
|
|
|
vst1q_s32(nB1, nq11); |
|
|
|
|
vst1q_s32(nB2, nq15); |
|
|
|
|
vst1q_f32(nB1, nB1v); |
|
|
|
|
vst1q_f32(nB2, nB2v); |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|