diff --git a/modules/optflow/src/dis_flow.cpp b/modules/optflow/src/dis_flow.cpp index 77d62e971..d37a497f3 100644 --- a/modules/optflow/src/dis_flow.cpp +++ b/modules/optflow/src/dis_flow.cpp @@ -41,8 +41,10 @@ //M*/ #include "precomp.hpp" +#include "opencv2/core/hal/intrin.hpp" #include "opencl_kernels_optflow.hpp" + using namespace std; #define EPS 0.001F #define INF 1E+10F @@ -519,7 +521,7 @@ inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar * int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz) { float SSD = 0.0f; -#ifdef CV_SIMD128 +#if CV_SIMD128 if (patch_sz == 8) { /* Variables to accumulate the sums */ @@ -571,7 +573,7 @@ inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar * dst_dUx += diff * I0x_ptr[i * I0_stride + j]; dst_dUy += diff * I0y_ptr[i * I0_stride + j]; } -#ifdef CV_SIMD128 +#if CV_SIMD128 } #endif return SSD; @@ -588,7 +590,7 @@ inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; float n = (float)patch_sz * patch_sz; -#ifdef CV_SIMD128 +#if CV_SIMD128 if (patch_sz == 8) { /* Variables to accumulate the sums */ @@ -643,7 +645,7 @@ inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j]; sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j]; } -#ifdef CV_SIMD128 +#if CV_SIMD128 } #endif dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; @@ -656,7 +658,7 @@ inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stri float w11, int patch_sz) { float SSD = 0.0f; -#ifdef CV_SIMD128 +#if CV_SIMD128 if (patch_sz == 8) { v_float32x4 SSD_vec = v_setall_f32(0); @@ -681,7 +683,7 @@ inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stri I0_ptr[i * I0_stride + j]; SSD += diff * diff; } -#ifdef CV_SIMD128 +#if CV_SIMD128 } #endif return SSD; @@ -693,7 +695,7 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int { float sum_diff = 0.0f, sum_diff_sq = 0.0f; float n = (float)patch_sz * patch_sz; -#ifdef CV_SIMD128 +#if CV_SIMD128 if (patch_sz == 8) { v_float32x4 sum_diff_vec = v_setall_f32(0); @@ -723,7 +725,7 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int sum_diff += diff; sum_diff_sq += diff * diff; } -#ifdef CV_SIMD128 +#if CV_SIMD128 } #endif return sum_diff_sq - sum_diff * sum_diff / n; diff --git a/modules/optflow/src/variational_refinement.cpp b/modules/optflow/src/variational_refinement.cpp index 8fe8e0415..f52236e5b 100644 --- a/modules/optflow/src/variational_refinement.cpp +++ b/modules/optflow/src/variational_refinement.cpp @@ -597,7 +597,7 @@ void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range #undef INIT_ROW_POINTERS int j = 0; -#ifdef CV_SIMD128 +#if CV_SIMD128 v_float32x4 zeta_vec = v_setall_f32(zeta_squared); v_float32x4 eps_vec = v_setall_f32(epsilon_squared); v_float32x4 delta_vec = v_setall_f32(delta2); @@ -803,7 +803,7 @@ void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator() pA_v_next[j] += pWeight[j]; int j = 0; -#ifdef CV_SIMD128 +#if CV_SIMD128 v_float32x4 alpha2_vec = v_setall_f32(alpha2); v_float32x4 eps_vec = v_setall_f32(epsilon_squared); v_float32x4 cW_u_vec, cW_v_vec; @@ -913,7 +913,7 @@ void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator( #undef INIT_ROW_POINTERS int j = 0; -#ifdef CV_SIMD128 +#if CV_SIMD128 v_float32x4 pWeight_vec, uy_vec, vy_vec; for (; j < len - 3; j += 4) { @@ -1015,7 +1015,7 @@ void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &ran #undef INIT_ROW_POINTERS j = 0; -#ifdef CV_SIMD128 +#if CV_SIMD128 v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]); v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]); v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]);