|
|
@ -41,6 +41,7 @@ |
|
|
|
//M*/
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
|
|
|
|
#include "precomp.hpp" |
|
|
|
#include "precomp.hpp" |
|
|
|
|
|
|
|
#include "opencv2/core/hal/intrin.hpp" |
|
|
|
#include "opencl_kernels_video.hpp" |
|
|
|
#include "opencl_kernels_video.hpp" |
|
|
|
|
|
|
|
|
|
|
|
using namespace std; |
|
|
|
using namespace std; |
|
|
@ -517,7 +518,7 @@ inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar * |
|
|
|
int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz) |
|
|
|
int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz) |
|
|
|
{ |
|
|
|
{ |
|
|
|
float SSD = 0.0f; |
|
|
|
float SSD = 0.0f; |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
if (patch_sz == 8) |
|
|
|
if (patch_sz == 8) |
|
|
|
{ |
|
|
|
{ |
|
|
|
/* Variables to accumulate the sums */ |
|
|
|
/* Variables to accumulate the sums */ |
|
|
@ -569,7 +570,7 @@ inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar * |
|
|
|
dst_dUx += diff * I0x_ptr[i * I0_stride + j]; |
|
|
|
dst_dUx += diff * I0x_ptr[i * I0_stride + j]; |
|
|
|
dst_dUy += diff * I0y_ptr[i * I0_stride + j]; |
|
|
|
dst_dUy += diff * I0y_ptr[i * I0_stride + j]; |
|
|
|
} |
|
|
|
} |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
} |
|
|
|
} |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
return SSD; |
|
|
|
return SSD; |
|
|
@ -586,7 +587,7 @@ inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, |
|
|
|
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; |
|
|
|
float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; |
|
|
|
float n = (float)patch_sz * patch_sz; |
|
|
|
float n = (float)patch_sz * patch_sz; |
|
|
|
|
|
|
|
|
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
if (patch_sz == 8) |
|
|
|
if (patch_sz == 8) |
|
|
|
{ |
|
|
|
{ |
|
|
|
/* Variables to accumulate the sums */ |
|
|
|
/* Variables to accumulate the sums */ |
|
|
@ -641,7 +642,7 @@ inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, |
|
|
|
sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j]; |
|
|
|
sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j]; |
|
|
|
sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j]; |
|
|
|
sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j]; |
|
|
|
} |
|
|
|
} |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
} |
|
|
|
} |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; |
|
|
|
dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; |
|
|
@ -654,7 +655,7 @@ inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stri |
|
|
|
float w11, int patch_sz) |
|
|
|
float w11, int patch_sz) |
|
|
|
{ |
|
|
|
{ |
|
|
|
float SSD = 0.0f; |
|
|
|
float SSD = 0.0f; |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
if (patch_sz == 8) |
|
|
|
if (patch_sz == 8) |
|
|
|
{ |
|
|
|
{ |
|
|
|
v_float32x4 SSD_vec = v_setall_f32(0); |
|
|
|
v_float32x4 SSD_vec = v_setall_f32(0); |
|
|
@ -679,7 +680,7 @@ inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stri |
|
|
|
I0_ptr[i * I0_stride + j]; |
|
|
|
I0_ptr[i * I0_stride + j]; |
|
|
|
SSD += diff * diff; |
|
|
|
SSD += diff * diff; |
|
|
|
} |
|
|
|
} |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
} |
|
|
|
} |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
return SSD; |
|
|
|
return SSD; |
|
|
@ -691,7 +692,7 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int |
|
|
|
{ |
|
|
|
{ |
|
|
|
float sum_diff = 0.0f, sum_diff_sq = 0.0f; |
|
|
|
float sum_diff = 0.0f, sum_diff_sq = 0.0f; |
|
|
|
float n = (float)patch_sz * patch_sz; |
|
|
|
float n = (float)patch_sz * patch_sz; |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
if (patch_sz == 8) |
|
|
|
if (patch_sz == 8) |
|
|
|
{ |
|
|
|
{ |
|
|
|
v_float32x4 sum_diff_vec = v_setall_f32(0); |
|
|
|
v_float32x4 sum_diff_vec = v_setall_f32(0); |
|
|
@ -721,7 +722,7 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int |
|
|
|
sum_diff += diff; |
|
|
|
sum_diff += diff; |
|
|
|
sum_diff_sq += diff * diff; |
|
|
|
sum_diff_sq += diff * diff; |
|
|
|
} |
|
|
|
} |
|
|
|
#ifdef CV_SIMD128 |
|
|
|
#if CV_SIMD128 |
|
|
|
} |
|
|
|
} |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
return sum_diff_sq - sum_diff * sum_diff / n; |
|
|
|
return sum_diff_sq - sum_diff * sum_diff / n; |
|
|
|