Merge pull request #2913 from ilya-lavrenov:sse2_precornerdetect

pull/2940/head
Vadim Pisarevsky 11 years ago
commit d6233b13ba
  1. 26
      modules/imgproc/src/corner.cpp

@@ -608,6 +608,11 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
factor *= 255; factor *= 255;
factor = 1./(factor * factor * factor); factor = 1./(factor * factor * factor);
#if CV_SSE2
volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
__m128 v_factor = _mm_set1_ps((float)factor), v_m2 = _mm_set1_ps(-2.0f);
#endif
Size size = src.size(); Size size = src.size();
int i, j; int i, j;
for( i = 0; i < size.height; i++ ) for( i = 0; i < size.height; i++ )
@@ -619,7 +624,26 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
const float* d2ydata = (const float*)(D2y.data + i*D2y.step); const float* d2ydata = (const float*)(D2y.data + i*D2y.step);
const float* dxydata = (const float*)(Dxy.data + i*Dxy.step); const float* dxydata = (const float*)(Dxy.data + i*Dxy.step);
for( j = 0; j < size.width; j++ ) j = 0;
#if CV_SSE2
if (haveSSE2)
{
for( ; j <= size.width - 4; j += 4 )
{
__m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j));
__m128 v_dy = _mm_loadu_ps((const float *)(dydata + j));
__m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j)));
__m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j)));
__m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j)));
v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2))));
_mm_storeu_ps(dstdata + j, v_s1);
}
}
#endif
for( ; j < size.width; j++ )
{ {
float dx = dxdata[j]; float dx = dxdata[j];
float dy = dydata[j]; float dy = dydata[j];

Loading…
Cancel
Save