|
|
@ -39,6 +39,8 @@ |
|
|
|
//
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
//M*/
|
|
|
|
#include "precomp.hpp" |
|
|
|
#include "precomp.hpp" |
|
|
|
|
|
|
|
#include "opencv2/core/hal/intrin.hpp" |
|
|
|
|
|
|
|
|
|
|
|
namespace cv |
|
|
|
namespace cv |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
|
|
|
@ -746,53 +748,105 @@ static Rect pointSetBoundingRect( const Mat& points ) |
|
|
|
if( npoints == 0 ) |
|
|
|
if( npoints == 0 ) |
|
|
|
return Rect(); |
|
|
|
return Rect(); |
|
|
|
|
|
|
|
|
|
|
|
const Point* pts = points.ptr<Point>(); |
|
|
|
#if CV_SIMD |
|
|
|
Point pt = pts[0]; |
|
|
|
const int64_t* pts = points.ptr<int64_t>(); |
|
|
|
|
|
|
|
|
|
|
|
#if CV_SSE4_2 |
|
|
|
|
|
|
|
if(cv::checkHardwareSupport(CV_CPU_SSE4_2)) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
if( !is_float ) |
|
|
|
if( !is_float ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
__m128i minval, maxval; |
|
|
|
v_int32 minval, maxval; |
|
|
|
minval = maxval = _mm_loadl_epi64((const __m128i*)(&pt)); //min[0]=pt.x, min[1]=pt.y
|
|
|
|
minval = maxval = v_reinterpret_as_s32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
|
|
|
|
|
|
|
|
for( i = 1; i <= npoints - v_int32::nlanes/2; i+= v_int32::nlanes/2 ) |
|
|
|
for( i = 1; i < npoints; i++ ) |
|
|
|
|
|
|
|
{ |
|
|
|
{ |
|
|
|
__m128i ptXY = _mm_loadl_epi64((const __m128i*)&pts[i]); |
|
|
|
v_int32 ptXY2 = v_reinterpret_as_s32(vx_load(pts + i)); |
|
|
|
minval = _mm_min_epi32(ptXY, minval); |
|
|
|
minval = v_min(ptXY2, minval); |
|
|
|
maxval = _mm_max_epi32(ptXY, maxval); |
|
|
|
maxval = v_max(ptXY2, maxval); |
|
|
|
} |
|
|
|
} |
|
|
|
xmin = _mm_cvtsi128_si32(minval); |
|
|
|
minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval)))); |
|
|
|
ymin = _mm_cvtsi128_si32(_mm_srli_si128(minval, 4)); |
|
|
|
maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval)))); |
|
|
|
xmax = _mm_cvtsi128_si32(maxval); |
|
|
|
if( i <= npoints - v_int32::nlanes/4 ) |
|
|
|
ymax = _mm_cvtsi128_si32(_mm_srli_si128(maxval, 4)); |
|
|
|
{ |
|
|
|
|
|
|
|
v_int32 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i)))); |
|
|
|
|
|
|
|
minval = v_min(ptXY, minval); |
|
|
|
|
|
|
|
maxval = v_max(ptXY, maxval); |
|
|
|
|
|
|
|
i += v_int64::nlanes/2; |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
for(int j = 16; j < CV_SIMD_WIDTH; j*=2) |
|
|
|
{ |
|
|
|
{ |
|
|
|
__m128 minvalf, maxvalf, z = _mm_setzero_ps(), ptXY = _mm_setzero_ps(); |
|
|
|
minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval)))); |
|
|
|
minvalf = maxvalf = _mm_loadl_pi(z, (const __m64*)(&pt)); |
|
|
|
maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval)))); |
|
|
|
|
|
|
|
} |
|
|
|
for( i = 1; i < npoints; i++ ) |
|
|
|
xmin = minval.get0(); |
|
|
|
|
|
|
|
xmax = maxval.get0(); |
|
|
|
|
|
|
|
ymin = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))).get0(); |
|
|
|
|
|
|
|
ymax = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))).get0(); |
|
|
|
|
|
|
|
#if CV_SIMD_WIDTH > 16 |
|
|
|
|
|
|
|
if( i < npoints ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
ptXY = _mm_loadl_pi(ptXY, (const __m64*)&pts[i]); |
|
|
|
v_int32x4 minval2, maxval2; |
|
|
|
|
|
|
|
minval2 = maxval2 = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i)))); |
|
|
|
minvalf = _mm_min_ps(minvalf, ptXY); |
|
|
|
for( i++; i < npoints; i++ ) |
|
|
|
maxvalf = _mm_max_ps(maxvalf, ptXY); |
|
|
|
{ |
|
|
|
|
|
|
|
v_int32x4 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i)))); |
|
|
|
|
|
|
|
minval2 = v_min(ptXY, minval2); |
|
|
|
|
|
|
|
maxval2 = v_max(ptXY, maxval2); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
xmin = min(xmin, minval2.get0()); |
|
|
|
float xyminf[2], xymaxf[2]; |
|
|
|
xmax = max(xmax, maxval2.get0()); |
|
|
|
_mm_storel_pi((__m64*)xyminf, minvalf); |
|
|
|
ymin = min(ymin, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval2))).get0()); |
|
|
|
_mm_storel_pi((__m64*)xymaxf, maxvalf); |
|
|
|
ymax = max(ymax, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0()); |
|
|
|
xmin = cvFloor(xyminf[0]); |
|
|
|
|
|
|
|
ymin = cvFloor(xyminf[1]); |
|
|
|
|
|
|
|
xmax = cvFloor(xymaxf[0]); |
|
|
|
|
|
|
|
ymax = cvFloor(xymaxf[1]); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
else |
|
|
|
#endif |
|
|
|
|
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
v_float32 minval, maxval; |
|
|
|
|
|
|
|
minval = maxval = v_reinterpret_as_f32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
|
|
|
|
|
|
|
|
for( i = 1; i <= npoints - v_float32::nlanes/2; i+= v_float32::nlanes/2 ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
v_float32 ptXY2 = v_reinterpret_as_f32(vx_load(pts + i)); |
|
|
|
|
|
|
|
minval = v_min(ptXY2, minval); |
|
|
|
|
|
|
|
maxval = v_max(ptXY2, maxval); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval)))); |
|
|
|
|
|
|
|
maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval)))); |
|
|
|
|
|
|
|
if( i <= npoints - v_float32::nlanes/4 ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
v_float32 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i)))); |
|
|
|
|
|
|
|
minval = v_min(ptXY, minval); |
|
|
|
|
|
|
|
maxval = v_max(ptXY, maxval); |
|
|
|
|
|
|
|
i += v_float32::nlanes/4; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
for(int j = 16; j < CV_SIMD_WIDTH; j*=2) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval)))); |
|
|
|
|
|
|
|
maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval)))); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
xmin = cvFloor(minval.get0()); |
|
|
|
|
|
|
|
xmax = cvFloor(maxval.get0()); |
|
|
|
|
|
|
|
ymin = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))).get0()); |
|
|
|
|
|
|
|
ymax = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))).get0()); |
|
|
|
|
|
|
|
#if CV_SIMD_WIDTH > 16 |
|
|
|
|
|
|
|
if( i < npoints ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
v_float32x4 minval2, maxval2; |
|
|
|
|
|
|
|
minval2 = maxval2 = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i)))); |
|
|
|
|
|
|
|
for( i++; i < npoints; i++ ) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
v_float32x4 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i)))); |
|
|
|
|
|
|
|
minval2 = v_min(ptXY, minval2); |
|
|
|
|
|
|
|
maxval2 = v_max(ptXY, maxval2); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
xmin = min(xmin, cvFloor(minval2.get0())); |
|
|
|
|
|
|
|
xmax = max(xmax, cvFloor(maxval2.get0())); |
|
|
|
|
|
|
|
ymin = min(ymin, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval2))).get0())); |
|
|
|
|
|
|
|
ymax = max(ymax, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0())); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
const Point* pts = points.ptr<Point>(); |
|
|
|
|
|
|
|
Point pt = pts[0]; |
|
|
|
|
|
|
|
|
|
|
|
if( !is_float ) |
|
|
|
if( !is_float ) |
|
|
|
{ |
|
|
|
{ |
|
|
|
xmin = xmax = pt.x; |
|
|
|
xmin = xmax = pt.x; |
|
|
@ -848,7 +902,7 @@ static Rect pointSetBoundingRect( const Mat& points ) |
|
|
|
v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f); |
|
|
|
v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f); |
|
|
|
v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f); |
|
|
|
v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1); |
|
|
|
return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1); |
|
|
|
} |
|
|
|
} |
|
|
|