Merge pull request #13650 from terfendail:shapedescr_wintr

pull/13658/head
Alexander Alekhin 6 years ago
commit 0395b2ea9c
  1. 22
      modules/imgproc/perf/perf_contours.cpp
  2. 124
      modules/imgproc/src/shapedescr.cpp

@ -84,4 +84,26 @@ PERF_TEST_P(TestFindContoursFF, findContours,
SANITY_CHECK_NOTHING(); SANITY_CHECK_NOTHING();
} }
// Fixture for the boundingRect benchmark; parameters are (point element depth, point count).
typedef TestBaseWithParam< tuple<MatDepth, int> > TestBoundingRect;

// Times boundingRect() on a (count x 2) matrix of the requested depth.
// No output values are verified (SANITY_CHECK_NOTHING); only the timing is recorded.
PERF_TEST_P(TestBoundingRect, BoundingRect,
    Combine(
        testing::Values(CV_32S, CV_32F),       // points type
        Values(400, 511, 1000, 10000, 100000)  // points count
    )
)
{
    const int depth = get<0>(GetParam());
    const int count = get<1>(GetParam());

    // One row per point, two columns per row.
    Mat points(count, 2, depth);
    declare.in(points, WARMUP_RNG);

    cv::Rect bbox;
    TEST_CYCLE()
    {
        bbox = boundingRect(points);
    }

    SANITY_CHECK_NOTHING();
}
} } // namespace } } // namespace

@ -39,6 +39,8 @@
// //
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"
namespace cv namespace cv
{ {
@ -746,53 +748,105 @@ static Rect pointSetBoundingRect( const Mat& points )
if( npoints == 0 ) if( npoints == 0 )
return Rect(); return Rect();
const Point* pts = points.ptr<Point>(); #if CV_SIMD
Point pt = pts[0]; const int64_t* pts = points.ptr<int64_t>();
#if CV_SSE4_2
if(cv::checkHardwareSupport(CV_CPU_SSE4_2))
{
if( !is_float ) if( !is_float )
{ {
__m128i minval, maxval; v_int32 minval, maxval;
minval = maxval = _mm_loadl_epi64((const __m128i*)(&pt)); //min[0]=pt.x, min[1]=pt.y minval = maxval = v_reinterpret_as_s32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
for( i = 1; i <= npoints - v_int32::nlanes/2; i+= v_int32::nlanes/2 )
for( i = 1; i < npoints; i++ )
{ {
__m128i ptXY = _mm_loadl_epi64((const __m128i*)&pts[i]); v_int32 ptXY2 = v_reinterpret_as_s32(vx_load(pts + i));
minval = _mm_min_epi32(ptXY, minval); minval = v_min(ptXY2, minval);
maxval = _mm_max_epi32(ptXY, maxval); maxval = v_max(ptXY2, maxval);
} }
xmin = _mm_cvtsi128_si32(minval); minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
ymin = _mm_cvtsi128_si32(_mm_srli_si128(minval, 4)); maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
xmax = _mm_cvtsi128_si32(maxval); if( i <= npoints - v_int32::nlanes/4 )
ymax = _mm_cvtsi128_si32(_mm_srli_si128(maxval, 4)); {
v_int32 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
minval = v_min(ptXY, minval);
maxval = v_max(ptXY, maxval);
i += v_int64::nlanes/2;
} }
else for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
{ {
__m128 minvalf, maxvalf, z = _mm_setzero_ps(), ptXY = _mm_setzero_ps(); minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
minvalf = maxvalf = _mm_loadl_pi(z, (const __m64*)(&pt)); maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
}
for( i = 1; i < npoints; i++ ) xmin = minval.get0();
xmax = maxval.get0();
ymin = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))).get0();
ymax = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))).get0();
#if CV_SIMD_WIDTH > 16
if( i < npoints )
{ {
ptXY = _mm_loadl_pi(ptXY, (const __m64*)&pts[i]); v_int32x4 minval2, maxval2;
minval2 = maxval2 = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
minvalf = _mm_min_ps(minvalf, ptXY); for( i++; i < npoints; i++ )
maxvalf = _mm_max_ps(maxvalf, ptXY); {
v_int32x4 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
minval2 = v_min(ptXY, minval2);
maxval2 = v_max(ptXY, maxval2);
} }
xmin = min(xmin, minval2.get0());
float xyminf[2], xymaxf[2]; xmax = max(xmax, maxval2.get0());
_mm_storel_pi((__m64*)xyminf, minvalf); ymin = min(ymin, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval2))).get0());
_mm_storel_pi((__m64*)xymaxf, maxvalf); ymax = max(ymax, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0());
xmin = cvFloor(xyminf[0]);
ymin = cvFloor(xyminf[1]);
xmax = cvFloor(xymaxf[0]);
ymax = cvFloor(xymaxf[1]);
} }
#endif
} }
else else
#endif
{ {
v_float32 minval, maxval;
minval = maxval = v_reinterpret_as_f32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
for( i = 1; i <= npoints - v_float32::nlanes/2; i+= v_float32::nlanes/2 )
{
v_float32 ptXY2 = v_reinterpret_as_f32(vx_load(pts + i));
minval = v_min(ptXY2, minval);
maxval = v_max(ptXY2, maxval);
}
minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
if( i <= npoints - v_float32::nlanes/4 )
{
v_float32 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
minval = v_min(ptXY, minval);
maxval = v_max(ptXY, maxval);
i += v_float32::nlanes/4;
}
for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
{
minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
}
xmin = cvFloor(minval.get0());
xmax = cvFloor(maxval.get0());
ymin = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))).get0());
ymax = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))).get0());
#if CV_SIMD_WIDTH > 16
if( i < npoints )
{
v_float32x4 minval2, maxval2;
minval2 = maxval2 = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
for( i++; i < npoints; i++ )
{
v_float32x4 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
minval2 = v_min(ptXY, minval2);
maxval2 = v_max(ptXY, maxval2);
}
xmin = min(xmin, cvFloor(minval2.get0()));
xmax = max(xmax, cvFloor(maxval2.get0()));
ymin = min(ymin, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval2))).get0()));
ymax = max(ymax, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0()));
}
#endif
}
#else
const Point* pts = points.ptr<Point>();
Point pt = pts[0];
if( !is_float ) if( !is_float )
{ {
xmin = xmax = pt.x; xmin = xmax = pt.x;
@ -848,7 +902,7 @@ static Rect pointSetBoundingRect( const Mat& points )
v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f); v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f); v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
} }
} #endif
return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1); return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
} }

Loading…
Cancel
Save