Merge pull request #16421 from mshabunin:add-local-pool

pull/16539/head
Alexander Alekhin 5 years ago
commit 69944cd46b
  1. 270
      modules/calib3d/src/stereobm.cpp
  2. 103
      modules/core/include/opencv2/core/utils/buffer_area.private.hpp
  3. 121
      modules/core/src/buffer_area.cpp
  4. 129
      modules/core/test/test_utils.cpp

@ -48,8 +48,10 @@
#include "precomp.hpp" #include "precomp.hpp"
#include <stdio.h> #include <stdio.h>
#include <limits> #include <limits>
#include <vector>
#include "opencl_kernels_calib3d.hpp" #include "opencl_kernels_calib3d.hpp"
#include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/utils/buffer_area.private.hpp"
namespace cv namespace cv
{ {
@ -85,6 +87,19 @@ struct StereoBMParams
Rect roi1, roi2; Rect roi1, roi2;
int disp12MaxDiff; int disp12MaxDiff;
int dispType; int dispType;
// True when both the pre-filter cap (<= 31) and the SAD window size (<= 21) are
// within the limits checked here; callers use the result to choose the
// ushort-based buffers and code path.
inline bool useShorts() const
{
    const bool capWithinLimit = preFilterCap <= 31;
    const bool windowWithinLimit = SADWindowSize <= 21;
    return capWithinLimit && windowWithinLimit;
}
// True when speckle filtering is configured: a positive window size together
// with a non-negative speckle range.
inline bool useFilterSpeckles() const
{
    if (speckleWindowSize <= 0)
        return false;
    return speckleRange >= 0;
}
// True when the configured pre-filter type is the normalized-response one.
inline bool useNormPrefilter() const
{
    const bool isNormalized = (StereoBM::PREFILTER_NORMALIZED_RESPONSE == preFilterType);
    return isNormalized;
}
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
@ -110,10 +125,10 @@ static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsi
} }
#endif #endif
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf ) static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, int *buf )
{ {
int x, y, wsz2 = winsize/2; int x, y, wsz2 = winsize/2;
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32); int* vsum = buf + (wsz2 + 1);
int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2); int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
const int OFS = 256*5, TABSZ = OFS*2 + 256; const int OFS = 256*5, TABSZ = OFS*2 + 256;
uchar tab[TABSZ]; uchar tab[TABSZ];
@ -309,13 +324,77 @@ inline int dispDescale(int v1, int v2, int d)
return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float
} }
/** Holder of all scratch buffers used by the block-matching code.
 *
 * Every pointer below is bound to the private utils::BufferArea, which performs the
 * actual allocation in commit() and resets the pointers to NULL when it is destroyed.
 * `area` is declared after the pointer members, so it is destroyed first, while the
 * pointers it resets are still alive.
 */
class BufferBM
{
    static const int TABSZ = 256;
public:
    // Per-stripe buffers: element [i] belongs to stripe number i.
    std::vector<int*> sad;           // bound only when the ushort variant is not used
    std::vector<int*> hsad;          // bound only when the ushort variant is not used
    std::vector<int*> htext;
    std::vector<uchar*> cbuf0;
    std::vector<ushort*> sad_short;  // bound only if CV_SIMD && params.useShorts()
    std::vector<ushort*> hsad_short; // bound only if CV_SIMD && params.useShorts()
    // One buffer per input image; bound only when params.useNormPrefilter() is true,
    // otherwise both entries stay NULL.
    int *prefilter[2];
    // Lookup table: tab[x] == |x - params.preFilterCap|
    uchar tab[TABSZ];
private:
    utils::BufferArea area;
public:
    /** Binds and allocates all buffers.
     * @param nstripes number of stripes (size of every per-stripe vector)
     * @param width    image width, used to size the prefilter buffers
     * @param height   image height, used to size the per-stripe buffers
     * @param params   algorithm parameters (window size, number of disparities, ...)
     */
    BufferBM(size_t nstripes, size_t width, size_t height, const StereoBMParams& params)
        : sad(nstripes, NULL),
        hsad(nstripes, NULL),
        htext(nstripes, NULL),
        cbuf0(nstripes, NULL),
        sad_short(nstripes, NULL),
        hsad_short(nstripes, NULL)
    {
        // BufferArea::allocate() requires the bound pointer to be NULL, and the raw
        // array is not initialised by the member-initialiser list above.
        prefilter[0] = prefilter[1] = NULL;

        const int wsz = params.SADWindowSize;
        const int ndisp = params.numDisparities;
        const int ftzero = params.preFilterCap;
        for (size_t i = 0; i < nstripes; ++i)
        {
            // 1D: [1][  ndisp  ][1]
#if CV_SIMD
            if (params.useShorts())
                area.allocate(sad_short[i], ndisp + 2);
            else
#endif
                area.allocate(sad[i], ndisp + 2);
            // 2D: [ wsz/2 + 1 ][   height   ][ wsz/2 + 1 ] * [ ndisp ]
#if CV_SIMD
            if (params.useShorts())
                area.allocate(hsad_short[i], (height + wsz + 2) * ndisp);
            else
#endif
                area.allocate(hsad[i], (height + wsz + 2) * ndisp);
            // 1D: [ wsz/2 + 1 ][   height   ][ wsz/2 + 1 ]
            area.allocate(htext[i], (height + wsz + 2));
            // 3D: [ wsz/2 + 1 ][   height   ][ wsz/2 + 1 ] * [ ndisp ] * [ wsz/2 + 1 ][ wsz/2 + 1 ]
            area.allocate(cbuf0[i], ((height + wsz + 2) * ndisp * (wsz + 2) + 256));
        }
        if (params.useNormPrefilter())
        {
            // One buffer per image: PrefilterInvoker reads prefilter[i] for i = 0, 1.
            for (size_t i = 0; i < 2; ++i)
                area.allocate(prefilter[i], width + params.preFilterSize + 2);
        }
        area.commit();
        // static table
        for (int x = 0; x < TABSZ; x++)
            tab[x] = (uchar)std::abs(x - ftzero);
    }
};
#if CV_SIMD #if CV_SIMD
template <typename dType> template <typename dType>
static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, StereoBMParams& state, Mat& disp, Mat& cost, const StereoBMParams& state,
uchar* buf, int _dy0, int _dy1 ) int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum )
{ {
const int ALIGN = CV_SIMD_WIDTH;
int x, y, d; int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2; int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
@ -325,15 +404,13 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
int rofs = -MIN(ndisp - 1 + mindisp, 0); int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows; int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1; int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold; int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio; int uniquenessRatio = state.uniquenessRatio;
const int disp_shift = dispShiftTemplate<dType>::value; const int disp_shift = dispShiftTemplate<dType>::value;
dType FILTERED = (dType)((mindisp - 1) << disp_shift); dType FILTERED = (dType)((mindisp - 1) << disp_shift);
ushort *sad, *hsad0, *hsad, *hsad_sub; ushort *hsad, *hsad_sub;
int *htext; uchar *cbuf;
uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.ptr() + lofs; const uchar* lptr0 = left.ptr() + lofs;
const uchar* rptr0 = right.ptr() + rofs; const uchar* rptr0 = right.ptr() + rofs;
const uchar *lptr, *lptr_sub, *rptr; const uchar *lptr, *lptr_sub, *rptr;
@ -343,23 +420,20 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
int cstep = (height + dy0 + dy1)*ndisp; int cstep = (height + dy0 + dy1)*ndisp;
short costbuf = 0; short costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0; int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256; const uchar * tab = bufX.tab;
uchar tab[TABSZ];
short v_seq[v_int16::nlanes]; short v_seq[v_int16::nlanes];
for (short i = 0; i < v_int16::nlanes; ++i) for (short i = 0; i < v_int16::nlanes; ++i)
v_seq[i] = i; v_seq[i] = i;
sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN); ushort *sad = bufX.sad_short[bufNum] + 1;
hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); ushort *hsad0 = bufX.hsad_short[bufNum] + (wsz2 + 1) * ndisp;
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN); int *htext = bufX.htext[bufNum] + (wsz2 + 1);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp;
for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)std::abs(x - ftzero);
// initialize buffers // initialize buffers
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) ); memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0]));
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) ); memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0]));
memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0]));
for( x = -wsz2-1; x < wsz2; x++ ) for( x = -wsz2-1; x < wsz2; x++ )
{ {
@ -594,10 +668,9 @@ template <typename mType>
static void static void
findStereoCorrespondenceBM( const Mat& left, const Mat& right, findStereoCorrespondenceBM( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, const StereoBMParams& state, Mat& disp, Mat& cost, const StereoBMParams& state,
uchar* buf, int _dy0, int _dy1 ) int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum )
{ {
const int ALIGN = CV_SIMD_WIDTH;
int x, y, d; int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2; int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
@ -607,14 +680,13 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int rofs = -MIN(ndisp - 1 + mindisp, 0); int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows; int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1; int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold; int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio; int uniquenessRatio = state.uniquenessRatio;
const int disp_shift = dispShiftTemplate<mType>::value; const int disp_shift = dispShiftTemplate<mType>::value;
mType FILTERED = (mType)((mindisp - 1) << disp_shift); mType FILTERED = (mType)((mindisp - 1) << disp_shift);
int *sad, *hsad0, *hsad, *hsad_sub, *htext; int *hsad, *hsad_sub;
uchar *cbuf0, *cbuf; uchar *cbuf;
const uchar* lptr0 = left.ptr() + lofs; const uchar* lptr0 = left.ptr() + lofs;
const uchar* rptr0 = right.ptr() + rofs; const uchar* rptr0 = right.ptr() + rofs;
const uchar *lptr, *lptr_sub, *rptr; const uchar *lptr, *lptr_sub, *rptr;
@ -624,8 +696,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int cstep = (height+dy0+dy1)*ndisp; int cstep = (height+dy0+dy1)*ndisp;
int costbuf = 0; int costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0; int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256; const uchar * tab = bufX.tab;
uchar tab[TABSZ];
#if CV_SIMD #if CV_SIMD
int v_seq[v_int32::nlanes]; int v_seq[v_int32::nlanes];
@ -634,17 +705,15 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes); v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes);
#endif #endif
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); int *sad = bufX.sad[bufNum] + 1;
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); int *hsad0 = bufX.hsad[bufNum] + (wsz2 + 1) * ndisp;
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN); int *htext = bufX.htext[bufNum] + (wsz2 + 1);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp;
for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)std::abs(x - ftzero);
// initialize buffers // initialize buffers
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) ); memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0]));
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) ); memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0]));
memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0]));
for( x = -wsz2-1; x < wsz2; x++ ) for( x = -wsz2-1; x < wsz2; x++ )
{ {
@ -890,7 +959,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state) static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state)
{ {
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE ) if (state->useNormPrefilter())
{ {
if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap)) if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap))
return false; return false;
@ -911,29 +980,28 @@ static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray le
struct PrefilterInvoker : public ParallelLoopBody struct PrefilterInvoker : public ParallelLoopBody
{ {
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right, PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
uchar* buf0, uchar* buf1, StereoBMParams* _state) const BufferBM &bufX_, const StereoBMParams &state_)
: bufX(bufX_), state(state_)
{ {
imgs0[0] = &left0; imgs0[1] = &right0; imgs0[0] = &left0; imgs0[1] = &right0;
imgs[0] = &left; imgs[1] = &right; imgs[0] = &left; imgs[1] = &right;
buf[0] = buf0; buf[1] = buf1;
state = _state;
} }
void operator()(const Range& range) const CV_OVERRIDE void operator()(const Range& range) const CV_OVERRIDE
{ {
for( int i = range.start; i < range.end; i++ ) for( int i = range.start; i < range.end; i++ )
{ {
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE ) if (state.useNormPrefilter())
prefilterNorm( *imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i] ); prefilterNorm( *imgs0[i], *imgs[i], state.preFilterSize, state.preFilterCap, bufX.prefilter[i] );
else else
prefilterXSobel( *imgs0[i], *imgs[i], state->preFilterCap ); prefilterXSobel( *imgs0[i], *imgs[i], state.preFilterCap );
} }
} }
const Mat* imgs0[2]; const Mat* imgs0[2];
Mat* imgs[2]; Mat* imgs[2];
uchar* buf[2]; const BufferBM &bufX;
StereoBMParams* state; const StereoBMParams &state;
}; };
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
@ -986,18 +1054,17 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
struct FindStereoCorrespInvoker : public ParallelLoopBody struct FindStereoCorrespInvoker : public ParallelLoopBody
{ {
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right, FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
Mat& _disp, StereoBMParams* _state, Mat& _disp, const StereoBMParams &_state,
int _nstripes, size_t _stripeBufSize, int _nstripes,
bool _useShorts, Rect _validDisparityRect, Rect _validDisparityRect,
Mat& _slidingSumBuf, Mat& _cost ) Mat& _cost, const BufferBM & buf_ )
: state(_state), buf(buf_)
{ {
CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S ); CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S );
left = &_left; right = &_right; left = &_left; right = &_right;
disp = &_disp; state = _state; disp = &_disp;
nstripes = _nstripes; stripeBufSize = _stripeBufSize; nstripes = _nstripes;
useShorts = _useShorts;
validDisparityRect = _validDisparityRect; validDisparityRect = _validDisparityRect;
slidingSumBuf = &_slidingSumBuf;
cost = &_cost; cost = &_cost;
} }
@ -1006,11 +1073,10 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
int cols = left->cols, rows = left->rows; int cols = left->cols, rows = left->rows;
int _row0 = std::min(cvRound(range.start * rows / nstripes), rows); int _row0 = std::min(cvRound(range.start * rows / nstripes), rows);
int _row1 = std::min(cvRound(range.end * rows / nstripes), rows); int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize;
int dispShift = disp->type() == CV_16S ? DISPARITY_SHIFT_16S : int dispShift = disp->type() == CV_16S ? DISPARITY_SHIFT_16S :
DISPARITY_SHIFT_32S; DISPARITY_SHIFT_32S;
int FILTERED = (state->minDisparity - 1) << dispShift; int FILTERED = (state.minDisparity - 1) << dispShift;
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
if( roi.height == 0 ) if( roi.height == 0 )
@ -1033,27 +1099,27 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
Mat left_i = left->rowRange(row0, row1); Mat left_i = left->rowRange(row0, row1);
Mat right_i = right->rowRange(row0, row1); Mat right_i = right->rowRange(row0, row1);
Mat disp_i = disp->rowRange(row0, row1); Mat disp_i = disp->rowRange(row0, row1);
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat(); Mat cost_i = state.disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
#if CV_SIMD #if CV_SIMD
if (useShorts) if (state.useShorts())
{ {
if( disp_i.type() == CV_16S) if( disp_i.type() == CV_16S)
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 ); findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
else else
findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1); findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start);
} }
else else
#endif #endif
{ {
if( disp_i.type() == CV_16S ) if( disp_i.type() == CV_16S )
findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 ); findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
else else
findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 ); findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
} }
if( state->disp12MaxDiff >= 0 ) if( state.disp12MaxDiff >= 0 )
validateDisparity( disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff ); validateDisparity( disp_i, cost_i, state.minDisparity, state.numDisparities, state.disp12MaxDiff );
if( roi.x > 0 ) if( roi.x > 0 )
{ {
@ -1069,13 +1135,12 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
protected: protected:
const Mat *left, *right; const Mat *left, *right;
Mat* disp, *slidingSumBuf, *cost; Mat* disp, *cost;
StereoBMParams *state; const StereoBMParams &state;
int nstripes; int nstripes;
size_t stripeBufSize;
bool useShorts;
Rect validDisparityRect; Rect validDisparityRect;
const BufferBM & buf;
}; };
class StereoBMImpl CV_FINAL : public StereoBM class StereoBMImpl CV_FINAL : public StereoBM
@ -1149,7 +1214,7 @@ public:
disp_shift = DISPARITY_SHIFT_16S; disp_shift = DISPARITY_SHIFT_16S;
FILTERED = (params.minDisparity - 1) << disp_shift; FILTERED = (params.minDisparity - 1) << disp_shift;
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) if (params.useFilterSpeckles())
filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
if (dtype == CV_32F) if (dtype == CV_32F)
disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0); disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0);
@ -1192,44 +1257,39 @@ public:
disp = dispbuf; disp = dispbuf;
} }
int wsz = params.SADWindowSize; {
int bufSize0 = (int)((ndisp + 2)*sizeof(int)); const double SAD_overhead_coeff = 10.0;
bufSize0 += (int)((height+wsz+2)*ndisp*sizeof(int)); const double N0 = 8000000 / (params.useShorts() ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
bufSize0 += (int)((height + wsz + 2)*sizeof(int)); const double maxStripeSize = std::min(
bufSize0 += (int)((height+wsz+2)*ndisp*(wsz+2)*sizeof(uchar) + 256); std::max(
N0 / (width * ndisp),
int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256); (params.SADWindowSize-1) * SAD_overhead_coeff
int bufSize2 = 0; ),
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) (double)height
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar)); );
const int nstripes = cvCeil(height / maxStripeSize);
bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21; BufferBM localBuf(nstripes, width, height, params);
const double SAD_overhead_coeff = 10.0;
double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread // Prefiltering
double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height); parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, localBuf, params), 1);
int nstripes = cvCeil(height / maxStripeSize);
int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2));
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
if( slidingSumBuf.cols < bufSize ) validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
slidingSumBuf.create( 1, bufSize, CV_8U ); !R2.empty() ? R2 : validDisparityRect,
params.minDisparity, params.numDisparities,
uchar *_buf = slidingSumBuf.ptr(); params.SADWindowSize);
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1); FindStereoCorrespInvoker invoker(left, right, disp, params, nstripes, validDisparityRect, cost, localBuf);
parallel_for_(Range(0, nstripes), invoker);
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect, if (params.useFilterSpeckles())
!R2.empty() ? R2 : validDisparityRect, {
params.minDisparity, params.numDisparities, slidingSumBuf.create( 1, width * height * (sizeof(Point_<short>) + sizeof(int) + sizeof(uchar)), CV_8U );
params.SADWindowSize); filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
}
parallel_for_(Range(0, nstripes),
FindStereoCorrespInvoker(left, right, disp, &params, nstripes,
bufSize0, useShorts, validDisparityRect,
slidingSumBuf, cost));
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) }
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
if (disp0.data != disp.data) if (disp0.data != disp.data)
disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0); disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0);

@ -0,0 +1,103 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_UTILS_BUFFER_AREA_HPP
#define OPENCV_UTILS_BUFFER_AREA_HPP
#include <opencv2/core/base.hpp>
#include <opencv2/core/private.hpp>
#include <opencv2/core/utility.hpp>
#include <vector>
namespace cv { namespace utils {
//! @addtogroup core_utils
//! @{
/** @brief Manages memory block shared by multiple buffers.
This class allows to allocate one large memory block and split it into several smaller
non-overlapping buffers. In safe mode each buffer allocation will be performed independently,
this mode allows dynamic memory access instrumentation using valgrind or memory sanitizer.
Safe mode can be explicitly switched ON in constructor. It will also be enabled when compiling with
memory sanitizer support or in runtime with the environment variable `OPENCV_BUFFER_AREA_ALWAYS_SAFE`.
Example of usage:
@code
int * buf1 = 0;
double * buf2 = 0;
cv::utils::BufferArea area;
area.allocate(buf1, 200); // buf1 = new int[200];
area.allocate(buf2, 1000, 64); // buf2 = new double[1000]; - aligned by 64
area.commit();
@endcode
@note This class is considered private and should be used only in OpenCV itself. API can be changed.
*/
class CV_EXPORTS BufferArea
{
public:
/** @brief Class constructor.
@param safe Enable _safe_ operation mode, each allocation will be performed independently.
*/
BufferArea(bool safe = false);
/** @brief Class destructor
All allocated memory will be freed. Each bound pointer will be reset to NULL.
*/
~BufferArea();
/** @brief Bind a pointer to local area.
BufferArea will store reference to the pointer and allocation parameters effectively owning the
pointer and allocated memory. This operation has the same parameters and does the same job
as the operator `new`, except allocation can be performed later during the BufferArea::commit call.
@param ptr Reference to a pointer of type T. Must be NULL
@param count Count of objects to be allocated, it has the same meaning as in the operator `new`.
@param alignment Alignment of allocated memory. same meaning as in the operator `new` (C++17).
Must be divisible by sizeof(T). Must be power of two.
@note In safe mode allocation will be performed immediately.
*/
template <typename T>
void allocate(T*&ptr, size_t count, ushort alignment = sizeof(T))
{
// Argument validation; each violated precondition raises through CV_Assert
// before anything is registered in the area.
CV_Assert(ptr == NULL);
CV_Assert(count > 0);
CV_Assert(alignment > 0);
CV_Assert(alignment % sizeof(T) == 0);
CV_Assert((alignment & (alignment - 1)) == 0);
allocate_((void**)(&ptr), static_cast<ushort>(sizeof(T)), count, alignment);
}
/** @brief Allocate memory and initialize all bound pointers
Each pointer bound to the area with the BufferArea::allocate will be initialized and will be set
to point to a memory block with requested size and alignment.
@note Does nothing in safe mode as all allocations will be performed by BufferArea::allocate
*/
void commit();
private:
BufferArea(const BufferArea &); // = delete
BufferArea &operator=(const BufferArea &); // = delete
// Type-erased implementation behind allocate(): registers one block for `ptr`.
void allocate_(void **ptr, ushort type_size, size_t count, ushort alignment);
private:
class Block;
// One entry per allocate() call, in call order.
std::vector<Block> blocks;
// The single shared allocation made by commit() when not in safe mode.
void * oneBuf;
// Number of bytes commit() requests for oneBuf; accumulated by allocate_().
size_t totalSize;
// Safe-mode flag, fixed at construction.
const bool safe;
};
//! @}
}} // cv::utils::
#endif

@ -0,0 +1,121 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "opencv2/core/utils/buffer_area.private.hpp"
#include "opencv2/core/utils/configuration.private.hpp"
// Default value of the safe-mode override: true when the build defines
// OPENCV_ENABLE_MEMORY_SANITIZER, false otherwise.
#ifdef OPENCV_ENABLE_MEMORY_SANITIZER
#define BUFFER_AREA_DEFAULT_MODE true
#else
#define BUFFER_AREA_DEFAULT_MODE false
#endif
// File-scope flag read from the configuration parameter "OPENCV_BUFFER_AREA_ALWAYS_SAFE"
// (falling back to the default above). The BufferArea constructor ORs it into the
// per-instance safe flag.
static bool CV_BUFFER_AREA_OVERRIDE_SAFE_MODE =
cv::utils::getConfigurationParameterBool("OPENCV_BUFFER_AREA_ALWAYS_SAFE", BUFFER_AREA_DEFAULT_MODE);
namespace cv { namespace utils {
//==================================================================================================
/** Bookkeeping record for one buffer bound to a BufferArea.
 *
 * Stores the address of the user's pointer together with the requested element size,
 * element count and alignment. A block is either allocated on its own
 * (real_allocate) or carved out of a shared buffer (fast_allocate).
 */
class BufferArea::Block
{
private:
    // Extra elements needed so that a pointer which is already type_size-aligned
    // can be advanced to the requested alignment (used by real_allocate only).
    inline size_t reserve_count() const
    {
        return alignment / type_size - 1;
    }
public:
    Block(void **ptr_, ushort type_size_, size_t count_, ushort alignment_)
        : ptr(ptr_), raw_mem(0), count(count_), type_size(type_size_), alignment(alignment_)
    {
        CV_Assert(ptr && *ptr == NULL);
    }
    // Resets the bound pointer to NULL and frees the individually allocated
    // memory, if this block owns any.
    void cleanup() const
    {
        CV_Assert(ptr && *ptr);
        *ptr = 0;
        if (raw_mem)
            fastFree(raw_mem);
    }
    // Upper bound of the bytes this block can consume from the shared buffer.
    // The running pointer handed to fast_allocate() ends wherever the previous
    // block ended, so it may be misaligned by any amount (e.g. after a uchar
    // block with an odd count). Up to alignment - 1 padding bytes must
    // therefore be reserved, not just alignment - type_size.
    size_t getByteCount() const
    {
        return type_size * count + (alignment - 1);
    }
    // Safe-mode path: allocate this block independently of all others.
    void real_allocate()
    {
        CV_Assert(ptr && *ptr == NULL);
        const size_t allocated_count = count + reserve_count();
        raw_mem = fastMalloc(type_size * allocated_count);
        if (alignment != type_size)
        {
            *ptr = alignPtr(raw_mem, alignment);
            CV_Assert(reinterpret_cast<size_t>(*ptr) % alignment == 0);
            CV_Assert(static_cast<uchar*>(*ptr) + type_size * count <= static_cast<uchar*>(raw_mem) + type_size * allocated_count);
        }
        else
        {
            *ptr = raw_mem;
        }
    }
    // Shared-buffer path: place this block at the first suitably aligned address
    // at or after `buf` and return the address just past the block's last element.
    void * fast_allocate(void * buf) const
    {
        CV_Assert(ptr && *ptr == NULL);
        buf = alignPtr(buf, alignment);
        CV_Assert(reinterpret_cast<size_t>(buf) % alignment == 0);
        *ptr = buf;
        return static_cast<void*>(static_cast<uchar*>(*ptr) + type_size * count);
    }
private:
    void **ptr;        // address of the user's pointer bound to this block
    void * raw_mem;    // memory owned by this block (set by real_allocate only)
    size_t count;      // number of elements requested
    ushort type_size;  // size of one element in bytes
    ushort alignment;  // requested alignment in bytes
};
//==================================================================================================
// Starts with no shared buffer and zero accumulated size. Safe mode is enabled when
// requested by the caller or forced by the file-scope override flag.
BufferArea::BufferArea(bool safe_) :
    oneBuf(NULL),
    totalSize(0),
    safe(CV_BUFFER_AREA_OVERRIDE_SAFE_MODE ? true : safe_)
{
}
// Lets every registered block reset its bound pointer (and free its own memory,
// if it has any), then releases the shared buffer when one was allocated.
BufferArea::~BufferArea()
{
    const size_t nblocks = blocks.size();
    for (size_t idx = 0; idx < nblocks; ++idx)
        blocks[idx].cleanup();
    if (oneBuf != NULL)
        fastFree(oneBuf);
}
void BufferArea::allocate_(void **ptr, ushort type_size, size_t count, ushort alignment)
{
blocks.push_back(Block(ptr, type_size, count, alignment));
if (safe)
blocks.back().real_allocate();
else
totalSize += blocks.back().getByteCount();
}
// Allocates the shared buffer and hands consecutive pieces of it to the registered
// blocks in registration order. Nothing to do in safe mode.
void BufferArea::commit()
{
    if (safe)
        return;

    CV_Assert(totalSize > 0);
    CV_Assert(oneBuf == NULL);
    CV_Assert(!blocks.empty());
    oneBuf = fastMalloc(totalSize);

    // Each block returns the address just past its own end, which becomes the
    // starting point for the next one.
    void * cursor = oneBuf;
    for (size_t idx = 0; idx < blocks.size(); ++idx)
        cursor = blocks[idx].fast_allocate(cursor);
}
//==================================================================================================
}} // cv::utils::

@ -3,6 +3,7 @@
// of this distribution and at http://opencv.org/license.html. // of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp" #include "test_precomp.hpp"
#include "opencv2/core/utils/logger.hpp" #include "opencv2/core/utils/logger.hpp"
#include "opencv2/core/utils/buffer_area.private.hpp"
#include "test_utils_tls.impl.hpp" #include "test_utils_tls.impl.hpp"
@ -303,4 +304,132 @@ TEST(Samples, findFile_missing)
cv::utils::logging::setLogLevel(prev); cv::utils::logging::setLogLevel(prev);
} }
/** Checks whether two element ranges share at least one element.
 *
 * The ranges are the half-open intervals [first, first + first_num) and
 * [second, second + second_num). All relative placements are covered, including
 * one range fully containing the other and ranges sharing an end address.
 * An empty range (zero elements) never overlaps anything.
 *
 * @return true if the two ranges intersect, false otherwise
 */
template <typename T>
inline bool buffers_overlap(T * first, size_t first_num, T * second, size_t second_num)
{
    // cerr << "[" << (void*)first << " : " << (void*)(first + first_num) << ")";
    // cerr << " X ";
    // cerr << "[" << (void*)second << " : " << (void*)(second + second_num) << ")";
    // cerr << endl;
    T * const first_end = first + first_num;
    T * const second_end = second + second_num;
    // Two half-open intervals intersect iff each one starts before the other ends.
    return (first < second_end) && (second < first_end);
}
typedef testing::TestWithParam<bool> BufferArea;
// Binds three buffers of different element types, commits, and checks that every
// pointer is set and that the int/double buffers are aligned to their element size.
// After the area goes out of scope all three pointers must be NULL again.
TEST_P(BufferArea, basic)
{
const bool safe = GetParam();
const size_t SZ = 3;
int * int_ptr = NULL;
uchar * uchar_ptr = NULL;
double * dbl_ptr = NULL;
{
cv::utils::BufferArea area(safe);
area.allocate(int_ptr, SZ);
area.allocate(uchar_ptr, SZ);
area.allocate(dbl_ptr, SZ);
area.commit();
ASSERT_TRUE(int_ptr != NULL);
ASSERT_TRUE(uchar_ptr != NULL);
ASSERT_TRUE(dbl_ptr != NULL);
EXPECT_EQ((size_t)0, (size_t)int_ptr % sizeof(int));
EXPECT_EQ((size_t)0, (size_t)dbl_ptr % sizeof(double));
}
// the area has been destroyed here
EXPECT_TRUE(int_ptr == NULL);
EXPECT_TRUE(uchar_ptr == NULL);
EXPECT_TRUE(dbl_ptr == NULL);
}
// Binds CNT buffers of SZ ints each, where buffer i requests an alignment of
// sizeof(int) << i bytes. After commit every buffer must be non-NULL, aligned as
// requested, and must not overlap the next one. After the area is destroyed every
// pointer must be NULL.
TEST_P(BufferArea, align)
{
const bool safe = GetParam();
const size_t SZ = 3;
const size_t CNT = 5;
typedef int T;
T * buffers[CNT] = {0};
{
cv::utils::BufferArea area(safe);
// allocate buffers with 3 elements with growing alignment (power of two)
for (size_t i = 0; i < CNT; ++i)
{
const ushort ALIGN = static_cast<ushort>(sizeof(T) << i);
EXPECT_TRUE(buffers[i] == NULL);
area.allocate(buffers[i], SZ, ALIGN);
}
area.commit();
for (size_t i = 0; i < CNT; ++i)
{
const ushort ALIGN = static_cast<ushort>(sizeof(T) << i);
EXPECT_TRUE(buffers[i] != NULL);
EXPECT_EQ((size_t)0, reinterpret_cast<size_t>(buffers[i]) % ALIGN);
// only neighbouring buffers (i, i + 1) are compared
if (i < CNT - 1)
{
SCOPED_TRACE(i);
EXPECT_FALSE(buffers_overlap(buffers[i], SZ, buffers[i + 1], SZ))
<< "Buffers overlap: "
<< buffers[i] << " (" << SZ << " elems)"
<< " and "
<< buffers[i + 1] << " (" << SZ << " elems)"
<< " (element size: " << sizeof(T) << ")";
}
}
}
// the area has been destroyed here
for (size_t i = 0; i < CNT; ++i)
{
EXPECT_TRUE(buffers[i] == NULL);
}
}
// Binds CNT int buffers of growing size (buffer i holds i + 1 elements), all with the
// same explicit alignment of ALIGN bytes. After commit every buffer must be non-NULL,
// aligned to ALIGN, and must not overlap the next one. After the area is destroyed
// every pointer must be NULL.
TEST_P(BufferArea, default_align)
{
    const bool safe = GetParam();
    const size_t CNT = 100;
    const ushort ALIGN = 64;
    typedef int T;
    T * buffers[CNT] = {0};
    {
        cv::utils::BufferArea area(safe);
        // allocate buffers with 1..CNT elements, each aligned by ALIGN bytes
        for (size_t i = 0; i < CNT; ++ i)
        {
            EXPECT_TRUE(buffers[i] == NULL);
            area.allocate(buffers[i], i + 1, ALIGN);
        }
        area.commit();
        for (size_t i = 0; i < CNT; ++i)
        {
            EXPECT_TRUE(buffers[i] != NULL);
            EXPECT_EQ((size_t)0, reinterpret_cast<size_t>(buffers[i]) % ALIGN);
            // only neighbouring buffers (i, i + 1) are compared
            if (i < CNT - 1)
            {
                SCOPED_TRACE(i);
                EXPECT_FALSE(buffers_overlap(buffers[i], i + 1, buffers[i + 1], i + 2))
                    << "Buffers overlap: "
                    << buffers[i] << " (" << i + 1 << " elems)"
                    << " and "
                    << buffers[i + 1] << " (" << i + 2 << " elems)"
                    << " (element size: " << sizeof(T) << ")";
            }
        }
    }
    // the area has been destroyed here: bound pointers must have been reset,
    // same as verified by the `align` test
    for (size_t i = 0; i < CNT; ++i)
    {
        EXPECT_TRUE(buffers[i] == NULL);
    }
}
// Every call below violates one precondition of BufferArea::allocate and is
// expected to throw.
TEST_P(BufferArea, bad)
{
const bool safe = GetParam();
int * ptr = 0;
cv::utils::BufferArea area(safe);
EXPECT_ANY_THROW(area.allocate(ptr, 0)); // bad size
EXPECT_ANY_THROW(area.allocate(ptr, 1, 0)); // bad alignment
EXPECT_ANY_THROW(area.allocate(ptr, 1, 3)); // bad alignment
// any non-NULL value will do; the pointer is never dereferenced by this test
ptr = (int*)1;
EXPECT_ANY_THROW(area.allocate(ptr, 1)); // non-zero pointer
}
INSTANTIATE_TEST_CASE_P(/**/, BufferArea, testing::Values(true, false));
}} // namespace }} // namespace

Loading…
Cancel
Save