From 29b13ec1def5e0a643aee73a5d13ebdfb9c07d30 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 30 May 2013 18:44:33 +0400 Subject: [PATCH 1/2] Replaced most of the usages of parallel_for with that of parallel_for_. This should allow many algorithms to take advantage of more parallelization technologies. --- apps/traincascade/boost.cpp | 30 +++--- modules/calib3d/src/solvepnp.cpp | 25 ----- modules/calib3d/src/stereobm.cpp | 20 ++-- modules/features2d/src/detectors.cpp | 38 +++----- modules/gpu/src/calib3d.cpp | 8 +- modules/imgproc/src/color.cpp | 96 ++++++++----------- modules/imgproc/src/distransform.cpp | 16 ++-- modules/imgproc/src/histogram.cpp | 57 ++++------- modules/imgproc/src/morph.cpp | 16 ++-- modules/ml/src/ann_mlp.cpp | 55 +++++------ modules/ml/src/gbt.cpp | 54 ++++------- modules/ml/src/knearest.cpp | 10 +- modules/ml/src/nbayes.cpp | 12 +-- modules/ml/src/svm.cpp | 8 +- modules/nonfree/src/surf.cpp | 44 ++++----- modules/objdetect/src/cascadedetect.cpp | 5 - modules/objdetect/src/latentsvm.cpp | 8 -- modules/photo/src/denoising.cpp | 12 +-- .../src/fast_nlmeans_denoising_invoker.hpp | 10 +- .../fast_nlmeans_multi_denoising_invoker.hpp | 10 +- modules/stitching/src/matchers.cpp | 10 +- modules/video/src/bgfg_gaussmix2.cpp | 24 ++--- modules/video/src/lkpyramid.cpp | 14 +-- modules/video/src/lkpyramid.hpp | 4 +- 24 files changed, 233 insertions(+), 353 deletions(-) diff --git a/apps/traincascade/boost.cpp b/apps/traincascade/boost.cpp index 2d29f338b0..4f91d5a29d 100644 --- a/apps/traincascade/boost.cpp +++ b/apps/traincascade/boost.cpp @@ -766,7 +766,7 @@ float CvCascadeBoostTrainData::getVarValue( int vi, int si ) } -struct FeatureIdxOnlyPrecalc +struct FeatureIdxOnlyPrecalc : ParallelLoopBody { FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u ) { @@ -776,11 +776,11 @@ struct FeatureIdxOnlyPrecalc idst = _buf->data.i; is_buf_16u = _is_buf_16u; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { cv::AutoBuffer valCache(sample_count); float* valCachePtr = (float*)valCache; - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) { for( int si = 0; si < sample_count; si++ ) { @@ -803,7 +803,7 @@ struct FeatureIdxOnlyPrecalc bool is_buf_16u; }; -struct FeatureValAndIdxPrecalc +struct FeatureValAndIdxPrecalc : ParallelLoopBody { FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u ) { @@ -814,9 +814,9 @@ struct FeatureValAndIdxPrecalc idst = _buf->data.i; is_buf_16u = _is_buf_16u; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) { for( int si = 0; si < sample_count; si++ ) { @@ -840,7 +840,7 @@ struct FeatureValAndIdxPrecalc bool is_buf_16u; }; -struct FeatureValOnlyPrecalc +struct FeatureValOnlyPrecalc : ParallelLoopBody { FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count ) { @@ -848,9 +848,9 @@ struct FeatureValOnlyPrecalc valCache = _valCache; sample_count = _sample_count; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) for( int si = 0; si < sample_count; si++ ) valCache->at(fi,si) = (*featureEvaluator)( fi, si ); } @@ -864,12 +864,12 @@ void CvCascadeBoostTrainData::precalculate() int minNum = MIN( numPrecalcVal, numPrecalcIdx); double proctime = -TIME( 0 ); - parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx), - FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); - parallel_for( BlockedRange(0, minNum), - FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); - parallel_for( BlockedRange(minNum, numPrecalcVal), - FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); + parallel_for_( Range(numPrecalcVal, numPrecalcIdx), + FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); + parallel_for_( Range(0, minNum), + FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); + parallel_for_( Range(minNum, numPrecalcVal), + FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl; } diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 25988be48a..3d2c0c2c47 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -115,31 +115,6 @@ namespace cv transform(points, modif_points, transformation); } - class Mutex - { - public: - Mutex() { - } - void lock() - { -#ifdef HAVE_TBB - resultsMutex.lock(); -#endif - } - - void unlock() - { -#ifdef HAVE_TBB - resultsMutex.unlock(); -#endif - } - - private: -#ifdef HAVE_TBB - tbb::mutex resultsMutex; -#endif - }; - struct CameraParameters { void init(Mat _intrinsics, Mat _distCoeffs) diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index 32514276b5..623883df74 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -699,7 +699,7 @@ struct PrefilterInvoker }; -struct FindStereoCorrespInvoker +struct FindStereoCorrespInvoker : ParallelLoopBody { FindStereoCorrespInvoker( const Mat& _left, const Mat& _right, Mat& _disp, CvStereoBMState* _state, @@ -713,12 +713,12 @@ struct FindStereoCorrespInvoker validDisparityRect = _validDisparityRect; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { int cols = left->cols, rows = left->rows; - int _row0 = min(cvRound(range.begin() * rows / nstripes), rows); - int _row1 = min(cvRound(range.end() * rows / nstripes), rows); - uchar *ptr = state->slidingSumBuf->data.ptr + range.begin() * stripeBufSize; + int _row0 = min(cvRound(range.start * rows / nstripes), rows); + int _row1 = min(cvRound(range.end * rows / nstripes), rows); + uchar *ptr = state->slidingSumBuf->data.ptr + range.start * stripeBufSize; int FILTERED = (state->minDisparity - 1)*16; Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); @@ -871,14 +871,10 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat const bool useShorts = false; #endif -#ifdef HAVE_TBB const double SAD_overhead_coeff = 10.0; double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread double maxStripeSize = min(max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height); int nstripes = cvCeil(height / maxStripeSize); -#else - const int nstripes = 1; -#endif int bufSize = max(bufSize0 * nstripes, max(bufSize1 * 2, bufSize2)); @@ -898,9 +894,9 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat state->minDisparity, state->numberOfDisparities, state->SADWindowSize); - parallel_for(BlockedRange(0, nstripes), - FindStereoCorrespInvoker(left, right, disp, state, nstripes, - bufSize0, useShorts, validDisparityRect)); + parallel_for_(Range(0, nstripes), + FindStereoCorrespInvoker(left, right, disp, state, nstripes, + bufSize0, useShorts, validDisparityRect)); if( state->speckleRange >= 0 && state->speckleWindowSize > 0 ) { diff --git a/modules/features2d/src/detectors.cpp b/modules/features2d/src/detectors.cpp index 2efd5a652a..a1e389a435 100644 --- a/modules/features2d/src/detectors.cpp +++ b/modules/features2d/src/detectors.cpp @@ -214,7 +214,7 @@ static void keepStrongest( int N, vector& keypoints ) } namespace { -class GridAdaptedFeatureDetectorInvoker +class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody { private: int gridRows_, gridCols_; @@ -223,29 +223,24 @@ private: const Mat& image_; const Mat& mask_; const Ptr& detector_; -#ifdef HAVE_TBB - tbb::mutex* kptLock_; -#endif + Mutex* kptLock_; GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC public: - GridAdaptedFeatureDetectorInvoker(const Ptr& detector, const Mat& image, const Mat& mask, vector& keypoints, int maxPerCell, int gridRows, int gridCols -#ifdef HAVE_TBB - , tbb::mutex* kptLock -#endif - ) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), - keypoints_(keypoints), image_(image), mask_(mask), detector_(detector) -#ifdef HAVE_TBB - , kptLock_(kptLock) -#endif + GridAdaptedFeatureDetectorInvoker(const Ptr& detector, const Mat& image, const Mat& mask, + vector& keypoints, int maxPerCell, int gridRows, int gridCols, + cv::Mutex* kptLock) + : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), + keypoints_(keypoints), image_(image), mask_(mask), detector_(detector), + kptLock_(kptLock) { } - void operator() (const BlockedRange& range) const + void operator() (const Range& range) const { - for (int i = range.begin(); i < range.end(); ++i) + for (int i = range.start; i < range.end; ++i) { int celly = i / gridCols_; int cellx = i - celly * gridCols_; @@ -270,9 +265,8 @@ public: it->pt.x += col_range.start; it->pt.y += row_range.start; } -#ifdef HAVE_TBB - tbb::mutex::scoped_lock join_keypoints(*kptLock_); -#endif + + cv::AutoLock join_keypoints(*kptLock_); keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() ); } } @@ -289,13 +283,9 @@ void GridAdaptedFeatureDetector::detectImpl( const Mat& image, vector& keypoints.reserve(maxTotalKeypoints); int maxPerCell = maxTotalKeypoints / (gridRows * gridCols); -#ifdef HAVE_TBB - tbb::mutex kptLock; - cv::parallel_for(cv::BlockedRange(0, gridRows * gridCols), + cv::Mutex kptLock; + cv::parallel_for_(cv::Range(0, gridRows * gridCols), GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols, &kptLock)); -#else - GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols)(cv::BlockedRange(0, gridRows * gridCols)); -#endif } /* diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp index e83213f90f..b84f09d0ab 100644 --- a/modules/gpu/src/calib3d.cpp +++ b/modules/gpu/src/calib3d.cpp @@ -151,7 +151,7 @@ namespace } // Computes rotation, translation pair for small subsets if the input data - class TransformHypothesesGenerator + class TransformHypothesesGenerator : public ParallelLoopBody { public: TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_, @@ -161,7 +161,7 @@ namespace num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_), transl_vectors(transl_vectors_) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { // Input data for generation of the current hypothesis vector subset_indices(subset_size); @@ -173,7 +173,7 @@ namespace Mat rot_mat(3, 3, CV_64F); Mat transl_vec(1, 3, CV_64F); - for (int iter = range.begin(); iter < range.end(); ++iter) + for (int iter = range.start; iter < range.end; ++iter) { selectRandom(subset_size, num_points, subset_indices); for (int i = 0; i < subset_size; ++i) @@ -239,7 +239,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam // Generate set of hypotheses using small subsets of the input data TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat, num_points, subset_size, rot_matrices, transl_vectors); - parallel_for(BlockedRange(0, num_iters), body); + parallel_for_(Range(0, num_iters), body); // Compute scores (i.e. number of inliers) for each hypothesis GpuMat d_object(object); diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 3799d435e3..41ca2db9c0 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -2755,7 +2755,7 @@ const int ITUR_BT_601_CGV = -385875; const int ITUR_BT_601_CBV = -74448; template -struct YUV420sp2RGB888Invoker +struct YUV420sp2RGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *muv; @@ -2764,10 +2764,10 @@ struct YUV420sp2RGB888Invoker YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv) : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; //R = 1.164(Y - 16) + 1.596(V - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) @@ -2824,7 +2824,7 @@ struct YUV420sp2RGB888Invoker }; template -struct YUV420sp2RGBA8888Invoker +struct YUV420sp2RGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *muv; @@ -2833,10 +2833,10 @@ struct YUV420sp2RGBA8888Invoker YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv) : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; //R = 1.164(Y - 16) + 1.596(V - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) @@ -2897,7 +2897,7 @@ struct YUV420sp2RGBA8888Invoker }; template -struct YUV420p2RGB888Invoker +struct YUV420p2RGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *mu, *mv; @@ -2907,19 +2907,19 @@ struct YUV420p2RGB888Invoker YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx) : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - const int rangeBegin = range.begin() * 2; - const int rangeEnd = range.end() * 2; + const int rangeBegin = range.start * 2; + const int rangeEnd = range.end * 2; size_t uvsteps[2] = {width/2, stride - width/2}; int usIdx = ustepIdx, vsIdx = vstepIdx; const uchar* y1 = my1 + rangeBegin * stride; - const uchar* u1 = mu + (range.begin() / 2) * stride; - const uchar* v1 = mv + (range.begin() / 2) * stride; + const uchar* u1 = mu + (range.start / 2) * stride; + const uchar* v1 = mv + (range.start / 2) * stride; - if(range.begin() % 2 == 1) + if(range.start % 2 == 1) { u1 += uvsteps[(usIdx++) & 1]; v1 += uvsteps[(vsIdx++) & 1]; @@ -2965,7 +2965,7 @@ struct YUV420p2RGB888Invoker }; template -struct YUV420p2RGBA8888Invoker +struct YUV420p2RGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *mu, *mv; @@ -2975,19 +2975,19 @@ struct YUV420p2RGBA8888Invoker YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx) : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; size_t uvsteps[2] = {width/2, stride - width/2}; int usIdx = ustepIdx, vsIdx = vstepIdx; const uchar* y1 = my1 + rangeBegin * stride; - const uchar* u1 = mu + (range.begin() / 2) * stride; - const uchar* v1 = mv + (range.begin() / 2) * stride; + const uchar* u1 = mu + (range.start / 2) * stride; + const uchar* v1 = mv + (range.start / 2) * stride; - if(range.begin() % 2 == 1) + if(range.start % 2 == 1) { u1 += uvsteps[(usIdx++) & 1]; v1 += uvsteps[(vsIdx++) & 1]; @@ -3042,48 +3042,40 @@ template inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv) { YUV420sp2RGB888Invoker converter(&_dst, _stride, _y1, _uv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv) { YUV420sp2RGBA8888Invoker converter(&_dst, _stride, _y1, _uv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx) { YUV420p2RGB888Invoker converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx) { YUV420p2RGBA8888Invoker converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } ///////////////////////////////////// RGB -> YUV420p ///////////////////////////////////// @@ -3167,7 +3159,7 @@ static void cvtRGBtoYUV420p(const Mat& src, Mat& dst) ///////////////////////////////////// YUV422 -> RGB ///////////////////////////////////// template -struct YUV422toRGB888Invoker +struct YUV422toRGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* src; @@ -3176,10 +3168,10 @@ struct YUV422toRGB888Invoker YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv) : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin(); - int rangeEnd = range.end(); + int rangeBegin = range.start; + int rangeEnd = range.end; const int uidx = 1 - yIdx + uIdx * 2; const int vidx = (2 + uidx) % 4; @@ -3213,7 +3205,7 @@ struct YUV422toRGB888Invoker }; template -struct YUV422toRGBA8888Invoker +struct YUV422toRGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* src; @@ -3222,10 +3214,10 @@ struct YUV422toRGBA8888Invoker YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv) : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin(); - int rangeEnd = range.end(); + int rangeBegin = range.start; + int rangeEnd = range.end; const int uidx = 1 - yIdx + uIdx * 2; const int vidx = (2 + uidx) % 4; @@ -3266,24 +3258,20 @@ template inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv) { YUV422toRGB888Invoker converter(&_dst, _stride, _yuv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows), converter); + parallel_for_(Range(0, _dst.rows), converter); else -#endif - converter(BlockedRange(0, _dst.rows)); + converter(Range(0, _dst.rows)); } template inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv) { YUV422toRGBA8888Invoker converter(&_dst, _stride, _yuv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows), converter); + parallel_for_(Range(0, _dst.rows), converter); else -#endif - converter(BlockedRange(0, _dst.rows)); + converter(Range(0, _dst.rows)); } /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) ////////////// diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index 89d3a550f4..d3e6f90242 100644 --- a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -443,7 +443,7 @@ icvGetDistanceTransformMask( int maskType, float *metrics ) namespace cv { -struct DTColumnInvoker +struct DTColumnInvoker : ParallelLoopBody { DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab) { @@ -453,9 +453,9 @@ struct DTColumnInvoker sqr_tab = _sqr_tab; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - int i, i1 = range.begin(), i2 = range.end(); + int i, i1 = range.start, i2 = range.end; int m = src->rows; size_t sstep = src->step, dstep = dst->step/sizeof(float); AutoBuffer _d(m); @@ -490,7 +490,7 @@ struct DTColumnInvoker }; -struct DTRowInvoker +struct DTRowInvoker : ParallelLoopBody { DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab ) { @@ -499,10 +499,10 @@ struct DTRowInvoker inv_tab = _inv_tab; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { const float inf = 1e15f; - int i, i1 = range.begin(), i2 = range.end(); + int i, i1 = range.start, i2 = range.end; int n = dst->cols; AutoBuffer _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int)); float* f = (float*)(uchar*)_buf; @@ -586,7 +586,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst ) for( ; i <= m*3; i++ ) sat_tab[i] = i - shift; - cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab)); + cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab)); // stage 2: compute modified distance transform for each row float* inv_tab = sqr_tab + n; @@ -598,7 +598,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst ) sqr_tab[i] = (float)(i*i); } - cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab)); + cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab)); } diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index 22dd9beb1f..5ca6c9d15b 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -2986,29 +2986,23 @@ cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask, } } -class EqualizeHistCalcHist_Invoker +class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody { public: enum {HIST_SZ = 256}; -#ifdef HAVE_TBB - typedef tbb::mutex* MutextPtr; -#else - typedef void* MutextPtr; -#endif - - EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock) + EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock) : src_(src), globalHistogram_(histogram), histogramLock_(histogramLock) { } - void operator()( const cv::BlockedRange& rowRange ) const + void operator()( const cv::Range& rowRange ) const { int localHistogram[HIST_SZ] = {0, }; const size_t sstep = src_.step; int width = src_.cols; - int height = rowRange.end() - rowRange.begin(); + int height = rowRange.end - rowRange.start; if (src_.isContinuous()) { @@ -3016,7 +3010,7 @@ public: height = 1; } - for (const uchar* ptr = src_.ptr(rowRange.begin()); height--; ptr += sstep) + for (const uchar* ptr = src_.ptr(rowRange.start); height--; ptr += sstep) { int x = 0; for (; x <= width - 4; x += 4) @@ -3031,9 +3025,7 @@ public: localHistogram[ptr[x]]++; } -#ifdef HAVE_TBB - tbb::mutex::scoped_lock lock(*histogramLock_); -#endif + cv::AutoLock lock(*histogramLock_); for( int i = 0; i < HIST_SZ; i++ ) globalHistogram_[i] += localHistogram[i]; @@ -3041,12 +3033,7 @@ public: static bool isWorthParallel( const cv::Mat& src ) { -#ifdef HAVE_TBB return ( src.total() >= 640*480 ); -#else - (void)src; - return false; -#endif } private: @@ -3054,10 +3041,10 @@ private: cv::Mat& src_; int* globalHistogram_; - MutextPtr histogramLock_; + cv::Mutex* histogramLock_; }; -class EqualizeHistLut_Invoker +class EqualizeHistLut_Invoker : public cv::ParallelLoopBody { public: EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut ) @@ -3066,13 +3053,13 @@ public: lut_(lut) { } - void operator()( const cv::BlockedRange& rowRange ) const + void operator()( const cv::Range& rowRange ) const { const size_t sstep = src_.step; const size_t dstep = dst_.step; int width = src_.cols; - int height = rowRange.end() - rowRange.begin(); + int height = rowRange.end - rowRange.start; int* lut = lut_; if (src_.isContinuous() && dst_.isContinuous()) @@ -3081,8 +3068,8 @@ public: height = 1; } - const uchar* sptr = src_.ptr(rowRange.begin()); - uchar* dptr = dst_.ptr(rowRange.begin()); + const uchar* sptr = src_.ptr(rowRange.start); + uchar* dptr = dst_.ptr(rowRange.start); for (; height--; sptr += sstep, dptr += dstep) { @@ -3111,12 +3098,7 @@ public: static bool isWorthParallel( const cv::Mat& src ) { -#ifdef HAVE_TBB return ( src.total() >= 640*480 ); -#else - (void)src; - return false; -#endif } private: @@ -3143,23 +3125,18 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) if(src.empty()) return; -#ifdef HAVE_TBB - tbb::mutex histogramLockInstance; - EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance; -#else - EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0; -#endif + Mutex histogramLockInstance; const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ; int hist[hist_sz] = {0,}; int lut[hist_sz]; - EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock); + EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance); EqualizeHistLut_Invoker lutBody(src, dst, lut); - cv::BlockedRange heightRange(0, src.rows); + cv::Range heightRange(0, src.rows); if(EqualizeHistCalcHist_Invoker::isWorthParallel(src)) - parallel_for(heightRange, calcBody); + parallel_for_(heightRange, calcBody); else calcBody(heightRange); @@ -3183,7 +3160,7 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) } if(EqualizeHistLut_Invoker::isWorthParallel(src)) - parallel_for(heightRange, lutBody); + parallel_for_(heightRange, lutBody); else lutBody(heightRange); } diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index a63e08ff01..53d2347ec4 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1081,7 +1081,7 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor) namespace cv { -class MorphologyRunner +class MorphologyRunner : public ParallelLoopBody { public: MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations, @@ -1102,14 +1102,14 @@ public: columnBorderType = _columnBorderType; } - void operator () ( const BlockedRange& range ) const + void operator () ( const Range& range ) const { - int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows); - int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows); + int row0 = min(cvRound(range.start * src.rows / nStripes), src.rows); + int row1 = min(cvRound(range.end * src.rows / nStripes), src.rows); /*if(0) printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", - src.rows, src.cols, range.begin(), range.end(), row0, row1);*/ + src.rows, src.cols, range.start, range.end, row0, row1);*/ Mat srcStripe = src.rowRange(row0, row1); Mat dstStripe = dst.rowRange(row0, row1); @@ -1173,15 +1173,15 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, } int nStripes = 1; -#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION +#if defined HAVE_TEGRA_OPTIMIZATION if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing (borderType & BORDER_ISOLATED) == 0 && //TODO: check border types src.rows >= 64 ) //NOTE: just heuristics nStripes = 4; #endif - parallel_for(BlockedRange(0, nStripes), - MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue)); + parallel_for_(Range(0, nStripes), + MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue)); //Ptr f = createMorphologyFilter(op, src.type(), // kernel, anchor, borderType, borderType, borderValue ); diff --git a/modules/ml/src/ann_mlp.cpp b/modules/ml/src/ann_mlp.cpp index 438872ae8c..bf85425b9c 100644 --- a/modules/ml/src/ann_mlp.cpp +++ b/modules/ml/src/ann_mlp.cpp @@ -40,10 +40,6 @@ #include "precomp.hpp" -#ifdef HAVE_TBB -#include -#endif - CvANN_MLP_TrainParams::CvANN_MLP_TrainParams() { term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 ); @@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw ) return iter; } -struct rprop_loop { +struct rprop_loop : cv::ParallelLoopBody { rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0, int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count, CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz) @@ -1063,7 +1059,7 @@ struct rprop_loop { int buf_sz; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { double* buf_ptr; double** x = 0; @@ -1084,7 +1080,7 @@ struct rprop_loop { buf_ptr += (df[i] - x[i])*2; } - for(int si = range.begin(); si < range.end(); si++ ) + for(int si = range.start; si < range.end; si++ ) { if (si % dcount0 != 0) continue; int n1, n2, k; @@ -1170,36 +1166,33 @@ struct rprop_loop { } // backward pass, update dEdw - #ifdef HAVE_TBB - static tbb::spin_mutex mutex; - tbb::spin_mutex::scoped_lock lock; - #endif + static cv::Mutex mutex; + for(int i = l_count-1; i > 0; i-- ) { n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] ); cvMul( grad1, &_df, grad1 ); - #ifdef HAVE_TBB - lock.acquire(mutex); - #endif - cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); - cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); - cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); - - // update bias part of dEdw - for( k = 0; k < dcount; k++ ) - { - double* dst = _dEdw.data.db + n1*n2; - const double* src = grad1->data.db + k*n2; - for(int j = 0; j < n2; j++ ) - dst[j] += src[j]; + + { + cv::AutoLock lock(mutex); + cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); + cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); + cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); + + // update bias part of dEdw + for( k = 0; k < dcount; k++ ) + { + double* dst = _dEdw.data.db + n1*n2; + const double* src = grad1->data.db + k*n2; + for(int j = 0; j < n2; j++ ) + dst[j] += src[j]; + } + + if (i > 1) + cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); } - if (i > 1) - cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); - #ifdef HAVE_TBB - lock.release(); - #endif cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db ); if( i > 1 ) cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T ); @@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw ) double E = 0; // first, iterate through all the samples and compute dEdw - cv::parallel_for(cv::BlockedRange(0, count), + cv::parallel_for_(cv::Range(0, count), rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes, ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz) ); diff --git a/modules/ml/src/gbt.cpp b/modules/ml/src/gbt.cpp index 6671a3495b..b52ffbe5a3 100644 --- a/modules/ml/src/gbt.cpp +++ b/modules/ml/src/gbt.cpp @@ -900,7 +900,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing, } -class Tree_predictor +class Tree_predictor : public cv::ParallelLoopBody { private: pCvSeq* weak; @@ -910,9 +910,7 @@ private: const CvMat* missing; const float shrinkage; -#ifdef HAVE_TBB - static tbb::spin_mutex SumMutex; -#endif + static cv::Mutex SumMutex; public: @@ -931,14 +929,11 @@ public: Tree_predictor& operator=( const Tree_predictor& ) { return *this; } - virtual void operator()(const cv::BlockedRange& range) const + virtual void operator()(const cv::Range& range) const { -#ifdef HAVE_TBB - tbb::spin_mutex::scoped_lock lock; -#endif CvSeqReader reader; - int begin = range.begin(); - int end = range.end(); + int begin = range.start; + int end = range.end; int weak_count = end - begin; CvDTree* tree; @@ -956,13 +951,11 @@ public: tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value); } } -#ifdef HAVE_TBB - lock.acquire(SumMutex); - sum[i] += tmp_sum; - lock.release(); -#else - sum[i] += tmp_sum; -#endif + + { + cv::AutoLock lock(SumMutex); + sum[i] += tmp_sum; + } } } // Tree_predictor::operator() @@ -970,11 +963,7 @@ public: }; // class Tree_predictor - -#ifdef HAVE_TBB -tbb::spin_mutex Tree_predictor::SumMutex; -#endif - +cv::Mutex Tree_predictor::SumMutex; float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing, @@ -992,12 +981,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing, Tree_predictor predictor = Tree_predictor(weak_seq, class_count, params.shrinkage, _sample, _missing, sum); -//#ifdef HAVE_TBB -// tbb::parallel_for(cv::BlockedRange(begin, end), predictor, -// tbb::auto_partitioner()); -//#else - cv::parallel_for(cv::BlockedRange(begin, end), predictor); -//#endif + cv::parallel_for_(cv::Range(begin, end), predictor); for (int i=0; i *resp ) Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(), _data->get_missing(), _sample_idx); -//#ifdef HAVE_TBB -// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner()); -//#else - cv::parallel_for(cv::BlockedRange(0,n), predictor); -//#endif + cv::parallel_for_(cv::Range(0,n), predictor); int* sidx = _sample_idx ? _sample_idx->data.i : 0; int r_step = CV_IS_MAT_CONT(response->type) ? diff --git a/modules/ml/src/knearest.cpp b/modules/ml/src/knearest.cpp index 3c2f9ebada..6b6f5e6afa 100644 --- a/modules/ml/src/knearest.cpp +++ b/modules/ml/src/knearest.cpp @@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end, return result; } -struct P1 { +struct P1 : cv::ParallelLoopBody { P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors, int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result) { @@ -333,10 +333,10 @@ struct P1 { float* result; int buf_sz; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { cv::AutoBuffer buf(buf_sz); - for(int i = range.begin(); i < range.end(); i += 1 ) + for(int i = range.start; i < range.end; i += 1 ) { float* neighbor_responses = &buf[0]; float* dist = neighbor_responses + 1*k; @@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results, int k1 = get_sample_count(); k1 = MIN( k1, k ); - cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1, - _results, _neighbor_responses, _dist, &result) + cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1, + _results, _neighbor_responses, _dist, &result) ); return result; diff --git a/modules/ml/src/nbayes.cpp b/modules/ml/src/nbayes.cpp index 15146d6f4e..f1f7a24ec0 100644 --- a/modules/ml/src/nbayes.cpp +++ b/modules/ml/src/nbayes.cpp @@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res return result; } -struct predict_body { +struct predict_body : cv::ParallelLoopBody { predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg, const CvMat* _samples, const int* _vidx, CvMat* _cls_labels, CvMat* _results, float* _value, int _var_count1 @@ -307,7 +307,7 @@ struct predict_body { float* value; int var_count1; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { int cls = -1; @@ -324,7 +324,7 @@ struct predict_body { cv::AutoBuffer buffer(nclasses + var_count1); CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] ); - for(int k = range.begin(); k < range.end(); k += 1 ) + for(int k = range.start; k < range.end; k += 1 ) { int ival; double opt = FLT_MAX; @@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c const int* vidx = var_idx ? var_idx->data.i : 0; - cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, - vidx, cls_labels, results, &value, var_count - )); + cv::parallel_for_(cv::Range(0, samples->rows), + predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, + vidx, cls_labels, results, &value, var_count)); return value; } diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 9752848b9a..2e1b2e3565 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -2143,7 +2143,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const return result; } -struct predict_body_svm { +struct predict_body_svm : ParallelLoopBody { predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results) { pointer = _pointer; @@ -2157,9 +2157,9 @@ struct predict_body_svm { const CvMat* samples; CvMat* results; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { - for(int i = range.begin(); i < range.end(); i++ ) + for(int i = range.start; i < range.end; i++ ) { CvMat sample; cvGetRow( samples, &sample, i ); @@ -2175,7 +2175,7 @@ struct predict_body_svm { float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const { float result = 0; - cv::parallel_for(cv::BlockedRange(0, samples->rows), + cv::parallel_for_(cv::Range(0, samples->rows), predict_body_svm(this, &result, samples, results) ); return result; diff --git a/modules/nonfree/src/surf.cpp b/modules/nonfree/src/surf.cpp index bb6d53e4b9..2fc459fb61 100644 --- a/modules/nonfree/src/surf.cpp +++ b/modules/nonfree/src/surf.cpp @@ -258,7 +258,7 @@ interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt ) } // Multi-threaded construction of the scale-space pyramid -struct SURFBuildInvoker +struct SURFBuildInvoker : ParallelLoopBody { SURFBuildInvoker( const Mat& _sum, const vector& _sizes, const vector& _sampleSteps, @@ -271,9 +271,9 @@ struct SURFBuildInvoker traces = &_traces; } - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - for( int i=range.begin(); i& _dets, const vector& _traces, @@ -310,9 +310,9 @@ struct SURFFindInvoker const vector& sizes, vector& keypoints, int octave, int layer, float hessianThreshold, int sampleStep ); - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - for( int i=range.begin(); i& object int stripCount, stripSize; - #ifdef HAVE_TBB const int PTS_PER_THREAD = 1000; stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD; stripCount = std::min(std::max(stripCount, 1), 100); stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep; - #else - stripCount = 1; - stripSize = processingRectSize.height; - #endif if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates, rejectLevels, levelWeights, outputRejectLevels ) ) diff --git a/modules/objdetect/src/latentsvm.cpp b/modules/objdetect/src/latentsvm.cpp index 521f0fdf56..5a45965e77 100644 --- a/modules/objdetect/src/latentsvm.cpp +++ b/modules/objdetect/src/latentsvm.cpp @@ -582,7 +582,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, // For each component perform searching for (i = 0; i < kComponents; i++) { -#ifdef HAVE_TBB int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], b[i], maxXBorder, maxYBorder, scoreThreshold, &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), @@ -598,13 +597,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, free(partsDisplacementArr); return LATENT_SVM_SEARCH_OBJECT_FAILED; } -#else - (void)numThreads; - searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], - b[i], maxXBorder, maxYBorder, scoreThreshold, - &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), - &(scoreArr[i]), &(partsDisplacementArr[i])); -#endif estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i])); componentIndex += (kPartFilters[i] + 1); diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 02d7a6f620..191926ccb7 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -59,17 +59,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; @@ -159,19 +159,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds switch (srcImgs[0].type()) { case CV_8U: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index c4f13826d2..8824f17c0d 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -55,12 +55,12 @@ using namespace std; using namespace cv; template -struct FastNlMeansDenoisingInvoker { +struct FastNlMeansDenoisingInvoker : ParallelLoopBody { public: FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst, int template_window_size, int search_window_size, const float h); - void operator() (const BlockedRange& range) const; + void operator() (const Range& range) const; private: void operator= (const FastNlMeansDenoisingInvoker&); @@ -156,9 +156,9 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( } template -void FastNlMeansDenoisingInvoker::operator() (const BlockedRange& range) const { - int row_from = range.begin(); - int row_to = range.end() - 1; +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { + int row_from = range.start; + int row_to = range.end - 1; Array2d dist_sums(search_window_size_, search_window_size_); diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 2ae5054e00..8b32eded18 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -55,13 +55,13 @@ using namespace std; using namespace cv; template -struct FastNlMeansMultiDenoisingInvoker { +struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { public: FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, Mat& dst, int template_window_size, int search_window_size, const float h); - void operator() (const BlockedRange& range) const; + void operator() (const Range& range) const; private: void operator= (const FastNlMeansMultiDenoisingInvoker&); @@ -175,9 +175,9 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( } template -void FastNlMeansMultiDenoisingInvoker::operator() (const BlockedRange& range) const { - int row_from = range.begin(); - int row_to = range.end() - 1; +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { + int row_from = range.start; + int row_to = range.end - 1; Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 9bab58c52f..b5bd8ad4d1 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -66,7 +66,7 @@ struct DistIdxPair }; -struct MatchPairsBody +struct MatchPairsBody : ParallelLoopBody { MatchPairsBody(const MatchPairsBody& other) : matcher(other.matcher), features(other.features), @@ -77,10 +77,10 @@ struct MatchPairsBody : matcher(_matcher), features(_features), pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {} - void operator ()(const BlockedRange &r) const + void operator ()(const Range &r) const { const int num_images = static_cast(features.size()); - for (int i = r.begin(); i < r.end(); ++i) + for (int i = r.start; i < r.end; ++i) { int from = near_pairs[i].first; int to = near_pairs[i].second; @@ -526,9 +526,9 @@ void FeaturesMatcher::operator ()(const vector &features, vector< MatchPairsBody body(*this, features, pairwise_matches, near_pairs); if (is_thread_safe_) - parallel_for(BlockedRange(0, static_cast(near_pairs.size())), body); + parallel_for_(Range(0, static_cast(near_pairs.size())), body); else - body(BlockedRange(0, static_cast(near_pairs.size()))); + body(Range(0, static_cast(near_pairs.size()))); LOGLN_CHAT(""); } diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp index e532af2ae6..6bbb960482 100644 --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@ -248,7 +248,7 @@ detectShadowGMM(const float* data, int nchannels, int nmodes, //IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004 //http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf -struct MOG2Invoker +struct MOG2Invoker : ParallelLoopBody { MOG2Invoker(const Mat& _src, Mat& _dst, GMM* _gmm, float* _mean, @@ -280,9 +280,9 @@ struct MOG2Invoker cvtfunc = src->depth() != CV_32F ? getConvertFunc(src->depth(), CV_32F) : 0; } - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int y0 = range.begin(), y1 = range.end(); + int y0 = range.start, y1 = range.end; int ncols = src->cols, nchannels = src->channels(); AutoBuffer buf(src->cols*nchannels); float alpha1 = 1.f - alphaT; @@ -562,15 +562,15 @@ void BackgroundSubtractorMOG2::operator()(InputArray _image, OutputArray _fgmask learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( 2*nframes, history ); CV_Assert(learningRate >= 0); - parallel_for(BlockedRange(0, image.rows), - MOG2Invoker(image, fgmask, - (GMM*)bgmodel.data, - (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols), - bgmodelUsedModes.data, nmixtures, (float)learningRate, - (float)varThreshold, - backgroundRatio, varThresholdGen, - fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau, - bShadowDetection, nShadowDetection)); + parallel_for_(Range(0, image.rows), + MOG2Invoker(image, fgmask, + (GMM*)bgmodel.data, + (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols), + bgmodelUsedModes.data, nmixtures, (float)learningRate, + (float)varThreshold, + backgroundRatio, varThresholdGen, + fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau, + bShadowDetection, nShadowDetection)); } void BackgroundSubtractorMOG2::getBackgroundImage(OutputArray backgroundImage) const diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 9e47eb8029..291cb86a26 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -156,7 +156,7 @@ cv::detail::LKTrackerInvoker::LKTrackerInvoker( minEigThreshold = _minEigThreshold; } -void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const +void cv::detail::LKTrackerInvoker::operator()(const Range& range) const { Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f); const Mat& I = *prevImg; @@ -170,7 +170,7 @@ void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf); Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn); - for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ ) + for( int ptidx = range.start; ptidx < range.end; ptidx++ ) { Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level)); Point2f nextPt; @@ -733,11 +733,11 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg, typedef cv::detail::LKTrackerInvoker LKTrackerInvoker; #endif - parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, - nextPyr[level * lvlStep2], prevPts, nextPts, - status, err, - winSize, criteria, level, maxLevel, - flags, (float)minEigThreshold)); + parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, + nextPyr[level * lvlStep2], prevPts, nextPts, + status, err, + winSize, criteria, level, maxLevel, + flags, (float)minEigThreshold)); } } diff --git a/modules/video/src/lkpyramid.hpp b/modules/video/src/lkpyramid.hpp index 390e46bf99..4aff37ef84 100644 --- a/modules/video/src/lkpyramid.hpp +++ b/modules/video/src/lkpyramid.hpp @@ -7,7 +7,7 @@ namespace detail typedef short deriv_type; - struct LKTrackerInvoker + struct LKTrackerInvoker : ParallelLoopBody { LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg, const Point2f* _prevPts, Point2f* _nextPts, @@ -15,7 +15,7 @@ namespace detail Size _winSize, TermCriteria _criteria, int _level, int _maxLevel, int _flags, float _minEigThreshold ); - void operator()(const BlockedRange& range) const; + void operator()(const Range& range) const; const Mat* prevImg; const Mat* nextImg; From f90fd5b0da289759a29e3129242a20c67922443f Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 30 May 2013 19:05:59 +0400 Subject: [PATCH 2/2] Split CLAHE into its own file, because it's faster that way. Yes, it's as ludicrous as it sounds, but it's still true. Bizarrely, the previous commit makes CLAHE run about 10% slower on Android, even though it doesn't even touch any CLAHE code. Splitting it off fixes that, although the reason it does is a mystery for the ages. It's cleaner when it's in its own file, anyway. ;=] --- modules/imgproc/src/clahe.cpp | 334 ++++++++++++++++++++++++++++++ modules/imgproc/src/histogram.cpp | 292 -------------------------- 2 files changed, 334 insertions(+), 292 deletions(-) create mode 100644 modules/imgproc/src/clahe.cpp diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp new file mode 100644 index 0000000000..4ce479713e --- /dev/null +++ b/modules/imgproc/src/clahe.cpp @@ -0,0 +1,334 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +// ---------------------------------------------------------------------- +// CLAHE + +namespace +{ + class CLAHE_CalcLut_Body : public cv::ParallelLoopBody + { + public: + CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) : + src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale) + { + } + + void operator ()(const cv::Range& range) const; + + private: + cv::Mat src_; + mutable cv::Mat lut_; + + cv::Size tileSize_; + int tilesX_; + int tilesY_; + int clipLimit_; + float lutScale_; + }; + + void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const + { + const int histSize = 256; + + uchar* tileLut = lut_.ptr(range.start); + const size_t lut_step = lut_.step; + + for (int k = range.start; k < range.end; ++k, tileLut += lut_step) + { + const int ty = k / tilesX_; + const int tx = k % tilesX_; + + // retrieve tile submatrix + + cv::Rect tileROI; + tileROI.x = tx * tileSize_.width; + tileROI.y = ty * tileSize_.height; + tileROI.width = tileSize_.width; + tileROI.height = tileSize_.height; + + const cv::Mat tile = src_(tileROI); + + // calc histogram + + int tileHist[histSize] = {0, }; + + int height = tileROI.height; + const size_t sstep = tile.step; + for (const uchar* ptr = tile.ptr(0); height--; ptr += sstep) + { + int x = 0; + for (; x <= tileROI.width - 4; x += 4) + { + int t0 = ptr[x], t1 = ptr[x+1]; + tileHist[t0]++; tileHist[t1]++; + t0 = ptr[x+2]; t1 = ptr[x+3]; + tileHist[t0]++; tileHist[t1]++; + } + + for (; x < tileROI.width; ++x) + tileHist[ptr[x]]++; + } + + // clip histogram + + if (clipLimit_ > 0) + { + // how many pixels were clipped + int clipped = 0; + for (int i = 0; i < histSize; ++i) + { + if (tileHist[i] > clipLimit_) + { + clipped += tileHist[i] - clipLimit_; + tileHist[i] = clipLimit_; + } + } + + // redistribute clipped pixels + int redistBatch = clipped / histSize; + int residual = clipped - redistBatch * histSize; + + for (int i = 0; i < histSize; ++i) + tileHist[i] += redistBatch; + + for (int i = 0; i < residual; ++i) + tileHist[i]++; + } + + // calc Lut + + int sum = 0; + for (int i = 0; i < histSize; ++i) + { + sum += tileHist[i]; + tileLut[i] = cv::saturate_cast(sum * lutScale_); + } + } + } + + class CLAHE_Interpolation_Body : public cv::ParallelLoopBody + { + public: + CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) : + src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) + { + } + + void operator ()(const cv::Range& range) const; + + private: + cv::Mat src_; + mutable cv::Mat dst_; + cv::Mat lut_; + + cv::Size tileSize_; + int tilesX_; + int tilesY_; + }; + + void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const + { + const size_t lut_step = lut_.step; + + for (int y = range.start; y < range.end; ++y) + { + const uchar* srcRow = src_.ptr(y); + uchar* dstRow = dst_.ptr(y); + + const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; + + int ty1 = cvFloor(tyf); + int ty2 = ty1 + 1; + + const float ya = tyf - ty1; + + ty1 = std::max(ty1, 0); + ty2 = std::min(ty2, tilesY_ - 1); + + const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_); + const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_); + + for (int x = 0; x < src_.cols; ++x) + { + const float txf = (static_cast(x) / tileSize_.width) - 0.5f; + + int tx1 = cvFloor(txf); + int tx2 = tx1 + 1; + + const float xa = txf - tx1; + + tx1 = std::max(tx1, 0); + tx2 = std::min(tx2, tilesX_ - 1); + + const int srcVal = srcRow[x]; + + const size_t ind1 = tx1 * lut_step + srcVal; + const size_t ind2 = tx2 * lut_step + srcVal; + + float res = 0; + + res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); + res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); + res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); + res += lutPlane2[ind2] * ((xa) * (ya)); + + dstRow[x] = cv::saturate_cast(res); + } + } + } + + class CLAHE_Impl : public cv::CLAHE + { + public: + CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); + + cv::AlgorithmInfo* info() const; + + void apply(cv::InputArray src, cv::OutputArray dst); + + void setClipLimit(double clipLimit); + double getClipLimit() const; + + void setTilesGridSize(cv::Size tileGridSize); + cv::Size getTilesGridSize() const; + + void collectGarbage(); + + private: + double clipLimit_; + int tilesX_; + int tilesY_; + + cv::Mat srcExt_; + cv::Mat lut_; + }; + + CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : + clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) + { + } + + CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE", + obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); + obj.info()->addParam(obj, "tilesX", obj.tilesX_); + obj.info()->addParam(obj, "tilesY", obj.tilesY_)) + + void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) + { + cv::Mat src = _src.getMat(); + + CV_Assert( src.type() == CV_8UC1 ); + + _dst.create( src.size(), src.type() ); + cv::Mat dst = _dst.getMat(); + + const int histSize = 256; + + lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1); + + cv::Size tileSize; + cv::Mat srcForLut; + + if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) + { + tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); + srcForLut = src; + } + else + { + cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101); + + tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); + srcForLut = srcExt_; + } + + const int tileSizeTotal = tileSize.area(); + const float lutScale = static_cast(histSize - 1) / tileSizeTotal; + + int clipLimit = 0; + if (clipLimit_ > 0.0) + { + clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); + clipLimit = std::max(clipLimit, 1); + } + + CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale); + cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody); + + CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_); + cv::parallel_for_(cv::Range(0, src.rows), interpolationBody); + } + + void CLAHE_Impl::setClipLimit(double clipLimit) + { + clipLimit_ = clipLimit; + } + + double CLAHE_Impl::getClipLimit() const + { + return clipLimit_; + } + + void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) + { + tilesX_ = tileGridSize.width; + tilesY_ = tileGridSize.height; + } + + cv::Size CLAHE_Impl::getTilesGridSize() const + { + return cv::Size(tilesX_, tilesY_); + } + + void CLAHE_Impl::collectGarbage() + { + srcExt_.release(); + lut_.release(); + } +} + +cv::Ptr cv::createCLAHE(double clipLimit, cv::Size tileGridSize) +{ + return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); +} diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index 5ca6c9d15b..bfcdee515f 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -3165,298 +3165,6 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) lutBody(heightRange); } -// ---------------------------------------------------------------------- -// CLAHE - -namespace -{ - class CLAHE_CalcLut_Body : public cv::ParallelLoopBody - { - public: - CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) : - src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale) - { - } - - void operator ()(const cv::Range& range) const; - - private: - cv::Mat src_; - mutable cv::Mat lut_; - - cv::Size tileSize_; - int tilesX_; - int tilesY_; - int clipLimit_; - float lutScale_; - }; - - void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const - { - const int histSize = 256; - - uchar* tileLut = lut_.ptr(range.start); - const size_t lut_step = lut_.step; - - for (int k = range.start; k < range.end; ++k, tileLut += lut_step) - { - const int ty = k / tilesX_; - const int tx = k % tilesX_; - - // retrieve tile submatrix - - cv::Rect tileROI; - tileROI.x = tx * tileSize_.width; - tileROI.y = ty * tileSize_.height; - tileROI.width = tileSize_.width; - tileROI.height = tileSize_.height; - - const cv::Mat tile = src_(tileROI); - - // calc histogram - - int tileHist[histSize] = {0, }; - - int height = tileROI.height; - const size_t sstep = tile.step; - for (const uchar* ptr = tile.ptr(0); height--; ptr += sstep) - { - int x = 0; - for (; x <= tileROI.width - 4; x += 4) - { - int t0 = ptr[x], t1 = ptr[x+1]; - tileHist[t0]++; tileHist[t1]++; - t0 = ptr[x+2]; t1 = ptr[x+3]; - tileHist[t0]++; tileHist[t1]++; - } - - for (; x < tileROI.width; ++x) - tileHist[ptr[x]]++; - } - - // clip histogram - - if (clipLimit_ > 0) - { - // how many pixels were clipped - int clipped = 0; - for (int i = 0; i < histSize; ++i) - { - if (tileHist[i] > clipLimit_) - { - clipped += tileHist[i] - clipLimit_; - tileHist[i] = clipLimit_; - } - } - - // redistribute clipped pixels - int redistBatch = clipped / histSize; - int residual = clipped - redistBatch * histSize; - - for (int i = 0; i < histSize; ++i) - tileHist[i] += redistBatch; - - for (int i = 0; i < residual; ++i) - tileHist[i]++; - } - - // calc Lut - - int sum = 0; - for (int i = 0; i < histSize; ++i) - { - sum += tileHist[i]; - tileLut[i] = cv::saturate_cast(sum * lutScale_); - } - } - } - - class CLAHE_Interpolation_Body : public cv::ParallelLoopBody - { - public: - CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) : - src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) - { - } - - void operator ()(const cv::Range& range) const; - - private: - cv::Mat src_; - mutable cv::Mat dst_; - cv::Mat lut_; - - cv::Size tileSize_; - int tilesX_; - int tilesY_; - }; - - void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const - { - const size_t lut_step = lut_.step; - - for (int y = range.start; y < range.end; ++y) - { - const uchar* srcRow = src_.ptr(y); - uchar* dstRow = dst_.ptr(y); - - const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; - - int ty1 = cvFloor(tyf); - int ty2 = ty1 + 1; - - const float ya = tyf - ty1; - - ty1 = std::max(ty1, 0); - ty2 = std::min(ty2, tilesY_ - 1); - - const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_); - const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_); - - for (int x = 0; x < src_.cols; ++x) - { - const float txf = (static_cast(x) / tileSize_.width) - 0.5f; - - int tx1 = cvFloor(txf); - int tx2 = tx1 + 1; - - const float xa = txf - tx1; - - tx1 = std::max(tx1, 0); - tx2 = std::min(tx2, tilesX_ - 1); - - const int srcVal = srcRow[x]; - - const size_t ind1 = tx1 * lut_step + srcVal; - const size_t ind2 = tx2 * lut_step + srcVal; - - float res = 0; - - res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); - res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); - res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); - res += lutPlane2[ind2] * ((xa) * (ya)); - - dstRow[x] = cv::saturate_cast(res); - } - } - } - - class CLAHE_Impl : public cv::CLAHE - { - public: - CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); - - cv::AlgorithmInfo* info() const; - - void apply(cv::InputArray src, cv::OutputArray dst); - - void setClipLimit(double clipLimit); - double getClipLimit() const; - - void setTilesGridSize(cv::Size tileGridSize); - cv::Size getTilesGridSize() const; - - void collectGarbage(); - - private: - double clipLimit_; - int tilesX_; - int tilesY_; - - cv::Mat srcExt_; - cv::Mat lut_; - }; - - CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : - clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) - { - } - - CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE", - obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); - obj.info()->addParam(obj, "tilesX", obj.tilesX_); - obj.info()->addParam(obj, "tilesY", obj.tilesY_)) - - void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) - { - cv::Mat src = _src.getMat(); - - CV_Assert( src.type() == CV_8UC1 ); - - _dst.create( src.size(), src.type() ); - cv::Mat dst = _dst.getMat(); - - const int histSize = 256; - - lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1); - - cv::Size tileSize; - cv::Mat srcForLut; - - if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) - { - tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); - srcForLut = src; - } - else - { - cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101); - - tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); - srcForLut = srcExt_; - } - - const int tileSizeTotal = tileSize.area(); - const float lutScale = static_cast(histSize - 1) / tileSizeTotal; - - int clipLimit = 0; - if (clipLimit_ > 0.0) - { - clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); - clipLimit = std::max(clipLimit, 1); - } - - CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale); - cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody); - - CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_); - cv::parallel_for_(cv::Range(0, src.rows), interpolationBody); - } - - void CLAHE_Impl::setClipLimit(double clipLimit) - { - clipLimit_ = clipLimit; - } - - double CLAHE_Impl::getClipLimit() const - { - return clipLimit_; - } - - void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) - { - tilesX_ = tileGridSize.width; - tilesY_ = tileGridSize.height; - } - - cv::Size CLAHE_Impl::getTilesGridSize() const - { - return cv::Size(tilesX_, tilesY_); - } - - void CLAHE_Impl::collectGarbage() - { - srcExt_.release(); - lut_.release(); - } -} - -cv::Ptr cv::createCLAHE(double clipLimit, cv::Size tileGridSize) -{ - return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); -} - // ---------------------------------------------------------------------- /* Implementation of RTTI and Generic Functions for CvHistogram */