From 9b55c04e7594a536de7e1a93259c0b4b02793d12 Mon Sep 17 00:00:00 2001 From: Muresan Mircea Paul Date: Wed, 10 Jun 2015 20:23:32 +0300 Subject: [PATCH] I have put all the modules in the stereo namespace changed the ptr to ptr modified the documentation modified documentation for the stereo_c documentation doc fixed two issues modfified the precomp.hpp header by explicitly adding the cvdef header from core modified comments for documentation for stereo and removed some headers added a header and modified some function definition test test 2 changed exports_w to exports removed the correct matches module --- modules/stereo/include/opencv2/stereo.hpp | 420 ++-- .../stereo/include/opencv2/stereo/stereo.hpp | 1 + .../stereo/include/opencv2/stereo/stereo_c.h | 16 +- modules/stereo/src/compat_binary_stereo.cpp | 11 +- modules/stereo/src/precomp.hpp | 3 +- modules/stereo/src/stereo_binary_bm.cpp | 1069 ++++---- modules/stereo/src/stereo_binary_sgbm.cpp | 2239 +++++++++-------- modules/stereo/test/test_precomp.hpp | 15 +- 8 files changed, 1901 insertions(+), 1873 deletions(-) diff --git a/modules/stereo/include/opencv2/stereo.hpp b/modules/stereo/include/opencv2/stereo.hpp index 00750efe5..67b4873f8 100644 --- a/modules/stereo/include/opencv2/stereo.hpp +++ b/modules/stereo/include/opencv2/stereo.hpp @@ -47,218 +47,232 @@ #include "opencv2/core.hpp" #include "opencv2/features2d.hpp" #include "opencv2/core/affine.hpp" +#include "opencv2/core/cvdef.h" -namespace cv -{ -CV_EXPORTS_W void correctMatches( InputArray F, InputArray points1, InputArray points2, - OutputArray newPoints1, OutputArray newPoints2 ); - -/** @brief Filters off small noise blobs (speckles) in the disparity map - -@param img The input 16-bit signed disparity image -@param newVal The disparity value used to paint-off the speckles -@param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not -affected by the algorithm -@param maxDiff Maximum difference between neighbor disparity pixels to put them into the same -blob. Note that since StereoBM, StereoSGBM and may be other algorithms return a fixed-point -disparity map, where disparity values are multiplied by 16, this scale factor should be taken into -account when specifying this parameter value. -@param buf The optional temporary buffer to avoid memory allocation within the function. - */ -CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal, - int maxSpeckleSize, double maxDiff, - InputOutputArray buf = noArray() ); -//! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify()) -CV_EXPORTS_W Rect getValidDisparityROI( Rect roi1, Rect roi2, - int minDisparity, int numberOfDisparities, - int SADWindowSize ); +/** +@defgroup stereo Stereo Correspondance Algorithms -//! validates disparity using the left-right check. The matrix "cost" should be computed by the stereo correspondence algorithm -CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost, - int minDisparity, int numberOfDisparities, - int disp12MaxDisp = 1 ); - -/** @brief The base class for stereo correspondence algorithms. */ -class CV_EXPORTS_W StereoMatcher : public Algorithm -{ -public: - enum { DISP_SHIFT = 4, - DISP_SCALE = (1 << DISP_SHIFT) - }; - - /** @brief Computes disparity map for the specified stereo pair - - @param left Left 8-bit single-channel image. - @param right Right image of the same size and the same type as the left one. - @param disparity Output disparity map. It has the same size as the input images. Some algorithms, - like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value - has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map. - */ - CV_WRAP virtual void compute( InputArray left, InputArray right, - OutputArray disparity ) = 0; - - CV_WRAP virtual int getMinDisparity() const = 0; - CV_WRAP virtual void setMinDisparity(int minDisparity) = 0; - - CV_WRAP virtual int getNumDisparities() const = 0; - CV_WRAP virtual void setNumDisparities(int numDisparities) = 0; - - CV_WRAP virtual int getBlockSize() const = 0; - CV_WRAP virtual void setBlockSize(int blockSize) = 0; - CV_WRAP virtual int getSpeckleWindowSize() const = 0; - CV_WRAP virtual void setSpeckleWindowSize(int speckleWindowSize) = 0; - - CV_WRAP virtual int getSpeckleRange() const = 0; - CV_WRAP virtual void setSpeckleRange(int speckleRange) = 0; - - CV_WRAP virtual int getDisp12MaxDiff() const = 0; - CV_WRAP virtual void setDisp12MaxDiff(int disp12MaxDiff) = 0; -}; - - -/** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and -contributed to OpenCV by K. Konolige. - */ -class CV_EXPORTS_W StereoBinaryBM : public StereoMatcher -{ -public: - enum { PREFILTER_NORMALIZED_RESPONSE = 0, - PREFILTER_XSOBEL = 1 - }; - - CV_WRAP virtual int getPreFilterType() const = 0; - CV_WRAP virtual void setPreFilterType(int preFilterType) = 0; - - CV_WRAP virtual int getPreFilterSize() const = 0; - CV_WRAP virtual void setPreFilterSize(int preFilterSize) = 0; - - CV_WRAP virtual int getPreFilterCap() const = 0; - CV_WRAP virtual void setPreFilterCap(int preFilterCap) = 0; - - CV_WRAP virtual int getTextureThreshold() const = 0; - CV_WRAP virtual void setTextureThreshold(int textureThreshold) = 0; - - CV_WRAP virtual int getUniquenessRatio() const = 0; - CV_WRAP virtual void setUniquenessRatio(int uniquenessRatio) = 0; - - CV_WRAP virtual int getSmallerBlockSize() const = 0; - CV_WRAP virtual void setSmallerBlockSize(int blockSize) = 0; - - CV_WRAP virtual Rect getROI1() const = 0; - CV_WRAP virtual void setROI1(Rect roi1) = 0; - - CV_WRAP virtual Rect getROI2() const = 0; - CV_WRAP virtual void setROI2(Rect roi2) = 0; - - /** @brief Creates StereoBM object - - @param numDisparities the disparity search range. For each pixel algorithm will find the best - disparity from 0 (default minimum disparity) to numDisparities. The search range can then be - shifted by changing the minimum disparity. - @param blockSize the linear size of the blocks compared by the algorithm. The size should be odd - (as the block is centered at the current pixel). Larger block size implies smoother, though less - accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher - chance for algorithm to find a wrong correspondence. - - The function create StereoBM object. You can then call StereoBM::compute() to compute disparity for - a specific stereo pair. - */ - CV_WRAP static Ptr create(int numDisparities = 0, int blockSize = 21); -}; - -/** @brief The class implements the modified H. Hirschmuller algorithm @cite HH08 that differs from the original -one as follows: - -- By default, the algorithm is single-pass, which means that you consider only 5 directions -instead of 8. Set mode=StereoSGBM::MODE_HH in createStereoSGBM to run the full variant of the -algorithm but beware that it may consume a lot of memory. -- The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the -blocks to single pixels. -- Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi -sub-pixel metric from @cite BT98 is used. Though, the color images are supported as well. -- Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for -example: pre-filtering (StereoBM::PREFILTER_XSOBEL type) and post-filtering (uniqueness -check, quadratic interpolation and speckle filtering). - -@note - - (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found - at opencv_source_code/samples/python2/stereo_match.py - */ -class CV_EXPORTS_W StereoBinarySGBM : public StereoMatcher +namespace cv { -public: - enum - { - MODE_SGBM = 0, - MODE_HH = 1 - }; - - CV_WRAP virtual int getPreFilterCap() const = 0; - CV_WRAP virtual void setPreFilterCap(int preFilterCap) = 0; - - CV_WRAP virtual int getUniquenessRatio() const = 0; - CV_WRAP virtual void setUniquenessRatio(int uniquenessRatio) = 0; - - CV_WRAP virtual int getP1() const = 0; - CV_WRAP virtual void setP1(int P1) = 0; - - CV_WRAP virtual int getP2() const = 0; - CV_WRAP virtual void setP2(int P2) = 0; - - CV_WRAP virtual int getMode() const = 0; - CV_WRAP virtual void setMode(int mode) = 0; - - /** @brief Creates StereoSGBM object - - @param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes - rectification algorithms can shift images, so this parameter needs to be adjusted accordingly. - @param numDisparities Maximum disparity minus minimum disparity. The value is always greater than - zero. In the current implementation, this parameter must be divisible by 16. - @param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be - somewhere in the 3..11 range. - @param P1 The first parameter controlling the disparity smoothness. See below. - @param P2 The second parameter controlling the disparity smoothness. The larger the values are, - the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1 - between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor - pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good - P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and - 32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively). - @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right - disparity check. Set it to a non-positive value to disable the check. - @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first - computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval. - The result values are passed to the Birchfield-Tomasi pixel cost function. - @param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function - value should "win" the second best value to consider the found match correct. Normally, a value - within the 5-15 range is good enough. - @param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles - and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the - 50-200 range. - @param speckleRange Maximum disparity variation within each connected component. If you do speckle - filtering, set the parameter to a positive value, it will be implicitly multiplied by 16. - Normally, 1 or 2 is good enough. - @param mode Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming - algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and - huge for HD-size pictures. By default, it is set to false . - - The first constructor initializes StereoSGBM with all the default parameters. So, you only have to - set StereoSGBM::numDisparities at minimum. The second constructor enables you to set each parameter - to a custom value. - */ - CV_WRAP static Ptr create(int minDisparity, int numDisparities, int blockSize, - int P1 = 0, int P2 = 0, int disp12MaxDiff = 0, - int preFilterCap = 0, int uniquenessRatio = 0, - int speckleWindowSize = 0, int speckleRange = 0, - int mode = StereoBinarySGBM::MODE_SGBM); -}; - + namespace stereo + { + +//! @addtogroup stereo +//! @{ +// void correctMatches( InputArray F, InputArray points1, InputArray points2, + // OutputArray newPoints1, OutputArray newPoints2 ); + + /** @brief Filters off small noise blobs (speckles) in the disparity map + + @param img The input 16-bit signed disparity image + @param newVal The disparity value used to paint-off the speckles + @param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not + affected by the algorithm + @param maxDiff Maximum difference between neighbor disparity pixels to put them into the same + blob. Note that since StereoBM, StereoSGBM and may be other algorithms return a fixed-point + disparity map, where disparity values are multiplied by 16, this scale factor should be taken into + account when specifying this parameter value. + @param buf The optional temporary buffer to avoid memory allocation within the function. + */ + void filterSpeckles( InputOutputArray img, double newVal, + int maxSpeckleSize, double maxDiff, + InputOutputArray buf = noArray() ); + + //! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify()) + Rect getValidDisparityROI( Rect roi1, Rect roi2, + int minDisparity, int numberOfDisparities, + int SADWindowSize ); + + //! validates disparity using the left-right check. The matrix "cost" should be computed by the stereo correspondence algorithm + void validateDisparity( InputOutputArray disparity, InputArray cost, + int minDisparity, int numberOfDisparities, + int disp12MaxDisp = 1 ); + + /** @brief The base class for stereo correspondence algorithms. + */ + class StereoMatcher : public Algorithm + { + public: + enum { DISP_SHIFT = 4, + DISP_SCALE = (1 << DISP_SHIFT) + }; + + /** @brief Computes disparity map for the specified stereo pair + + @param left Left 8-bit single-channel image. + @param right Right image of the same size and the same type as the left one. + @param disparity Output disparity map. It has the same size as the input images. Some algorithms, + like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value + has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map. + */ + virtual void compute( InputArray left, InputArray right, + OutputArray disparity ) = 0; + + virtual int getMinDisparity() const = 0; + virtual void setMinDisparity(int minDisparity) = 0; + + virtual int getNumDisparities() const = 0; + virtual void setNumDisparities(int numDisparities) = 0; + + virtual int getBlockSize() const = 0; + virtual void setBlockSize(int blockSize) = 0; + + virtual int getSpeckleWindowSize() const = 0; + virtual void setSpeckleWindowSize(int speckleWindowSize) = 0; + + virtual int getSpeckleRange() const = 0; + virtual void setSpeckleRange(int speckleRange) = 0; + + virtual int getDisp12MaxDiff() const = 0; + virtual void setDisp12MaxDiff(int disp12MaxDiff) = 0; + }; + + + /** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and + contributed to OpenCV by K. Konolige. + */ + class StereoBinaryBM : public StereoMatcher + { + public: + enum { PREFILTER_NORMALIZED_RESPONSE = 0, + PREFILTER_XSOBEL = 1 + }; + + virtual int getPreFilterType() const = 0; + virtual void setPreFilterType(int preFilterType) = 0; + + virtual int getPreFilterSize() const = 0; + virtual void setPreFilterSize(int preFilterSize) = 0; + + virtual int getPreFilterCap() const = 0; + virtual void setPreFilterCap(int preFilterCap) = 0; + + virtual int getTextureThreshold() const = 0; + virtual void setTextureThreshold(int textureThreshold) = 0; + + virtual int getUniquenessRatio() const = 0; + virtual void setUniquenessRatio(int uniquenessRatio) = 0; + + virtual int getSmallerBlockSize() const = 0; + virtual void setSmallerBlockSize(int blockSize) = 0; + + virtual Rect getROI1() const = 0; + virtual void setROI1(Rect roi1) = 0; + + virtual Rect getROI2() const = 0; + virtual void setROI2(Rect roi2) = 0; + + /** @brief Creates StereoBM object + + @param numDisparities the disparity search range. For each pixel algorithm will find the best + disparity from 0 (default minimum disparity) to numDisparities. The search range can then be + shifted by changing the minimum disparity. + @param blockSize the linear size of the blocks compared by the algorithm. The size should be odd + (as the block is centered at the current pixel). Larger block size implies smoother, though less + accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher + chance for algorithm to find a wrong correspondence. + + The function create StereoBM object. You can then call StereoBM::compute() to compute disparity for + a specific stereo pair. + */ + CV_EXPORTS static Ptr< cv::stereo::StereoBinaryBM > create(int numDisparities = 0, int blockSize = 21); + }; + + /** @brief The class implements the modified H. Hirschmuller algorithm @cite HH08 that differs from the original + one as follows: + + - By default, the algorithm is single-pass, which means that you consider only 5 directions + instead of 8. Set mode=StereoSGBM::MODE_HH in createStereoSGBM to run the full variant of the + algorithm but beware that it may consume a lot of memory. + - The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the + blocks to single pixels. + - Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi + sub-pixel metric from @cite BT98 is used. Though, the color images are supported as well. + - Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for + example: pre-filtering (StereoBM::PREFILTER_XSOBEL type) and post-filtering (uniqueness + check, quadratic interpolation and speckle filtering). + + @note + - (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found + at opencv_source_code/samples/python2/stereo_match.py + */ + class StereoBinarySGBM : public StereoMatcher + { + public: + enum + { + MODE_SGBM = 0, + MODE_HH = 1 + }; + + virtual int getPreFilterCap() const = 0; + virtual void setPreFilterCap(int preFilterCap) = 0; + + virtual int getUniquenessRatio() const = 0; + virtual void setUniquenessRatio(int uniquenessRatio) = 0; + + virtual int getP1() const = 0; + virtual void setP1(int P1) = 0; + + virtual int getP2() const = 0; + virtual void setP2(int P2) = 0; + + virtual int getMode() const = 0; + virtual void setMode(int mode) = 0; + + /** @brief Creates StereoSGBM object + + @param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes + rectification algorithms can shift images, so this parameter needs to be adjusted accordingly. + @param numDisparities Maximum disparity minus minimum disparity. The value is always greater than + zero. In the current implementation, this parameter must be divisible by 16. + @param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be + somewhere in the 3..11 range. + @param P1 The first parameter controlling the disparity smoothness. See below. + @param P2 The second parameter controlling the disparity smoothness. The larger the values are, + the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1 + between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor + pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good + P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and + 32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively). + @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right + disparity check. Set it to a non-positive value to disable the check. + @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first + computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval. + The result values are passed to the Birchfield-Tomasi pixel cost function. + @param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function + value should "win" the second best value to consider the found match correct. Normally, a value + within the 5-15 range is good enough. + @param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles + and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the + 50-200 range. + @param speckleRange Maximum disparity variation within each connected component. If you do speckle + filtering, set the parameter to a positive value, it will be implicitly multiplied by 16. + Normally, 1 or 2 is good enough. + @param mode Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming + algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and + huge for HD-size pictures. By default, it is set to false . + + The first constructor initializes StereoSGBM with all the default parameters. So, you only have to + set StereoSGBM::numDisparities at minimum. The second constructor enables you to set each parameter + to a custom value. + */ + CV_EXPORTS static Ptr create(int minDisparity, int numDisparities, int blockSize, + int P1 = 0, int P2 = 0, int disp12MaxDiff = 0, + int preFilterCap = 0, int uniquenessRatio = 0, + int speckleWindowSize = 0, int speckleRange = 0, + int mode = StereoBinarySGBM::MODE_SGBM); + }; + //! @} + }//sterep } // cv #ifndef DISABLE_OPENCV_24_COMPATIBILITY -#include "opencv2/calib3d/calib3d_c.h" +#include "opencv2/stereo/stereo_c.h" #endif #endif + diff --git a/modules/stereo/include/opencv2/stereo/stereo.hpp b/modules/stereo/include/opencv2/stereo/stereo.hpp index 8ca9dedd2..bab7c41f2 100644 --- a/modules/stereo/include/opencv2/stereo/stereo.hpp +++ b/modules/stereo/include/opencv2/stereo/stereo.hpp @@ -46,3 +46,4 @@ #endif #include "opencv2/stereo.hpp" + diff --git a/modules/stereo/include/opencv2/stereo/stereo_c.h b/modules/stereo/include/opencv2/stereo/stereo_c.h index 9bbd365fa..b6d25d483 100644 --- a/modules/stereo/include/opencv2/stereo/stereo_c.h +++ b/modules/stereo/include/opencv2/stereo/stereo_c.h @@ -52,17 +52,14 @@ extern "C" { /** @addtogroup stereo_c @{ - */ + **/ -/****************************************************************************************\ -* Stereo * -\****************************************************************************************/ -/* stereo correspondence parameters and functions */ + //! stereo correspondence parameters and functions #define CV_STEREO_BM_NORMALIZED_RESPONSE 0 #define CV_STEREO_BM_XSOBEL 1 -/* Block matching algorithm structure */ +//! Block matching algorithm structure typedef struct CvStereoBinaryBMState { // pre-filtering (normalization of input images) @@ -109,14 +106,15 @@ CVAPI(void) cvReleaseStereoBinaryBMState( CvStereoBinaryBMState** state ); CVAPI(void) cvFindStereoCorrespondenceBinaryBM( const CvArr* left, const CvArr* right, CvArr* disparity, CvStereoBinaryBMState* state ); -CVAPI(CvRect) cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity, +CVAPI(CvRect) cvStereoBinaryGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity, int numberOfDisparities, int SADWindowSize ); -CVAPI(void) cvValidateDisparity( CvArr* disparity, const CvArr* cost, +CVAPI(void) cvStereoBinaryValidateDisparity( CvArr* disparity, const CvArr* cost, int minDisparity, int numberOfDisparities, int disp12MaxDiff CV_DEFAULT(1) ); - +/** @} stereo_c */ #ifdef __cplusplus } // extern "C" #endif #endif /* __OPENCV_STEREO_C_H__ */ + diff --git a/modules/stereo/src/compat_binary_stereo.cpp b/modules/stereo/src/compat_binary_stereo.cpp index ef7a97236..986616542 100644 --- a/modules/stereo/src/compat_binary_stereo.cpp +++ b/modules/stereo/src/compat_binary_stereo.cpp @@ -92,7 +92,7 @@ void cvFindStereoCorrespondenceBinaryBM( const CvArr* leftarr, const CvArr* righ CV_Assert( state != 0 ); - cv::Ptr sm = cv::StereoBinaryBM::create(state->numberOfDisparities, + cv::Ptr sm = cv::stereo::StereoBinaryBM::create(state->numberOfDisparities, state->SADWindowSize); sm->setPreFilterType(state->preFilterType); sm->setPreFilterSize(state->preFilterSize); @@ -108,17 +108,18 @@ void cvFindStereoCorrespondenceBinaryBM( const CvArr* leftarr, const CvArr* righ sm->compute(left, right, disp); } -CvRect cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity, +CvRect cvStereoBinaryGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity, int numberOfDisparities, int SADWindowSize ) { - return (CvRect)cv::getValidDisparityROI( roi1, roi2, minDisparity, + return (CvRect)cv::stereo::getValidDisparityROI( roi1, roi2, minDisparity, numberOfDisparities, SADWindowSize ); } -void cvValidateDisparity( CvArr* _disp, const CvArr* _cost, int minDisparity, +void cvStereoBinaryValidateDisparity( CvArr* _disp, const CvArr* _cost, int minDisparity, int numberOfDisparities, int disp12MaxDiff ) { cv::Mat disp = cv::cvarrToMat(_disp), cost = cv::cvarrToMat(_cost); - cv::validateDisparity( disp, cost, minDisparity, numberOfDisparities, disp12MaxDiff ); + cv::stereo::validateDisparity( disp, cost, minDisparity, numberOfDisparities, disp12MaxDiff ); } + diff --git a/modules/stereo/src/precomp.hpp b/modules/stereo/src/precomp.hpp index 28c34a1cd..b5036f095 100644 --- a/modules/stereo/src/precomp.hpp +++ b/modules/stereo/src/precomp.hpp @@ -45,9 +45,10 @@ #include "opencv2/stereo.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/features2d.hpp" +#include "opencv2/core.hpp" #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" -#include "opencv2/core.hpp" +#include "opencv2/core/cvdef.h" #include "opencv2/highgui.hpp" #include diff --git a/modules/stereo/src/stereo_binary_bm.cpp b/modules/stereo/src/stereo_binary_bm.cpp index f9a49a42d..5f1e48e26 100644 --- a/modules/stereo/src/stereo_binary_bm.cpp +++ b/modules/stereo/src/stereo_binary_bm.cpp @@ -51,686 +51,687 @@ namespace cv { - - struct StereoBinaryBMParams + namespace stereo { - StereoBinaryBMParams(int _numDisparities = 64, int _SADWindowSize = 9) + struct StereoBinaryBMParams { - preFilterType = StereoBinaryBM::PREFILTER_XSOBEL; - preFilterSize = 9; - preFilterCap = 31; - SADWindowSize = _SADWindowSize; - minDisparity = 0; - numDisparities = _numDisparities > 0 ? _numDisparities : 64; - textureThreshold = 10; - uniquenessRatio = 15; - speckleRange = speckleWindowSize = 0; - roi1 = roi2 = Rect(0, 0, 0, 0); - disp12MaxDiff = -1; - dispType = CV_16S; - } - - int preFilterType; - int preFilterSize; - int preFilterCap; - int SADWindowSize; - int minDisparity; - int numDisparities; - int textureThreshold; - int uniquenessRatio; - int speckleRange; - int speckleWindowSize; - Rect roi1, roi2; - int disp12MaxDiff; - int dispType; - }; - - static void prefilterNorm(const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf) - { - int x, y, wsz2 = winsize / 2; - int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32); - int scale_g = winsize*winsize / 8, scale_s = (1024 + scale_g) / (scale_g * 2); - const int OFS = 256 * 5, TABSZ = OFS * 2 + 256; - uchar tab[TABSZ]; - const uchar* sptr = src.ptr(); - int srcstep = (int)src.step; - Size size = src.size(); - - scale_g *= scale_s; - - for (x = 0; x < TABSZ; x++) - tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero); - - for (x = 0; x < size.width; x++) - vsum[x] = (ushort)(sptr[x] * (wsz2 + 2)); + StereoBinaryBMParams(int _numDisparities = 64, int _SADWindowSize = 9) + { + preFilterType = StereoBinaryBM::PREFILTER_XSOBEL; + preFilterSize = 9; + preFilterCap = 31; + SADWindowSize = _SADWindowSize; + minDisparity = 0; + numDisparities = _numDisparities > 0 ? _numDisparities : 64; + textureThreshold = 10; + uniquenessRatio = 15; + speckleRange = speckleWindowSize = 0; + roi1 = roi2 = Rect(0, 0, 0, 0); + disp12MaxDiff = -1; + dispType = CV_16S; + } - for (y = 1; y < wsz2; y++) + int preFilterType; + int preFilterSize; + int preFilterCap; + int SADWindowSize; + int minDisparity; + int numDisparities; + int textureThreshold; + int uniquenessRatio; + int speckleRange; + int speckleWindowSize; + Rect roi1, roi2; + int disp12MaxDiff; + int dispType; + }; + + static void prefilterNorm(const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf) { - for (x = 0; x < size.width; x++) - vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]); - } + int x, y, wsz2 = winsize / 2; + int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32); + int scale_g = winsize*winsize / 8, scale_s = (1024 + scale_g) / (scale_g * 2); + const int OFS = 256 * 5, TABSZ = OFS * 2 + 256; + uchar tab[TABSZ]; + const uchar* sptr = src.ptr(); + int srcstep = (int)src.step; + Size size = src.size(); - for (y = 0; y < size.height; y++) - { - const uchar* top = sptr + srcstep*MAX(y - wsz2 - 1, 0); - const uchar* bottom = sptr + srcstep*MIN(y + wsz2, size.height - 1); - const uchar* prev = sptr + srcstep*MAX(y - 1, 0); - const uchar* curr = sptr + srcstep*y; - const uchar* next = sptr + srcstep*MIN(y + 1, size.height - 1); - uchar* dptr = dst.ptr(y); + scale_g *= scale_s; + + for (x = 0; x < TABSZ; x++) + tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero); for (x = 0; x < size.width; x++) - vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]); + vsum[x] = (ushort)(sptr[x] * (wsz2 + 2)); - for (x = 0; x <= wsz2; x++) + for (y = 1; y < wsz2; y++) { - vsum[-x - 1] = vsum[0]; - vsum[size.width + x] = vsum[size.width - 1]; + for (x = 0; x < size.width; x++) + vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]); } - int sum = vsum[0] * (wsz2 + 1); - for (x = 1; x <= wsz2; x++) - sum += vsum[x]; + for (y = 0; y < size.height; y++) + { + const uchar* top = sptr + srcstep*MAX(y - wsz2 - 1, 0); + const uchar* bottom = sptr + srcstep*MIN(y + wsz2, size.height - 1); + const uchar* prev = sptr + srcstep*MAX(y - 1, 0); + const uchar* curr = sptr + srcstep*y; + const uchar* next = sptr + srcstep*MIN(y + 1, size.height - 1); + uchar* dptr = dst.ptr(y); - int val = ((curr[0] * 5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10; - dptr[0] = tab[val + OFS]; + for (x = 0; x < size.width; x++) + vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]); + + for (x = 0; x <= wsz2; x++) + { + vsum[-x - 1] = vsum[0]; + vsum[size.width + x] = vsum[size.width - 1]; + } + + int sum = vsum[0] * (wsz2 + 1); + for (x = 1; x <= wsz2; x++) + sum += vsum[x]; + + int val = ((curr[0] * 5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10; + dptr[0] = tab[val + OFS]; + + for (x = 1; x < size.width - 1; x++) + { + sum += vsum[x + wsz2] - vsum[x - wsz2 - 1]; + val = ((curr[x] * 4 + curr[x - 1] + curr[x + 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10; + dptr[x] = tab[val + OFS]; + } - for (x = 1; x < size.width - 1; x++) - { sum += vsum[x + wsz2] - vsum[x - wsz2 - 1]; - val = ((curr[x] * 4 + curr[x - 1] + curr[x + 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10; + val = ((curr[x] * 5 + curr[x - 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10; dptr[x] = tab[val + OFS]; } - - sum += vsum[x + wsz2] - vsum[x - wsz2 - 1]; - val = ((curr[x] * 5 + curr[x - 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10; - dptr[x] = tab[val + OFS]; } - } - static void - prefilterXSobel(const Mat& src, Mat& dst, int ftzero) - { - int x, y; - const int OFS = 256 * 4, TABSZ = OFS * 2 + 256; - uchar tab[TABSZ]; - Size size = src.size(); + static void + prefilterXSobel(const Mat& src, Mat& dst, int ftzero) + { + int x, y; + const int OFS = 256 * 4, TABSZ = OFS * 2 + 256; + uchar tab[TABSZ]; + Size size = src.size(); - for (x = 0; x < TABSZ; x++) - tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero); - uchar val0 = tab[0 + OFS]; + for (x = 0; x < TABSZ; x++) + tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero); + uchar val0 = tab[0 + OFS]; #if CV_SSE2 - volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); + volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); #endif - for (y = 0; y < size.height - 1; y += 2) - { - const uchar* srow1 = src.ptr(y); - const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1; - const uchar* srow2 = y < size.height - 1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1; - const uchar* srow3 = y < size.height - 2 ? srow1 + src.step * 2 : srow1; - uchar* dptr0 = dst.ptr(y); - uchar* dptr1 = dptr0 + dst.step; + for (y = 0; y < size.height - 1; y += 2) + { + const uchar* srow1 = src.ptr(y); + const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1; + const uchar* srow2 = y < size.height - 1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1; + const uchar* srow3 = y < size.height - 2 ? srow1 + src.step * 2 : srow1; + uchar* dptr0 = dst.ptr(y); + uchar* dptr1 = dptr0 + dst.step; - dptr0[0] = dptr0[size.width - 1] = dptr1[0] = dptr1[size.width - 1] = val0; - x = 1; + dptr0[0] = dptr0[size.width - 1] = dptr1[0] = dptr1[size.width - 1] = val0; + x = 1; #if CV_SSE2 - if (useSIMD) - { - __m128i z = _mm_setzero_si128(), ftz = _mm_set1_epi16((short)ftzero), - ftz2 = _mm_set1_epi8(cv::saturate_cast(ftzero * 2)); - for (; x <= size.width - 9; x += 8) + if (useSIMD) { - __m128i c0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x - 1)), z); - __m128i c1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x - 1)), z); - __m128i d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x + 1)), z); - __m128i d1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x + 1)), z); + __m128i z = _mm_setzero_si128(), ftz = _mm_set1_epi16((short)ftzero), + ftz2 = _mm_set1_epi8(cv::saturate_cast(ftzero * 2)); + for (; x <= size.width - 9; x += 8) + { + __m128i c0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x - 1)), z); + __m128i c1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x - 1)), z); + __m128i d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x + 1)), z); + __m128i d1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x + 1)), z); - d0 = _mm_sub_epi16(d0, c0); - d1 = _mm_sub_epi16(d1, c1); + d0 = _mm_sub_epi16(d0, c0); + d1 = _mm_sub_epi16(d1, c1); - __m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z); - __m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z); - __m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z); - __m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z); + __m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z); + __m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z); + __m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z); + __m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z); - d2 = _mm_sub_epi16(d2, c2); - d3 = _mm_sub_epi16(d3, c3); + d2 = _mm_sub_epi16(d2, c2); + d3 = _mm_sub_epi16(d3, c3); - __m128i v0 = _mm_add_epi16(d0, _mm_add_epi16(d2, _mm_add_epi16(d1, d1))); - __m128i v1 = _mm_add_epi16(d1, _mm_add_epi16(d3, _mm_add_epi16(d2, d2))); - v0 = _mm_packus_epi16(_mm_add_epi16(v0, ftz), _mm_add_epi16(v1, ftz)); - v0 = _mm_min_epu8(v0, ftz2); + __m128i v0 = _mm_add_epi16(d0, _mm_add_epi16(d2, _mm_add_epi16(d1, d1))); + __m128i v1 = _mm_add_epi16(d1, _mm_add_epi16(d3, _mm_add_epi16(d2, d2))); + v0 = _mm_packus_epi16(_mm_add_epi16(v0, ftz), _mm_add_epi16(v1, ftz)); + v0 = _mm_min_epu8(v0, ftz2); - _mm_storel_epi64((__m128i*)(dptr0 + x), v0); - _mm_storel_epi64((__m128i*)(dptr1 + x), _mm_unpackhi_epi64(v0, v0)); + _mm_storel_epi64((__m128i*)(dptr0 + x), v0); + _mm_storel_epi64((__m128i*)(dptr1 + x), _mm_unpackhi_epi64(v0, v0)); + } } - } #endif - for (; x < size.width - 1; x++) - { - int d0 = srow0[x + 1] - srow0[x - 1], d1 = srow1[x + 1] - srow1[x - 1], - d2 = srow2[x + 1] - srow2[x - 1], d3 = srow3[x + 1] - srow3[x - 1]; - int v0 = tab[d0 + d1 * 2 + d2 + OFS]; - int v1 = tab[d1 + d2 * 2 + d3 + OFS]; - dptr0[x] = (uchar)v0; - dptr1[x] = (uchar)v1; + for (; x < size.width - 1; x++) + { + int d0 = srow0[x + 1] - srow0[x - 1], d1 = srow1[x + 1] - srow1[x - 1], + d2 = srow2[x + 1] - srow2[x - 1], d3 = srow3[x + 1] - srow3[x - 1]; + int v0 = tab[d0 + d1 * 2 + d2 + OFS]; + int v1 = tab[d1 + d2 * 2 + d3 + OFS]; + dptr0[x] = (uchar)v0; + dptr1[x] = (uchar)v1; + } } - } - for (; y < size.height; y++) - { - uchar* dptr = dst.ptr(y); - for (x = 0; x < size.width; x++) - dptr[x] = val0; - } - } - - static const int DISPARITY_SHIFT = 4; - - static void - findStereoCorrespondenceBM(const Mat& left, const Mat& right, - Mat& disp, Mat& cost, const StereoBinaryBMParams& state, - uchar* buf, int _dy0, int _dy1) - { - const int ALIGN = 16; - int x, y, d; - int wsz = state.SADWindowSize, wsz2 = wsz / 2; - int dy0 = MIN(_dy0, wsz2 + 1), dy1 = MIN(_dy1, wsz2 + 1); - int ndisp = state.numDisparities; - int mindisp = state.minDisparity; - int lofs = MAX(ndisp - 1 + mindisp, 0); - int rofs = -MIN(ndisp - 1 + mindisp, 0); - int width = left.cols, height = left.rows; - int width1 = width - rofs - ndisp + 1; - int ftzero = state.preFilterCap; - int textureThreshold = state.textureThreshold; - int uniquenessRatio = state.uniquenessRatio; - short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT); - - int *sad, *hsad0, *hsad, *hsad_sub, *htext; - uchar *cbuf0, *cbuf; - const uchar* lptr0 = left.ptr() + lofs; - const uchar* rptr0 = right.ptr() + rofs; - const uchar *lptr, *lptr_sub, *rptr; - short* dptr = disp.ptr(); - int sstep = (int)left.step; - int dstep = (int)(disp.step / sizeof(dptr[0])); - int cstep = (height + dy0 + dy1)*ndisp; - int costbuf = 0; - int coststep = cost.data ? (int)(cost.step / sizeof(costbuf)) : 0; - const int TABSZ = 256; - uchar tab[TABSZ]; - - sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); - hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); - htext = (int*)alignPtr((int*)(hsad0 + (height + dy1)*ndisp) + wsz2 + 2, ALIGN); - cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); - - for (x = 0; x < TABSZ; x++) - tab[x] = (uchar)std::abs(x - ftzero); - - // initialize buffers - memset(hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0])); - memset(htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0])); - - for (x = -wsz2 - 1; x < wsz2; x++) - { - hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp; - lptr = lptr0 + std::min(std::max(x, -lofs), width - lofs - 1) - dy0*sstep; - rptr = rptr0 + std::min(std::max(x, -rofs), width - rofs - 1) - dy0*sstep; - for (y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep) + for (; y < size.height; y++) { - int lval = lptr[0]; - for (d = 0; d < ndisp; d++) - { - int diff = std::abs(lval - rptr[d]); - cbuf[d] = (uchar)diff; - hsad[d] = (int)(hsad[d] + diff); - } - htext[y] += tab[lval]; + uchar* dptr = dst.ptr(y); + for (x = 0; x < size.width; x++) + dptr[x] = val0; } } - // initialize the left and right borders of the disparity map - for (y = 0; y < height; y++) - { - for (x = 0; x < lofs; x++) - dptr[y*dstep + x] = FILTERED; - for (x = lofs + width1; x < width; x++) - dptr[y*dstep + x] = FILTERED; - } - dptr += lofs; + static const int DISPARITY_SHIFT = 4; - for (x = 0; x < width1; x++, dptr++) + static void + findStereoCorrespondenceBM(const Mat& left, const Mat& right, + Mat& disp, Mat& cost, const StereoBinaryBMParams& state, + uchar* buf, int _dy0, int _dy1) { - int* costptr = cost.data ? cost.ptr() + lofs + x : &costbuf; - int x0 = x - wsz2 - 1, x1 = x + wsz2; - const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp; - cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp; - hsad = hsad0 - dy0*ndisp; - lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width - 1 - lofs) - dy0*sstep; - lptr = lptr0 + MIN(MAX(x1, -lofs), width - 1 - lofs) - dy0*sstep; - rptr = rptr0 + MIN(MAX(x1, -rofs), width - 1 - rofs) - dy0*sstep; - - for (y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp, - hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep) + const int ALIGN = 16; + int x, y, d; + int wsz = state.SADWindowSize, wsz2 = wsz / 2; + int dy0 = MIN(_dy0, wsz2 + 1), dy1 = MIN(_dy1, wsz2 + 1); + int ndisp = state.numDisparities; + int mindisp = state.minDisparity; + int lofs = MAX(ndisp - 1 + mindisp, 0); + int rofs = -MIN(ndisp - 1 + mindisp, 0); + int width = left.cols, height = left.rows; + int width1 = width - rofs - ndisp + 1; + int ftzero = state.preFilterCap; + int textureThreshold = state.textureThreshold; + int uniquenessRatio = state.uniquenessRatio; + short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT); + + int *sad, *hsad0, *hsad, *hsad_sub, *htext; + uchar *cbuf0, *cbuf; + const uchar* lptr0 = left.ptr() + lofs; + const uchar* rptr0 = right.ptr() + rofs; + const uchar *lptr, *lptr_sub, *rptr; + short* dptr = disp.ptr(); + int sstep = (int)left.step; + int dstep = (int)(disp.step / sizeof(dptr[0])); + int cstep = (height + dy0 + dy1)*ndisp; + int costbuf = 0; + int coststep = cost.data ? (int)(cost.step / sizeof(costbuf)) : 0; + const int TABSZ = 256; + uchar tab[TABSZ]; + + sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); + hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); + htext = (int*)alignPtr((int*)(hsad0 + (height + dy1)*ndisp) + wsz2 + 2, ALIGN); + cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); + + for (x = 0; x < TABSZ; x++) + tab[x] = (uchar)std::abs(x - ftzero); + + // initialize buffers + memset(hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0])); + memset(htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0])); + + for (x = -wsz2 - 1; x < wsz2; x++) { - int lval = lptr[0]; - for (d = 0; d < ndisp; d++) + hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp; + lptr = lptr0 + std::min(std::max(x, -lofs), width - lofs - 1) - dy0*sstep; + rptr = rptr0 + std::min(std::max(x, -rofs), width - rofs - 1) - dy0*sstep; + for (y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep) { - int diff = std::abs(lval - rptr[d]); - cbuf[d] = (uchar)diff; - hsad[d] = hsad[d] + diff - cbuf_sub[d]; + int lval = lptr[0]; + for (d = 0; d < ndisp; d++) + { + int diff = std::abs(lval - rptr[d]); + cbuf[d] = (uchar)diff; + hsad[d] = (int)(hsad[d] + diff); + } + htext[y] += tab[lval]; } - htext[y] += tab[lval] - tab[lptr_sub[0]]; } - // fill borders - for (y = dy1; y <= wsz2; y++) - htext[height + y] = htext[height + dy1 - 1]; - for (y = -wsz2 - 1; y < -dy0; y++) - htext[y] = htext[-dy0]; - - // initialize sums - int tsum = 0; + // initialize the left and right borders of the disparity map + for (y = 0; y < height; y++) { - for (d = 0; d < ndisp; d++) - sad[d] = (int)(hsad0[d - ndisp*dy0] * (wsz2 + 2 - dy0)); - - hsad = hsad0 + (1 - dy0)*ndisp; - for (y = 1 - dy0; y < wsz2; y++, hsad += ndisp) - for (d = 0; d < ndisp; d++) - sad[d] = (int)(sad[d] + hsad[d]); - - for (y = -wsz2 - 1; y < wsz2; y++) - tsum += htext[y]; + for (x = 0; x < lofs; x++) + dptr[y*dstep + x] = FILTERED; + for (x = lofs + width1; x < width; x++) + dptr[y*dstep + x] = FILTERED; } - // finally, start the real processing + dptr += lofs; + + for (x = 0; x < width1; x++, dptr++) { - for (y = 0; y < height; y++) + int* costptr = cost.data ? cost.ptr() + lofs + x : &costbuf; + int x0 = x - wsz2 - 1, x1 = x + wsz2; + const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp; + cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp; + hsad = hsad0 - dy0*ndisp; + lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width - 1 - lofs) - dy0*sstep; + lptr = lptr0 + MIN(MAX(x1, -lofs), width - 1 - lofs) - dy0*sstep; + rptr = rptr0 + MIN(MAX(x1, -rofs), width - 1 - rofs) - dy0*sstep; + + for (y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp, + hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep) { - int minsad = INT_MAX, mind = -1; - hsad = hsad0 + MIN(y + wsz2, height + dy1 - 1)*ndisp; - hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; - + int lval = lptr[0]; for (d = 0; d < ndisp; d++) { - int currsad = sad[d] + hsad[d] - hsad_sub[d]; - sad[d] = currsad; - if (currsad < minsad) - { - minsad = currsad; - mind = d; - } + int diff = std::abs(lval - rptr[d]); + cbuf[d] = (uchar)diff; + hsad[d] = hsad[d] + diff - cbuf_sub[d]; } + htext[y] += tab[lval] - tab[lptr_sub[0]]; + } - tsum += htext[y + wsz2] - htext[y - wsz2 - 1]; - if (tsum < textureThreshold) - { - dptr[y*dstep] = FILTERED; - continue; - } + // fill borders + for (y = dy1; y <= wsz2; y++) + htext[height + y] = htext[height + dy1 - 1]; + for (y = -wsz2 - 1; y < -dy0; y++) + htext[y] = htext[-dy0]; - if (uniquenessRatio > 0) + // initialize sums + int tsum = 0; + { + for (d = 0; d < ndisp; d++) + sad[d] = (int)(hsad0[d - ndisp*dy0] * (wsz2 + 2 - dy0)); + + hsad = hsad0 + (1 - dy0)*ndisp; + for (y = 1 - dy0; y < wsz2; y++, hsad += ndisp) + for (d = 0; d < ndisp; d++) + sad[d] = (int)(sad[d] + hsad[d]); + + for (y = -wsz2 - 1; y < wsz2; y++) + tsum += htext[y]; + } + // finally, start the real processing + { + for (y = 0; y < height; y++) { - int thresh = minsad + (minsad * uniquenessRatio / 100); + int minsad = INT_MAX, mind = -1; + hsad = hsad0 + MIN(y + wsz2, height + dy1 - 1)*ndisp; + hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; + for (d = 0; d < ndisp; d++) { - if ((d < mind - 1 || d > mind + 1) && sad[d] <= thresh) - break; + int currsad = sad[d] + hsad[d] - hsad_sub[d]; + sad[d] = currsad; + if (currsad < minsad) + { + minsad = currsad; + mind = d; + } } - if (d < ndisp) + + tsum += htext[y + wsz2] - htext[y - wsz2 - 1]; + if (tsum < textureThreshold) { dptr[y*dstep] = FILTERED; continue; } - } - { - sad[-1] = sad[1]; - sad[ndisp] = sad[ndisp - 2]; - int p = sad[mind + 1], n = sad[mind - 1]; - d = p + n - 2 * sad[mind] + std::abs(p - n); - dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp) * 256 + (d != 0 ? (p - n) * 256 / d : 0) + 15) >> 4); - costptr[y*coststep] = sad[mind]; - } + if (uniquenessRatio > 0) + { + int thresh = minsad + (minsad * uniquenessRatio / 100); + for (d = 0; d < ndisp; d++) + { + if ((d < mind - 1 || d > mind + 1) && sad[d] <= thresh) + break; + } + if (d < ndisp) + { + dptr[y*dstep] = FILTERED; + continue; + } + } + + { + sad[-1] = sad[1]; + sad[ndisp] = sad[ndisp - 2]; + int p = sad[mind + 1], n = sad[mind - 1]; + d = p + n - 2 * sad[mind] + std::abs(p - n); + dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp) * 256 + (d != 0 ? (p - n) * 256 / d : 0) + 15) >> 4); + costptr[y*coststep] = sad[mind]; + } + } } } } - } - struct PrefilterInvoker : public ParallelLoopBody - { - PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right, - uchar* buf0, uchar* buf1, StereoBinaryBMParams* _state) + struct PrefilterInvoker : public ParallelLoopBody { - imgs0[0] = &left0; imgs0[1] = &right0; - imgs[0] = &left; imgs[1] = &right; - buf[0] = buf0; buf[1] = buf1; - state = _state; - } - - void operator()(const Range& range) const - { - for (int i = range.start; i < range.end; i++) + PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right, + uchar* buf0, uchar* buf1, StereoBinaryBMParams* _state) { - if (state->preFilterType == StereoBinaryBM::PREFILTER_NORMALIZED_RESPONSE) - prefilterNorm(*imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i]); - else - prefilterXSobel(*imgs0[i], *imgs[i], state->preFilterCap); + imgs0[0] = &left0; imgs0[1] = &right0; + imgs[0] = &left; imgs[1] = &right; + buf[0] = buf0; buf[1] = buf1; + state = _state; } - } - const Mat* imgs0[2]; - Mat* imgs[2]; - uchar* buf[2]; - StereoBinaryBMParams* state; - }; + void operator()(const Range& range) const + { + for (int i = range.start; i < range.end; i++) + { + if (state->preFilterType == StereoBinaryBM::PREFILTER_NORMALIZED_RESPONSE) + prefilterNorm(*imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i]); + else + prefilterXSobel(*imgs0[i], *imgs[i], state->preFilterCap); + } + } - struct FindStereoCorrespInvoker : public ParallelLoopBody - { - FindStereoCorrespInvoker(const Mat& _left, const Mat& _right, - Mat& _disp, StereoBinaryBMParams* _state, - int _nstripes, size_t _stripeBufSize, - bool _useShorts, Rect _validDisparityRect, - Mat& _slidingSumBuf, Mat& _cost) - { - left = &_left; right = &_right; - disp = &_disp; state = _state; - nstripes = _nstripes; stripeBufSize = _stripeBufSize; - useShorts = _useShorts; - validDisparityRect = _validDisparityRect; - slidingSumBuf = &_slidingSumBuf; - cost = &_cost; - } + const Mat* imgs0[2]; + Mat* imgs[2]; + uchar* buf[2]; + StereoBinaryBMParams* state; + }; - void operator()(const Range& range) const + struct FindStereoCorrespInvoker : public ParallelLoopBody { - int cols = left->cols, rows = left->rows; - int _row0 = std::min(cvRound(range.start * rows / nstripes), rows); - int _row1 = std::min(cvRound(range.end * rows / nstripes), rows); - uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize; - int FILTERED = (state->minDisparity - 1) * 16; - - Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); - if (roi.height == 0) - return; - int row0 = roi.y; - int row1 = roi.y + roi.height; - - Mat part; - if (row0 > _row0) + FindStereoCorrespInvoker(const Mat& _left, const Mat& _right, + Mat& _disp, StereoBinaryBMParams* _state, + int _nstripes, size_t _stripeBufSize, + bool _useShorts, Rect _validDisparityRect, + Mat& _slidingSumBuf, Mat& _cost) { - part = disp->rowRange(_row0, row0); - part = Scalar::all(FILTERED); + left = &_left; right = &_right; + disp = &_disp; state = _state; + nstripes = _nstripes; stripeBufSize = _stripeBufSize; + useShorts = _useShorts; + validDisparityRect = _validDisparityRect; + slidingSumBuf = &_slidingSumBuf; + cost = &_cost; } - if (_row1 > row1) + + void operator()(const Range& range) const { - part = disp->rowRange(row1, _row1); - part = Scalar::all(FILTERED); - } + int cols = left->cols, rows = left->rows; + int _row0 = std::min(cvRound(range.start * rows / nstripes), rows); + int _row1 = std::min(cvRound(range.end * rows / nstripes), rows); + uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize; + int FILTERED = (state->minDisparity - 1) * 16; + + Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); + if (roi.height == 0) + return; + int row0 = roi.y; + int row1 = roi.y + roi.height; + + Mat part; + if (row0 > _row0) + { + part = disp->rowRange(_row0, row0); + part = Scalar::all(FILTERED); + } + if (_row1 > row1) + { + part = disp->rowRange(row1, _row1); + part = Scalar::all(FILTERED); + } - Mat left_i = left->rowRange(row0, row1); - Mat right_i = right->rowRange(row0, row1); - Mat disp_i = disp->rowRange(row0, row1); - Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat(); + Mat left_i = left->rowRange(row0, row1); + Mat right_i = right->rowRange(row0, row1); + Mat disp_i = disp->rowRange(row0, row1); + Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat(); - findStereoCorrespondenceBM(left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1); + findStereoCorrespondenceBM(left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1); - if (state->disp12MaxDiff >= 0) - validateDisparity(disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff); + if (state->disp12MaxDiff >= 0) + validateDisparity(disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff); - if (roi.x > 0) - { - part = disp_i.colRange(0, roi.x); - part = Scalar::all(FILTERED); - } - if (roi.x + roi.width < cols) - { - part = disp_i.colRange(roi.x + roi.width, cols); - part = Scalar::all(FILTERED); + if (roi.x > 0) + { + part = disp_i.colRange(0, roi.x); + part = Scalar::all(FILTERED); + } + if (roi.x + roi.width < cols) + { + part = disp_i.colRange(roi.x + roi.width, cols); + part = Scalar::all(FILTERED); + } } - } - protected: - const Mat *left, *right; - Mat* disp, *slidingSumBuf, *cost; - StereoBinaryBMParams *state; + protected: + const Mat *left, *right; + Mat* disp, *slidingSumBuf, *cost; + StereoBinaryBMParams *state; - int nstripes; - size_t stripeBufSize; - bool useShorts; - Rect validDisparityRect; - }; + int nstripes; + size_t stripeBufSize; + bool useShorts; + Rect validDisparityRect; + }; - class StereoBinaryBMImpl : public StereoBinaryBM - { - public: - StereoBinaryBMImpl() + class StereoBinaryBMImpl : public StereoBinaryBM { - params = StereoBinaryBMParams(); - } + public: + StereoBinaryBMImpl() + { + params = StereoBinaryBMParams(); + } - StereoBinaryBMImpl(int _numDisparities, int _SADWindowSize) - { - params = StereoBinaryBMParams(_numDisparities, _SADWindowSize); - } + StereoBinaryBMImpl(int _numDisparities, int _SADWindowSize) + { + params = StereoBinaryBMParams(_numDisparities, _SADWindowSize); + } - void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr) - { - int dtype = disparr.fixedType() ? disparr.type() : params.dispType; - Size leftsize = leftarr.size(); + void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr) + { + int dtype = disparr.fixedType() ? disparr.type() : params.dispType; + Size leftsize = leftarr.size(); - if (leftarr.size() != rightarr.size()) - CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size"); + if (leftarr.size() != rightarr.size()) + CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size"); - if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1) - CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1"); + if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1) + CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1"); - if (dtype != CV_16SC1 && dtype != CV_32FC1) - CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format"); + if (dtype != CV_16SC1 && dtype != CV_32FC1) + CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format"); - if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE && - params.preFilterType != PREFILTER_XSOBEL) - CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE"); + if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE && + params.preFilterType != PREFILTER_XSOBEL) + CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE"); - if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0) - CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255"); + if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0) + CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255"); - if (params.preFilterCap < 1 || params.preFilterCap > 63) - CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63"); + if (params.preFilterCap < 1 || params.preFilterCap > 63) + CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63"); - if (params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 || - params.SADWindowSize >= std::min(leftsize.width, leftsize.height)) - CV_Error(Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height"); + if (params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 || + params.SADWindowSize >= std::min(leftsize.width, leftsize.height)) + CV_Error(Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height"); - if (params.numDisparities <= 0 || params.numDisparities % 16 != 0) - CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16"); + if (params.numDisparities <= 0 || params.numDisparities % 16 != 0) + CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16"); - if (params.textureThreshold < 0) - CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative"); + if (params.textureThreshold < 0) + CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative"); - if (params.uniquenessRatio < 0) - CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative"); + if (params.uniquenessRatio < 0) + CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative"); - int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT; + int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT; - Mat left0 = leftarr.getMat(), right0 = rightarr.getMat(); - disparr.create(left0.size(), dtype); - Mat disp0 = disparr.getMat(); + Mat left0 = leftarr.getMat(), right0 = rightarr.getMat(); + disparr.create(left0.size(), dtype); + Mat disp0 = disparr.getMat(); - preFilteredImg0.create(left0.size(), CV_8U); - preFilteredImg1.create(left0.size(), CV_8U); - cost.create(left0.size(), CV_16S); + preFilteredImg0.create(left0.size(), CV_8U); + preFilteredImg1.create(left0.size(), CV_8U); + cost.create(left0.size(), CV_16S); - Mat left = preFilteredImg0, right = preFilteredImg1; + Mat left = preFilteredImg0, right = preFilteredImg1; - int mindisp = params.minDisparity; - int ndisp = params.numDisparities; + int mindisp = params.minDisparity; + int ndisp = params.numDisparities; - int width = left0.cols; - int height = left0.rows; - int lofs = std::max(ndisp - 1 + mindisp, 0); - int rofs = -std::min(ndisp - 1 + mindisp, 0); - int width1 = width - rofs - ndisp + 1; + int width = left0.cols; + int height = left0.rows; + int lofs = std::max(ndisp - 1 + mindisp, 0); + int rofs = -std::min(ndisp - 1 + mindisp, 0); + int width1 = width - rofs - ndisp + 1; - if (lofs >= width || rofs >= width || width1 < 1) - { - disp0 = Scalar::all(FILTERED * (disp0.type() < CV_32F ? 1 : 1. / (1 << DISPARITY_SHIFT))); - return; - } + if (lofs >= width || rofs >= width || width1 < 1) + { + disp0 = Scalar::all(FILTERED * (disp0.type() < CV_32F ? 1 : 1. / (1 << DISPARITY_SHIFT))); + return; + } - Mat disp = disp0; - if (dtype == CV_32F) - { - dispbuf.create(disp0.size(), CV_16S); - disp = dispbuf; - } + Mat disp = disp0; + if (dtype == CV_32F) + { + dispbuf.create(disp0.size(), CV_16S); + disp = dispbuf; + } - int wsz = params.SADWindowSize; - int bufSize0 = (int)((ndisp + 2)*sizeof(int)); - bufSize0 += (int)((height + wsz + 2)*ndisp*sizeof(int)); - bufSize0 += (int)((height + wsz + 2)*sizeof(int)); - bufSize0 += (int)((height + wsz + 2)*ndisp*(wsz + 2)*sizeof(uchar) + 256); + int wsz = params.SADWindowSize; + int bufSize0 = (int)((ndisp + 2)*sizeof(int)); + bufSize0 += (int)((height + wsz + 2)*ndisp*sizeof(int)); + bufSize0 += (int)((height + wsz + 2)*sizeof(int)); + bufSize0 += (int)((height + wsz + 2)*ndisp*(wsz + 2)*sizeof(uchar) + 256); - int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256); - int bufSize2 = 0; - if (params.speckleRange >= 0 && params.speckleWindowSize > 0) - bufSize2 = width*height*(sizeof(Point_) + sizeof(int) + sizeof(uchar)); + int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256); + int bufSize2 = 0; + if (params.speckleRange >= 0 && params.speckleWindowSize > 0) + bufSize2 = width*height*(sizeof(Point_) + sizeof(int) + sizeof(uchar)); #if CV_SSE2 - bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); + bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); #else - const bool useShorts = false; + const bool useShorts = false; #endif - const double SAD_overhead_coeff = 10.0; - double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread - double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz - 1) * SAD_overhead_coeff), (double)height); - int nstripes = cvCeil(height / maxStripeSize); - int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2)); + const double SAD_overhead_coeff = 10.0; + double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread + double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz - 1) * SAD_overhead_coeff), (double)height); + int nstripes = cvCeil(height / maxStripeSize); + int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2)); - if (slidingSumBuf.cols < bufSize) - slidingSumBuf.create(1, bufSize, CV_8U); + if (slidingSumBuf.cols < bufSize) + slidingSumBuf.create(1, bufSize, CV_8U); - uchar *_buf = slidingSumBuf.ptr(); + uchar *_buf = slidingSumBuf.ptr(); - parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, ¶ms), 1); + parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, ¶ms), 1); - Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2; - validDisparityRect = getValidDisparityROI(R1.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect, - R2.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect, - params.minDisparity, params.numDisparities, - params.SADWindowSize); + Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2; + validDisparityRect = getValidDisparityROI(R1.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect, + R2.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect, + params.minDisparity, params.numDisparities, + params.SADWindowSize); - parallel_for_(Range(0, nstripes), - FindStereoCorrespInvoker(left, right, disp, ¶ms, nstripes, - bufSize0, useShorts, validDisparityRect, - slidingSumBuf, cost)); + parallel_for_(Range(0, nstripes), + FindStereoCorrespInvoker(left, right, disp, ¶ms, nstripes, + bufSize0, useShorts, validDisparityRect, + slidingSumBuf, cost)); - if (params.speckleRange >= 0 && params.speckleWindowSize > 0) - filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); + if (params.speckleRange >= 0 && params.speckleWindowSize > 0) + filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); - if (disp0.data != disp.data) - disp.convertTo(disp0, disp0.type(), 1. / (1 << DISPARITY_SHIFT), 0); - } + if (disp0.data != disp.data) + disp.convertTo(disp0, disp0.type(), 1. / (1 << DISPARITY_SHIFT), 0); + } - int getMinDisparity() const { return params.minDisparity; } - void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; } + int getMinDisparity() const { return params.minDisparity; } + void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; } - int getNumDisparities() const { return params.numDisparities; } - void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; } + int getNumDisparities() const { return params.numDisparities; } + void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; } - int getBlockSize() const { return params.SADWindowSize; } - void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; } + int getBlockSize() const { return params.SADWindowSize; } + void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; } - int getSpeckleWindowSize() const { return params.speckleWindowSize; } - void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; } + int getSpeckleWindowSize() const { return params.speckleWindowSize; } + void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; } - int getSpeckleRange() const { return params.speckleRange; } - void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; } + int getSpeckleRange() const { return params.speckleRange; } + void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; } - int getDisp12MaxDiff() const { return params.disp12MaxDiff; } - void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; } + int getDisp12MaxDiff() const { return params.disp12MaxDiff; } + void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; } - int getPreFilterType() const { return params.preFilterType; } - void setPreFilterType(int preFilterType) { params.preFilterType = preFilterType; } + int getPreFilterType() const { return params.preFilterType; } + void setPreFilterType(int preFilterType) { params.preFilterType = preFilterType; } - int getPreFilterSize() const { return params.preFilterSize; } - void setPreFilterSize(int preFilterSize) { params.preFilterSize = preFilterSize; } + int getPreFilterSize() const { return params.preFilterSize; } + void setPreFilterSize(int preFilterSize) { params.preFilterSize = preFilterSize; } - int getPreFilterCap() const { return params.preFilterCap; } - void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; } + int getPreFilterCap() const { return params.preFilterCap; } + void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; } - int getTextureThreshold() const { return params.textureThreshold; } - void setTextureThreshold(int textureThreshold) { params.textureThreshold = textureThreshold; } + int getTextureThreshold() const { return params.textureThreshold; } + void setTextureThreshold(int textureThreshold) { params.textureThreshold = textureThreshold; } - int getUniquenessRatio() const { return params.uniquenessRatio; } - void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; } + int getUniquenessRatio() const { return params.uniquenessRatio; } + void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; } - int getSmallerBlockSize() const { return 0; } - void setSmallerBlockSize(int) {} + int getSmallerBlockSize() const { return 0; } + void setSmallerBlockSize(int) {} - Rect getROI1() const { return params.roi1; } - void setROI1(Rect roi1) { params.roi1 = roi1; } + Rect getROI1() const { return params.roi1; } + void setROI1(Rect roi1) { params.roi1 = roi1; } - Rect getROI2() const { return params.roi2; } - void setROI2(Rect roi2) { params.roi2 = roi2; } + Rect getROI2() const { return params.roi2; } + void setROI2(Rect roi2) { params.roi2 = roi2; } - void write(FileStorage& fs) const - { - fs << "name" << name_ - << "minDisparity" << params.minDisparity - << "numDisparities" << params.numDisparities - << "blockSize" << params.SADWindowSize - << "speckleWindowSize" << params.speckleWindowSize - << "speckleRange" << params.speckleRange - << "disp12MaxDiff" << params.disp12MaxDiff - << "preFilterType" << params.preFilterType - << "preFilterSize" << params.preFilterSize - << "preFilterCap" << params.preFilterCap - << "textureThreshold" << params.textureThreshold - << "uniquenessRatio" << params.uniquenessRatio; - } + void write(FileStorage& fs) const + { + fs << "name" << name_ + << "minDisparity" << params.minDisparity + << "numDisparities" << params.numDisparities + << "blockSize" << params.SADWindowSize + << "speckleWindowSize" << params.speckleWindowSize + << "speckleRange" << params.speckleRange + << "disp12MaxDiff" << params.disp12MaxDiff + << "preFilterType" << params.preFilterType + << "preFilterSize" << params.preFilterSize + << "preFilterCap" << params.preFilterCap + << "textureThreshold" << params.textureThreshold + << "uniquenessRatio" << params.uniquenessRatio; + } - void read(const FileNode& fn) - { - FileNode n = fn["name"]; - CV_Assert(n.isString() && String(n) == name_); - params.minDisparity = (int)fn["minDisparity"]; - params.numDisparities = (int)fn["numDisparities"]; - params.SADWindowSize = (int)fn["blockSize"]; - params.speckleWindowSize = (int)fn["speckleWindowSize"]; - params.speckleRange = (int)fn["speckleRange"]; - params.disp12MaxDiff = (int)fn["disp12MaxDiff"]; - params.preFilterType = (int)fn["preFilterType"]; - params.preFilterSize = (int)fn["preFilterSize"]; - params.preFilterCap = (int)fn["preFilterCap"]; - params.textureThreshold = (int)fn["textureThreshold"]; - params.uniquenessRatio = (int)fn["uniquenessRatio"]; - params.roi1 = params.roi2 = Rect(); - } + void read(const FileNode& fn) + { + FileNode n = fn["name"]; + CV_Assert(n.isString() && String(n) == name_); + params.minDisparity = (int)fn["minDisparity"]; + params.numDisparities = (int)fn["numDisparities"]; + params.SADWindowSize = (int)fn["blockSize"]; + params.speckleWindowSize = (int)fn["speckleWindowSize"]; + params.speckleRange = (int)fn["speckleRange"]; + params.disp12MaxDiff = (int)fn["disp12MaxDiff"]; + params.preFilterType = (int)fn["preFilterType"]; + params.preFilterSize = (int)fn["preFilterSize"]; + params.preFilterCap = (int)fn["preFilterCap"]; + params.textureThreshold = (int)fn["textureThreshold"]; + params.uniquenessRatio = (int)fn["uniquenessRatio"]; + params.roi1 = params.roi2 = Rect(); + } - StereoBinaryBMParams params; - Mat preFilteredImg0, preFilteredImg1, cost, dispbuf; - Mat slidingSumBuf; + StereoBinaryBMParams params; + Mat preFilteredImg0, preFilteredImg1, cost, dispbuf; + Mat slidingSumBuf; - static const char* name_; - }; + static const char* name_; + }; - const char* StereoBinaryBMImpl::name_ = "StereoMatcher.BM"; + const char* StereoBinaryBMImpl::name_ = "StereoMatcher.BM"; - Ptr StereoBinaryBM::create(int _numDisparities, int _SADWindowSize) - { - return makePtr(_numDisparities, _SADWindowSize); + Ptr StereoBinaryBM::create(int _numDisparities, int _SADWindowSize) + { + return makePtr(_numDisparities, _SADWindowSize); + } } - } /* End of file. */ diff --git a/modules/stereo/src/stereo_binary_sgbm.cpp b/modules/stereo/src/stereo_binary_sgbm.cpp index 45b9297ab..491c8d234 100644 --- a/modules/stereo/src/stereo_binary_sgbm.cpp +++ b/modules/stereo/src/stereo_binary_sgbm.cpp @@ -42,1162 +42,1163 @@ //M*/ /* - This is a variation of - "Stereo Processing by Semiglobal Matching and Mutual Information" - by Heiko Hirschmuller. +This is a variation of +"Stereo Processing by Semiglobal Matching and Mutual Information" +by Heiko Hirschmuller. - We match blocks rather than individual pixels, thus the algorithm is called - SGBM (Semi-global block matching) - */ +We match blocks rather than individual pixels, thus the algorithm is called +SGBM (Semi-global block matching) +*/ #include "precomp.hpp" #include namespace cv { - -typedef uchar PixType; -typedef short CostType; -typedef short DispType; - -enum { NR = 16, NR2 = NR/2 }; - - -struct StereoBinarySGBMParams -{ - StereoBinarySGBMParams() - { - minDisparity = numDisparities = 0; - SADWindowSize = 0; - P1 = P2 = 0; - disp12MaxDiff = 0; - preFilterCap = 0; - uniquenessRatio = 0; - speckleWindowSize = 0; - speckleRange = 0; - mode = StereoBinarySGBM::MODE_SGBM; - } - - StereoBinarySGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize, - int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, - int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, - int _mode ) - { - minDisparity = _minDisparity; - numDisparities = _numDisparities; - SADWindowSize = _SADWindowSize; - P1 = _P1; - P2 = _P2; - disp12MaxDiff = _disp12MaxDiff; - preFilterCap = _preFilterCap; - uniquenessRatio = _uniquenessRatio; - speckleWindowSize = _speckleWindowSize; - speckleRange = _speckleRange; - mode = _mode; - } - - int minDisparity; - int numDisparities; - int SADWindowSize; - int preFilterCap; - int uniquenessRatio; - int P1; - int P2; - int speckleWindowSize; - int speckleRange; - int disp12MaxDiff; - int mode; -}; - -/* - For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD), - and for each disparity minD<=d(y), *row2 = img2.ptr(y); - PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2; - - tab += tabOfs; - - for( c = 0; c < cn*2; c++ ) - { - prow1[width*c] = prow1[width*c + width-1] = - prow2[width*c] = prow2[width*c + width-1] = tab[0]; - } - - int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0; - int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0; - - if( cn == 1 ) - { - for( x = 1; x < width-1; x++ ) - { - prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]]; - prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]]; - - prow1[x+width] = row1[x]; - prow2[width-1-x+width] = row2[x]; - } - } - else - { - for( x = 1; x < width-1; x++ ) - { - prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]]; - prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]]; - prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]]; - - prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]]; - prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]]; - prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]]; - - prow1[x+width*3] = row1[x*3]; - prow1[x+width*4] = row1[x*3+1]; - prow1[x+width*5] = row1[x*3+2]; - - prow2[width-1-x+width*3] = row2[x*3]; - prow2[width-1-x+width*4] = row2[x*3+1]; - prow2[width-1-x+width*5] = row2[x*3+2]; - } - } - - memset( cost, 0, width1*D*sizeof(cost[0]) ); - - buffer -= minX2; - cost -= minX1*D + minD; // simplify the cost indices inside the loop + namespace stereo + { + typedef uchar PixType; + typedef short CostType; + typedef short DispType; + + enum { NR = 16, NR2 = NR/2 }; + + + struct StereoBinarySGBMParams + { + StereoBinarySGBMParams() + { + minDisparity = numDisparities = 0; + SADWindowSize = 0; + P1 = P2 = 0; + disp12MaxDiff = 0; + preFilterCap = 0; + uniquenessRatio = 0; + speckleWindowSize = 0; + speckleRange = 0; + mode = StereoBinarySGBM::MODE_SGBM; + } + + StereoBinarySGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize, + int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, + int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, + int _mode ) + { + minDisparity = _minDisparity; + numDisparities = _numDisparities; + SADWindowSize = _SADWindowSize; + P1 = _P1; + P2 = _P2; + disp12MaxDiff = _disp12MaxDiff; + preFilterCap = _preFilterCap; + uniquenessRatio = _uniquenessRatio; + speckleWindowSize = _speckleWindowSize; + speckleRange = _speckleRange; + mode = _mode; + } + + int minDisparity; + int numDisparities; + int SADWindowSize; + int preFilterCap; + int uniquenessRatio; + int P1; + int P2; + int speckleWindowSize; + int speckleRange; + int disp12MaxDiff; + int mode; + }; + + /* + For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD), + and for each disparity minD<=d(y), *row2 = img2.ptr(y); + PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2; + + tab += tabOfs; + + for( c = 0; c < cn*2; c++ ) + { + prow1[width*c] = prow1[width*c + width-1] = + prow2[width*c] = prow2[width*c + width-1] = tab[0]; + } + + int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0; + int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0; + + if( cn == 1 ) + { + for( x = 1; x < width-1; x++ ) + { + prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]]; + prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]]; + + prow1[x+width] = row1[x]; + prow2[width-1-x+width] = row2[x]; + } + } + else + { + for( x = 1; x < width-1; x++ ) + { + prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]]; + prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]]; + prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]]; + + prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]]; + prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]]; + prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]]; + + prow1[x+width*3] = row1[x*3]; + prow1[x+width*4] = row1[x*3+1]; + prow1[x+width*5] = row1[x*3+2]; + + prow2[width-1-x+width*3] = row2[x*3]; + prow2[width-1-x+width*4] = row2[x*3+1]; + prow2[width-1-x+width*5] = row2[x*3+2]; + } + } + + memset( cost, 0, width1*D*sizeof(cost[0]) ); + + buffer -= minX2; + cost -= minX1*D + minD; // simplify the cost indices inside the loop #if CV_SSE2 - volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); + volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); #endif #if 1 - for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) - { - int diff_scale = c < cn ? 0 : 2; - - // precompute - // v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and - // v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and - for( x = minX2; x < maxX2; x++ ) - { - int v = prow2[x]; - int vl = x > 0 ? (v + prow2[x-1])/2 : v; - int vr = x < width-1 ? (v + prow2[x+1])/2 : v; - int v0 = std::min(vl, vr); v0 = std::min(v0, v); - int v1 = std::max(vl, vr); v1 = std::max(v1, v); - buffer[x] = (PixType)v0; - buffer[x + width2] = (PixType)v1; - } - - for( x = minX1; x < maxX1; x++ ) - { - int u = prow1[x]; - int ul = x > 0 ? (u + prow1[x-1])/2 : u; - int ur = x < width-1 ? (u + prow1[x+1])/2 : u; - int u0 = std::min(ul, ur); u0 = std::min(u0, u); - int u1 = std::max(ul, ur); u1 = std::max(u1, u); - - #if CV_SSE2 - if( useSIMD ) - { - __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0); - __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128(); - __m128i ds = _mm_cvtsi32_si128(diff_scale); - - for( int d = minD; d < maxD; d += 16 ) - { - __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d)); - __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d)); - __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2)); - __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u)); - __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v)); - __m128i diff = _mm_min_epu8(c0, c1); - - c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); - c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); - - _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds))); - _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds))); - } - } - else - #endif - { - for( int d = minD; d < maxD; d++ ) - { - int v = prow2[width-x-1 + d]; - int v0 = buffer[width-x-1 + d]; - int v1 = buffer[width-x-1 + d + width2]; - int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u); - int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v); - - cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale)); - } - } - } - } + for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) + { + int diff_scale = c < cn ? 0 : 2; + + // precompute + // v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and + // v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and + for( x = minX2; x < maxX2; x++ ) + { + int v = prow2[x]; + int vl = x > 0 ? (v + prow2[x-1])/2 : v; + int vr = x < width-1 ? (v + prow2[x+1])/2 : v; + int v0 = std::min(vl, vr); v0 = std::min(v0, v); + int v1 = std::max(vl, vr); v1 = std::max(v1, v); + buffer[x] = (PixType)v0; + buffer[x + width2] = (PixType)v1; + } + + for( x = minX1; x < maxX1; x++ ) + { + int u = prow1[x]; + int ul = x > 0 ? (u + prow1[x-1])/2 : u; + int ur = x < width-1 ? (u + prow1[x+1])/2 : u; + int u0 = std::min(ul, ur); u0 = std::min(u0, u); + int u1 = std::max(ul, ur); u1 = std::max(u1, u); + +#if CV_SSE2 + if( useSIMD ) + { + __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0); + __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128(); + __m128i ds = _mm_cvtsi32_si128(diff_scale); + + for( int d = minD; d < maxD; d += 16 ) + { + __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d)); + __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d)); + __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2)); + __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u)); + __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v)); + __m128i diff = _mm_min_epu8(c0, c1); + + c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); + c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); + + _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds))); + _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds))); + } + } + else +#endif + { + for( int d = minD; d < maxD; d++ ) + { + int v = prow2[width-x-1 + d]; + int v0 = buffer[width-x-1 + d]; + int v1 = buffer[width-x-1 + d + width2]; + int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u); + int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v); + + cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale)); + } + } + } + } #else - for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) - { - for( x = minX1; x < maxX1; x++ ) - { - int u = prow1[x]; - #if CV_SSE2 - if( useSIMD ) - { - __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128(); - - for( int d = minD; d < maxD; d += 16 ) - { - __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d)); - __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u)); - __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); - __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); - - _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z))); - _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z))); - } - } - else - #endif - { - for( int d = minD; d < maxD; d++ ) - { - int v = prow2[width-1-x + d]; - cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v)); - } - } - } - } + for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) + { + for( x = minX1; x < maxX1; x++ ) + { + int u = prow1[x]; +#if CV_SSE2 + if( useSIMD ) + { + __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128(); + + for( int d = minD; d < maxD; d += 16 ) + { + __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d)); + __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u)); + __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); + __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); + + _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z))); + _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z))); + } + } + else +#endif + { + for( int d = minD; d < maxD; d++ ) + { + int v = prow2[width-1-x + d]; + cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v)); + } + } + } + } +#endif + } + + + /* + computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf. + that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y). + minD <= d < maxD. + disp2full is the reverse disparity map, that is: + disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y) + + note that disp1buf will have the same size as the roi and + disp2full will have the same size as img1 (or img2). + On exit disp2buf is not the final disparity, it is an intermediate result that becomes + final after all the tiles are processed. + + the disparity in disp1buf is written with sub-pixel accuracy + (4 fractional bits, see StereoSGBM::DISP_SCALE), + using quadratic interpolation, while the disparity in disp2buf + is written as is, without interpolation. + + disp2cost also has the same size as img1 (or img2). + It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost. + */ + static void computeDisparityBinarySGBM( const Mat& img1, const Mat& img2, + Mat& disp1, const StereoBinarySGBMParams& params, + Mat& buffer ) + { +#if CV_SSE2 + static const uchar LSBTab[] = + { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 + }; + + volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); #endif -} + const int ALIGN = 16; + const int DISP_SHIFT = StereoMatcher::DISP_SHIFT; + const int DISP_SCALE = (1 << DISP_SHIFT); + const CostType MAX_COST = SHRT_MAX; + + int minD = params.minDisparity, maxD = minD + params.numDisparities; + Size SADWindowSize; + SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5; + int ftzero = std::max(params.preFilterCap, 15) | 1; + int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10; + int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1; + int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); + int k, width = disp1.cols, height = disp1.rows; + int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0); + int D = maxD - minD, width1 = maxX1 - minX1; + int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; + int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2; + bool fullDP = params.mode == StereoBinarySGBM::MODE_HH; + int npasses = fullDP ? 2 : 1; + const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2; + PixType clipTab[TAB_SIZE]; + + for( k = 0; k < TAB_SIZE; k++ ) + clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero); + + if( minX1 >= maxX1 ) + { + disp1 = Scalar::all(INVALID_DISP_SCALED); + return; + } + + CV_Assert( D % 16 == 0 ); + + // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8. + // if you change NR, please, modify the loop as well. + int D2 = D+16, NRD2 = NR2*D2; + + // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer: + // for 8-way dynamic programming we need the current row and + // the previous row, i.e. 2 rows in total + const int NLR = 2; + const int LrBorder = NLR - 1; + + // for each possible stereo match (img1(x,y) <=> img2(x-d,y)) + // we keep pixel difference cost (C) and the summary cost over NR directions (S). + // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k) + size_t costBufSize = width1*D; + size_t CSBufSize = costBufSize*(fullDP ? height : 1); + size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2; + int hsumBufNRows = SH2*2 + 2; + size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[] + costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff + CSBufSize*2*sizeof(CostType) + // C, S + width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost + width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2 + + if( buffer.empty() || !buffer.isContinuous() || + buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize ) + buffer.create(1, (int)totalBufSize, CV_8U); + + // summary cost over different (nDirs) directions + CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN); + CostType* Sbuf = Cbuf + CSBufSize; + CostType* hsumBuf = Sbuf + CSBufSize; + CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows; + + CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR; + DispType* disp2ptr = (DispType*)(disp2cost + width); + PixType* tempBuf = (PixType*)(disp2ptr + width); + + // add P2 to every C(x,y). it saves a few operations in the inner loops + for( k = 0; k < width1*D; k++ ) + Cbuf[k] = (CostType)P2; + + for( int pass = 1; pass <= npasses; pass++ ) + { + int x1, y1, x2, y2, dx, dy; + + if( pass == 1 ) + { + y1 = 0; y2 = height; dy = 1; + x1 = 0; x2 = width1; dx = 1; + } + else + { + y1 = height-1; y2 = -1; dy = -1; + x1 = width1-1; x2 = -1; dx = -1; + } + + CostType *Lr[NLR]={0}, *minLr[NLR]={0}; + + for( k = 0; k < NLR; k++ ) + { + // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders, + // and will occasionally use negative indices with the arrays + // we need to shift Lr[k] pointers by 1, to give the space for d=-1. + // however, then the alignment will be imperfect, i.e. bad for SSE, + // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment) + Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8; + memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) ); + minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder; + memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) ); + } + + for( int y = y1; y != y2; y += dy ) + { + int x, d; + DispType* disp1ptr = disp1.ptr(y); + CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize); + CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize); + + if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any. + { + int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1; + + for( k = dy1; k <= dy2; k++ ) + { + CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize; + + if( k < height ) + { + calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero ); + + memset(hsumAdd, 0, D*sizeof(CostType)); + for( x = 0; x <= SW2*D; x += D ) + { + int scale = x == 0 ? SW2 + 1 : 1; + for( d = 0; d < D; d++ ) + hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale); + } + + if( y > 0 ) + { + const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize; + const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize; + + for( x = D; x < width1*D; x += D ) + { + const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); + const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); -/* - computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf. - that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y). - minD <= d < maxD. - disp2full is the reverse disparity map, that is: - disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y) - - note that disp1buf will have the same size as the roi and - disp2full will have the same size as img1 (or img2). - On exit disp2buf is not the final disparity, it is an intermediate result that becomes - final after all the tiles are processed. - - the disparity in disp1buf is written with sub-pixel accuracy - (4 fractional bits, see StereoSGBM::DISP_SCALE), - using quadratic interpolation, while the disparity in disp2buf - is written as is, without interpolation. - - disp2cost also has the same size as img1 (or img2). - It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost. - */ -static void computeDisparityBinarySGBM( const Mat& img1, const Mat& img2, - Mat& disp1, const StereoBinarySGBMParams& params, - Mat& buffer ) -{ #if CV_SSE2 - static const uchar LSBTab[] = - { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 - }; - - volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); + if( useSIMD ) + { + for( d = 0; d < D; d += 8 ) + { + __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d)); + __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d)); + hv = _mm_adds_epi16(_mm_subs_epi16(hv, + _mm_load_si128((const __m128i*)(pixSub + d))), + _mm_load_si128((const __m128i*)(pixAdd + d))); + Cx = _mm_adds_epi16(_mm_subs_epi16(Cx, + _mm_load_si128((const __m128i*)(hsumSub + x + d))), + hv); + _mm_store_si128((__m128i*)(hsumAdd + x + d), hv); + _mm_store_si128((__m128i*)(C + x + d), Cx); + } + } + else #endif + { + for( d = 0; d < D; d++ ) + { + int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); + C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]); + } + } + } + } + else + { + for( x = D; x < width1*D; x += D ) + { + const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); + const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); + + for( d = 0; d < D; d++ ) + hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); + } + } + } + + if( y == 0 ) + { + int scale = k == 0 ? SH2 + 1 : 1; + for( x = 0; x < width1*D; x++ ) + C[x] = (CostType)(C[x] + hsumAdd[x]*scale); + } + } + + // also, clear the S buffer + for( k = 0; k < width1*D; k++ ) + S[k] = 0; + } + + // clear the left and the right borders + memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) ); + memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) ); + memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) ); + memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) ); + + /* + [formula 13 in the paper] + compute L_r(p, d) = C(p, d) + + min(L_r(p-r, d), + L_r(p-r, d-1) + P1, + L_r(p-r, d+1) + P1, + min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k) + where p = (x,y), r is one of the directions. + we process all the directions at once: + 0: r=(-dx, 0) + 1: r=(-1, -dy) + 2: r=(0, -dy) + 3: r=(1, -dy) + 4: r=(-2, -dy) + 5: r=(-1, -dy*2) + 6: r=(1, -dy*2) + 7: r=(2, -dy) + */ + for( x = x1; x != x2; x += dx ) + { + int xm = x*NR2, xd = xm*D2; + + int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2; + int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2; + + CostType* Lr_p0 = Lr[0] + xd - dx*NRD2; + CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2; + CostType* Lr_p2 = Lr[1] + xd + D2*2; + CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3; + + Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] = + Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST; + + CostType* Lr_p = Lr[0] + xd; + const CostType* Cp = C + x*D; + CostType* Sp = S + x*D; - const int ALIGN = 16; - const int DISP_SHIFT = StereoMatcher::DISP_SHIFT; - const int DISP_SCALE = (1 << DISP_SHIFT); - const CostType MAX_COST = SHRT_MAX; - - int minD = params.minDisparity, maxD = minD + params.numDisparities; - Size SADWindowSize; - SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5; - int ftzero = std::max(params.preFilterCap, 15) | 1; - int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10; - int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1; - int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); - int k, width = disp1.cols, height = disp1.rows; - int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0); - int D = maxD - minD, width1 = maxX1 - minX1; - int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; - int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2; - bool fullDP = params.mode == StereoBinarySGBM::MODE_HH; - int npasses = fullDP ? 2 : 1; - const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2; - PixType clipTab[TAB_SIZE]; - - for( k = 0; k < TAB_SIZE; k++ ) - clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero); - - if( minX1 >= maxX1 ) - { - disp1 = Scalar::all(INVALID_DISP_SCALED); - return; - } - - CV_Assert( D % 16 == 0 ); - - // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8. - // if you change NR, please, modify the loop as well. - int D2 = D+16, NRD2 = NR2*D2; - - // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer: - // for 8-way dynamic programming we need the current row and - // the previous row, i.e. 2 rows in total - const int NLR = 2; - const int LrBorder = NLR - 1; - - // for each possible stereo match (img1(x,y) <=> img2(x-d,y)) - // we keep pixel difference cost (C) and the summary cost over NR directions (S). - // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k) - size_t costBufSize = width1*D; - size_t CSBufSize = costBufSize*(fullDP ? height : 1); - size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2; - int hsumBufNRows = SH2*2 + 2; - size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[] - costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff - CSBufSize*2*sizeof(CostType) + // C, S - width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost - width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2 - - if( buffer.empty() || !buffer.isContinuous() || - buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize ) - buffer.create(1, (int)totalBufSize, CV_8U); - - // summary cost over different (nDirs) directions - CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN); - CostType* Sbuf = Cbuf + CSBufSize; - CostType* hsumBuf = Sbuf + CSBufSize; - CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows; - - CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR; - DispType* disp2ptr = (DispType*)(disp2cost + width); - PixType* tempBuf = (PixType*)(disp2ptr + width); - - // add P2 to every C(x,y). it saves a few operations in the inner loops - for( k = 0; k < width1*D; k++ ) - Cbuf[k] = (CostType)P2; - - for( int pass = 1; pass <= npasses; pass++ ) - { - int x1, y1, x2, y2, dx, dy; - - if( pass == 1 ) - { - y1 = 0; y2 = height; dy = 1; - x1 = 0; x2 = width1; dx = 1; - } - else - { - y1 = height-1; y2 = -1; dy = -1; - x1 = width1-1; x2 = -1; dx = -1; - } - - CostType *Lr[NLR]={0}, *minLr[NLR]={0}; - - for( k = 0; k < NLR; k++ ) - { - // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders, - // and will occasionally use negative indices with the arrays - // we need to shift Lr[k] pointers by 1, to give the space for d=-1. - // however, then the alignment will be imperfect, i.e. bad for SSE, - // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment) - Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8; - memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) ); - minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder; - memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) ); - } - - for( int y = y1; y != y2; y += dy ) - { - int x, d; - DispType* disp1ptr = disp1.ptr(y); - CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize); - CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize); - - if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any. - { - int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1; - - for( k = dy1; k <= dy2; k++ ) - { - CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize; - - if( k < height ) - { - calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero ); - - memset(hsumAdd, 0, D*sizeof(CostType)); - for( x = 0; x <= SW2*D; x += D ) - { - int scale = x == 0 ? SW2 + 1 : 1; - for( d = 0; d < D; d++ ) - hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale); - } - - if( y > 0 ) - { - const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize; - const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize; - - for( x = D; x < width1*D; x += D ) - { - const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); - const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); - - #if CV_SSE2 - if( useSIMD ) - { - for( d = 0; d < D; d += 8 ) - { - __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d)); - __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d)); - hv = _mm_adds_epi16(_mm_subs_epi16(hv, - _mm_load_si128((const __m128i*)(pixSub + d))), - _mm_load_si128((const __m128i*)(pixAdd + d))); - Cx = _mm_adds_epi16(_mm_subs_epi16(Cx, - _mm_load_si128((const __m128i*)(hsumSub + x + d))), - hv); - _mm_store_si128((__m128i*)(hsumAdd + x + d), hv); - _mm_store_si128((__m128i*)(C + x + d), Cx); - } - } - else - #endif - { - for( d = 0; d < D; d++ ) - { - int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); - C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]); - } - } - } - } - else - { - for( x = D; x < width1*D; x += D ) - { - const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); - const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); - - for( d = 0; d < D; d++ ) - hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); - } - } - } - - if( y == 0 ) - { - int scale = k == 0 ? SH2 + 1 : 1; - for( x = 0; x < width1*D; x++ ) - C[x] = (CostType)(C[x] + hsumAdd[x]*scale); - } - } - - // also, clear the S buffer - for( k = 0; k < width1*D; k++ ) - S[k] = 0; - } - - // clear the left and the right borders - memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) ); - memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) ); - memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) ); - memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) ); - - /* - [formula 13 in the paper] - compute L_r(p, d) = C(p, d) + - min(L_r(p-r, d), - L_r(p-r, d-1) + P1, - L_r(p-r, d+1) + P1, - min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k) - where p = (x,y), r is one of the directions. - we process all the directions at once: - 0: r=(-dx, 0) - 1: r=(-1, -dy) - 2: r=(0, -dy) - 3: r=(1, -dy) - 4: r=(-2, -dy) - 5: r=(-1, -dy*2) - 6: r=(1, -dy*2) - 7: r=(2, -dy) - */ - for( x = x1; x != x2; x += dx ) - { - int xm = x*NR2, xd = xm*D2; - - int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2; - int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2; - - CostType* Lr_p0 = Lr[0] + xd - dx*NRD2; - CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2; - CostType* Lr_p2 = Lr[1] + xd + D2*2; - CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3; - - Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] = - Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST; - - CostType* Lr_p = Lr[0] + xd; - const CostType* Cp = C + x*D; - CostType* Sp = S + x*D; - - #if CV_SSE2 - if( useSIMD ) - { - __m128i _P1 = _mm_set1_epi16((short)P1); - - __m128i _delta0 = _mm_set1_epi16((short)delta0); - __m128i _delta1 = _mm_set1_epi16((short)delta1); - __m128i _delta2 = _mm_set1_epi16((short)delta2); - __m128i _delta3 = _mm_set1_epi16((short)delta3); - __m128i _minL0 = _mm_set1_epi16((short)MAX_COST); - - for( d = 0; d < D; d += 8 ) - { - __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)); - __m128i L0, L1, L2, L3; - - L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); - L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d)); - L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d)); - L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d)); - - L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); - L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); - - L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1)); - L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1)); - - L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1)); - L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1)); - - L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1)); - L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1)); - - L0 = _mm_min_epi16(L0, _delta0); - L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); - - L1 = _mm_min_epi16(L1, _delta1); - L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd); - - L2 = _mm_min_epi16(L2, _delta2); - L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd); - - L3 = _mm_min_epi16(L3, _delta3); - L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd); - - _mm_store_si128( (__m128i*)(Lr_p + d), L0); - _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1); - _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2); - _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3); - - __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2)); - __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3)); - t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1)); - _minL0 = _mm_min_epi16(_minL0, t0); - - __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d)); - - L0 = _mm_adds_epi16(L0, L1); - L2 = _mm_adds_epi16(L2, L3); - Sval = _mm_adds_epi16(Sval, L0); - Sval = _mm_adds_epi16(Sval, L2); - - _mm_store_si128((__m128i*)(Sp + d), Sval); - } - - _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); - _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0); - } - else - #endif - { - int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST; - - for( d = 0; d < D; d++ ) - { - int Cpd = Cp[d], L0, L1, L2, L3; - - L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; - L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1; - L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2; - L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3; - - Lr_p[d] = (CostType)L0; - minL0 = std::min(minL0, L0); - - Lr_p[d + D2] = (CostType)L1; - minL1 = std::min(minL1, L1); - - Lr_p[d + D2*2] = (CostType)L2; - minL2 = std::min(minL2, L2); - - Lr_p[d + D2*3] = (CostType)L3; - minL3 = std::min(minL3, L3); - - Sp[d] = saturate_cast(Sp[d] + L0 + L1 + L2 + L3); - } - minLr[0][xm] = (CostType)minL0; - minLr[0][xm+1] = (CostType)minL1; - minLr[0][xm+2] = (CostType)minL2; - minLr[0][xm+3] = (CostType)minL3; - } - } - - if( pass == npasses ) - { - for( x = 0; x < width; x++ ) - { - disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED; - disp2cost[x] = MAX_COST; - } - - for( x = width1 - 1; x >= 0; x-- ) - { - CostType* Sp = S + x*D; - int minS = MAX_COST, bestDisp = -1; - - if( npasses == 1 ) - { - int xm = x*NR2, xd = xm*D2; - - int minL0 = MAX_COST; - int delta0 = minLr[0][xm + NR2] + P2; - CostType* Lr_p0 = Lr[0] + xd + NRD2; - Lr_p0[-1] = Lr_p0[D] = MAX_COST; - CostType* Lr_p = Lr[0] + xd; - - const CostType* Cp = C + x*D; - - #if CV_SSE2 - if( useSIMD ) - { - __m128i _P1 = _mm_set1_epi16((short)P1); - __m128i _delta0 = _mm_set1_epi16((short)delta0); - - __m128i _minL0 = _mm_set1_epi16((short)minL0); - __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1); - __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8); - - for( d = 0; d < D; d += 8 ) - { - __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0; - - L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); - L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); - L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); - L0 = _mm_min_epi16(L0, _delta0); - L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); - - _mm_store_si128((__m128i*)(Lr_p + d), L0); - _minL0 = _mm_min_epi16(_minL0, L0); - L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d)); - _mm_store_si128((__m128i*)(Sp + d), L0); - - __m128i mask = _mm_cmpgt_epi16(_minS, L0); - _minS = _mm_min_epi16(_minS, L0); - _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask)); - _d8 = _mm_adds_epi16(_d8, _8); - } - - short CV_DECL_ALIGNED(16) bestDispBuf[8]; - _mm_store_si128((__m128i*)bestDispBuf, _bestDisp); - - _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); - _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4)); - _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2)); - - __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8)); - qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4)); - qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2)); - - minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0); - minS = (CostType)_mm_cvtsi128_si32(qS); - - qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0); - qS = _mm_cmpeq_epi16(_minS, qS); - int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255; - - bestDisp = bestDispBuf[LSBTab[idx]]; - } - else - #endif - { - for( d = 0; d < D; d++ ) - { - int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; - - Lr_p[d] = (CostType)L0; - minL0 = std::min(minL0, L0); - - int Sval = Sp[d] = saturate_cast(Sp[d] + L0); - if( Sval < minS ) - { - minS = Sval; - bestDisp = d; - } - } - minLr[0][xm] = (CostType)minL0; - } - } - else - { - for( d = 0; d < D; d++ ) - { - int Sval = Sp[d]; - if( Sval < minS ) - { - minS = Sval; - bestDisp = d; - } - } - } - - for( d = 0; d < D; d++ ) - { - if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 ) - break; - } - if( d < D ) - continue; - d = bestDisp; - int _x2 = x + minX1 - d - minD; - if( disp2cost[_x2] > minS ) - { - disp2cost[_x2] = (CostType)minS; - disp2ptr[_x2] = (DispType)(d + minD); - } - - if( 0 < d && d < D-1 ) - { - // do subpixel quadratic interpolation: - // fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1]) - // then find minimum of the parabola. - int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1); - d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2); - } - else - d *= DISP_SCALE; - disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE); - } - - for( x = minX1; x < maxX1; x++ ) - { - // we round the computed disparity both towards -inf and +inf and check - // if either of the corresponding disparities in disp2 is consistent. - // This is to give the computed disparity a chance to look valid if it is. - int d1 = disp1ptr[x]; - if( d1 == INVALID_DISP_SCALED ) - continue; - int _d = d1 >> DISP_SHIFT; - int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT; - int _x = x - _d, x_ = x - d_; - if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff && - 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff ) - disp1ptr[x] = (DispType)INVALID_DISP_SCALED; - } - } - - // now shift the cyclic buffers - std::swap( Lr[0], Lr[1] ); - std::swap( minLr[0], minLr[1] ); - } - } -} +#if CV_SSE2 + if( useSIMD ) + { + __m128i _P1 = _mm_set1_epi16((short)P1); -class StereoBinarySGBMImpl : public StereoBinarySGBM -{ -public: - StereoBinarySGBMImpl() - { - params = StereoBinarySGBMParams(); - } - - StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize, - int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, - int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, - int _mode ) - { - params = StereoBinarySGBMParams( _minDisparity, _numDisparities, _SADWindowSize, - _P1, _P2, _disp12MaxDiff, _preFilterCap, - _uniquenessRatio, _speckleWindowSize, _speckleRange, - _mode ); - } - - void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr ) - { - Mat left = leftarr.getMat(), right = rightarr.getMat(); - CV_Assert( left.size() == right.size() && left.type() == right.type() && - left.depth() == CV_8U ); - - disparr.create( left.size(), CV_16S ); - Mat disp = disparr.getMat(); - - computeDisparityBinarySGBM( left, right, disp, params, buffer ); - medianBlur(disp, disp, 3); - - if( params.speckleWindowSize > 0 ) - filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize, - StereoMatcher::DISP_SCALE*params.speckleRange, buffer); - } - - int getMinDisparity() const { return params.minDisparity; } - void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; } - - int getNumDisparities() const { return params.numDisparities; } - void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; } - - int getBlockSize() const { return params.SADWindowSize; } - void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; } - - int getSpeckleWindowSize() const { return params.speckleWindowSize; } - void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; } - - int getSpeckleRange() const { return params.speckleRange; } - void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; } - - int getDisp12MaxDiff() const { return params.disp12MaxDiff; } - void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; } - - int getPreFilterCap() const { return params.preFilterCap; } - void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; } - - int getUniquenessRatio() const { return params.uniquenessRatio; } - void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; } - - int getP1() const { return params.P1; } - void setP1(int P1) { params.P1 = P1; } - - int getP2() const { return params.P2; } - void setP2(int P2) { params.P2 = P2; } - - int getMode() const { return params.mode; } - void setMode(int mode) { params.mode = mode; } - - void write(FileStorage& fs) const - { - fs << "name" << name_ - << "minDisparity" << params.minDisparity - << "numDisparities" << params.numDisparities - << "blockSize" << params.SADWindowSize - << "speckleWindowSize" << params.speckleWindowSize - << "speckleRange" << params.speckleRange - << "disp12MaxDiff" << params.disp12MaxDiff - << "preFilterCap" << params.preFilterCap - << "uniquenessRatio" << params.uniquenessRatio - << "P1" << params.P1 - << "P2" << params.P2 - << "mode" << params.mode; - } - - void read(const FileNode& fn) - { - FileNode n = fn["name"]; - CV_Assert( n.isString() && String(n) == name_ ); - params.minDisparity = (int)fn["minDisparity"]; - params.numDisparities = (int)fn["numDisparities"]; - params.SADWindowSize = (int)fn["blockSize"]; - params.speckleWindowSize = (int)fn["speckleWindowSize"]; - params.speckleRange = (int)fn["speckleRange"]; - params.disp12MaxDiff = (int)fn["disp12MaxDiff"]; - params.preFilterCap = (int)fn["preFilterCap"]; - params.uniquenessRatio = (int)fn["uniquenessRatio"]; - params.P1 = (int)fn["P1"]; - params.P2 = (int)fn["P2"]; - params.mode = (int)fn["mode"]; - } - - StereoBinarySGBMParams params; - Mat buffer; - static const char* name_; -}; - -const char* StereoBinarySGBMImpl::name_ = "StereoMatcher.SGBM"; - - -Ptr StereoBinarySGBM::create(int minDisparity, int numDisparities, int SADWindowSize, - int P1, int P2, int disp12MaxDiff, - int preFilterCap, int uniquenessRatio, - int speckleWindowSize, int speckleRange, - int mode) -{ - return Ptr( - new StereoBinarySGBMImpl(minDisparity, numDisparities, SADWindowSize, - P1, P2, disp12MaxDiff, - preFilterCap, uniquenessRatio, - speckleWindowSize, speckleRange, - mode)); -} + __m128i _delta0 = _mm_set1_epi16((short)delta0); + __m128i _delta1 = _mm_set1_epi16((short)delta1); + __m128i _delta2 = _mm_set1_epi16((short)delta2); + __m128i _delta3 = _mm_set1_epi16((short)delta3); + __m128i _minL0 = _mm_set1_epi16((short)MAX_COST); -Rect getValidDisparityROI( Rect roi1, Rect roi2, - int minDisparity, - int numberOfDisparities, - int SADWindowSize ) -{ - int SW2 = SADWindowSize/2; - int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1; + for( d = 0; d < D; d += 8 ) + { + __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)); + __m128i L0, L1, L2, L3; - int xmin = std::max(roi1.x, roi2.x + maxD) + SW2; - int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2; - int ymin = std::max(roi1.y, roi2.y) + SW2; - int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2; + L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); + L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d)); + L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d)); + L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d)); - Rect r(xmin, ymin, xmax - xmin, ymax - ymin); + L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); + L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); - return r.width > 0 && r.height > 0 ? r : Rect(); -} + L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1)); + L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1)); -typedef cv::Point_ Point2s; + L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1)); + L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1)); -template -void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf) -{ - using namespace cv; - - int width = img.cols, height = img.rows, npixels = width*height; - size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar)); - if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize ) - _buf.create(1, (int)bufSize, CV_8U); - - uchar* buf = _buf.ptr(); - int i, j, dstep = (int)(img.step/sizeof(T)); - int* labels = (int*)buf; - buf += npixels*sizeof(labels[0]); - Point2s* wbuf = (Point2s*)buf; - buf += npixels*sizeof(wbuf[0]); - uchar* rtype = (uchar*)buf; - int curlabel = 0; - - // clear out label assignments - memset(labels, 0, npixels*sizeof(labels[0])); - - for( i = 0; i < height; i++ ) - { - T* ds = img.ptr(i); - int* ls = labels + width*i; - - for( j = 0; j < width; j++ ) - { - if( ds[j] != newVal ) // not a bad disparity - { - if( ls[j] ) // has a label, check for bad label - { - if( rtype[ls[j]] ) // small region, zero out disparity - ds[j] = (T)newVal; - } - // no label, assign and propagate - else - { - Point2s* ws = wbuf; // initialize wavefront - Point2s p((short)j, (short)i); // current pixel - curlabel++; // next label - int count = 0; // current region size - ls[j] = curlabel; - - // wavefront propagation - while( ws >= wbuf ) // wavefront not empty - { - count++; - // put neighbors onto wavefront - T* dpp = &img.at(p.y, p.x); - T dp = *dpp; - int* lpp = labels + width*p.y + p.x; - - if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff ) - { - lpp[+width] = curlabel; - *ws++ = Point2s(p.x, p.y+1); - } - - if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff ) - { - lpp[-width] = curlabel; - *ws++ = Point2s(p.x, p.y-1); - } - - if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff ) - { - lpp[+1] = curlabel; - *ws++ = Point2s(p.x+1, p.y); - } - - if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff ) - { - lpp[-1] = curlabel; - *ws++ = Point2s(p.x-1, p.y); - } - - // pop most recent and propagate - // NB: could try least recent, maybe better convergence - p = *--ws; - } - - // assign label type - if( count <= maxSpeckleSize ) // speckle region - { - rtype[ls[j]] = 1; // small region label - ds[j] = (T)newVal; - } - else - rtype[ls[j]] = 0; // large region label - } - } - } - } -} + L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1)); + L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1)); + + L0 = _mm_min_epi16(L0, _delta0); + L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); + + L1 = _mm_min_epi16(L1, _delta1); + L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd); + + L2 = _mm_min_epi16(L2, _delta2); + L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd); + L3 = _mm_min_epi16(L3, _delta3); + L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd); + + _mm_store_si128( (__m128i*)(Lr_p + d), L0); + _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1); + _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2); + _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3); + + __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2)); + __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3)); + t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1)); + _minL0 = _mm_min_epi16(_minL0, t0); + + __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d)); + + L0 = _mm_adds_epi16(L0, L1); + L2 = _mm_adds_epi16(L2, L3); + Sval = _mm_adds_epi16(Sval, L0); + Sval = _mm_adds_epi16(Sval, L2); + + _mm_store_si128((__m128i*)(Sp + d), Sval); + } + + _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); + _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0); + } + else +#endif + { + int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST; + + for( d = 0; d < D; d++ ) + { + int Cpd = Cp[d], L0, L1, L2, L3; + + L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; + L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1; + L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2; + L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3; + + Lr_p[d] = (CostType)L0; + minL0 = std::min(minL0, L0); + + Lr_p[d + D2] = (CostType)L1; + minL1 = std::min(minL1, L1); + + Lr_p[d + D2*2] = (CostType)L2; + minL2 = std::min(minL2, L2); + + Lr_p[d + D2*3] = (CostType)L3; + minL3 = std::min(minL3, L3); + + Sp[d] = saturate_cast(Sp[d] + L0 + L1 + L2 + L3); + } + minLr[0][xm] = (CostType)minL0; + minLr[0][xm+1] = (CostType)minL1; + minLr[0][xm+2] = (CostType)minL2; + minLr[0][xm+3] = (CostType)minL3; + } + } + + if( pass == npasses ) + { + for( x = 0; x < width; x++ ) + { + disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED; + disp2cost[x] = MAX_COST; + } + + for( x = width1 - 1; x >= 0; x-- ) + { + CostType* Sp = S + x*D; + int minS = MAX_COST, bestDisp = -1; + + if( npasses == 1 ) + { + int xm = x*NR2, xd = xm*D2; + + int minL0 = MAX_COST; + int delta0 = minLr[0][xm + NR2] + P2; + CostType* Lr_p0 = Lr[0] + xd + NRD2; + Lr_p0[-1] = Lr_p0[D] = MAX_COST; + CostType* Lr_p = Lr[0] + xd; + + const CostType* Cp = C + x*D; + +#if CV_SSE2 + if( useSIMD ) + { + __m128i _P1 = _mm_set1_epi16((short)P1); + __m128i _delta0 = _mm_set1_epi16((short)delta0); + + __m128i _minL0 = _mm_set1_epi16((short)minL0); + __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1); + __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8); + + for( d = 0; d < D; d += 8 ) + { + __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0; + + L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); + L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); + L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); + L0 = _mm_min_epi16(L0, _delta0); + L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); + + _mm_store_si128((__m128i*)(Lr_p + d), L0); + _minL0 = _mm_min_epi16(_minL0, L0); + L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d)); + _mm_store_si128((__m128i*)(Sp + d), L0); + + __m128i mask = _mm_cmpgt_epi16(_minS, L0); + _minS = _mm_min_epi16(_minS, L0); + _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask)); + _d8 = _mm_adds_epi16(_d8, _8); + } + + short CV_DECL_ALIGNED(16) bestDispBuf[8]; + _mm_store_si128((__m128i*)bestDispBuf, _bestDisp); + + _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); + _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4)); + _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2)); + + __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8)); + qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4)); + qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2)); + + minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0); + minS = (CostType)_mm_cvtsi128_si32(qS); + + qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0); + qS = _mm_cmpeq_epi16(_minS, qS); + int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255; + + bestDisp = bestDispBuf[LSBTab[idx]]; + } + else +#endif + { + for( d = 0; d < D; d++ ) + { + int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; + + Lr_p[d] = (CostType)L0; + minL0 = std::min(minL0, L0); + + int Sval = Sp[d] = saturate_cast(Sp[d] + L0); + if( Sval < minS ) + { + minS = Sval; + bestDisp = d; + } + } + minLr[0][xm] = (CostType)minL0; + } + } + else + { + for( d = 0; d < D; d++ ) + { + int Sval = Sp[d]; + if( Sval < minS ) + { + minS = Sval; + bestDisp = d; + } + } + } + + for( d = 0; d < D; d++ ) + { + if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 ) + break; + } + if( d < D ) + continue; + d = bestDisp; + int _x2 = x + minX1 - d - minD; + if( disp2cost[_x2] > minS ) + { + disp2cost[_x2] = (CostType)minS; + disp2ptr[_x2] = (DispType)(d + minD); + } + + if( 0 < d && d < D-1 ) + { + // do subpixel quadratic interpolation: + // fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1]) + // then find minimum of the parabola. + int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1); + d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2); + } + else + d *= DISP_SCALE; + disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE); + } + + for( x = minX1; x < maxX1; x++ ) + { + // we round the computed disparity both towards -inf and +inf and check + // if either of the corresponding disparities in disp2 is consistent. + // This is to give the computed disparity a chance to look valid if it is. + int d1 = disp1ptr[x]; + if( d1 == INVALID_DISP_SCALED ) + continue; + int _d = d1 >> DISP_SHIFT; + int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT; + int _x = x - _d, x_ = x - d_; + if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff && + 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff ) + disp1ptr[x] = (DispType)INVALID_DISP_SCALED; + } + } + + // now shift the cyclic buffers + std::swap( Lr[0], Lr[1] ); + std::swap( minLr[0], minLr[1] ); + } + } + } + + class StereoBinarySGBMImpl : public StereoBinarySGBM + { + public: + StereoBinarySGBMImpl() + { + params = StereoBinarySGBMParams(); + } + + StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize, + int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, + int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, + int _mode ) + { + params = StereoBinarySGBMParams( _minDisparity, _numDisparities, _SADWindowSize, + _P1, _P2, _disp12MaxDiff, _preFilterCap, + _uniquenessRatio, _speckleWindowSize, _speckleRange, + _mode ); + } + + void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr ) + { + Mat left = leftarr.getMat(), right = rightarr.getMat(); + CV_Assert( left.size() == right.size() && left.type() == right.type() && + left.depth() == CV_8U ); + + disparr.create( left.size(), CV_16S ); + Mat disp = disparr.getMat(); + + computeDisparityBinarySGBM( left, right, disp, params, buffer ); + medianBlur(disp, disp, 3); + + if( params.speckleWindowSize > 0 ) + filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize, + StereoMatcher::DISP_SCALE*params.speckleRange, buffer); + } + + int getMinDisparity() const { return params.minDisparity; } + void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; } + + int getNumDisparities() const { return params.numDisparities; } + void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; } + + int getBlockSize() const { return params.SADWindowSize; } + void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; } + + int getSpeckleWindowSize() const { return params.speckleWindowSize; } + void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; } + + int getSpeckleRange() const { return params.speckleRange; } + void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; } + + int getDisp12MaxDiff() const { return params.disp12MaxDiff; } + void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; } + + int getPreFilterCap() const { return params.preFilterCap; } + void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; } + + int getUniquenessRatio() const { return params.uniquenessRatio; } + void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; } + + int getP1() const { return params.P1; } + void setP1(int P1) { params.P1 = P1; } + + int getP2() const { return params.P2; } + void setP2(int P2) { params.P2 = P2; } + + int getMode() const { return params.mode; } + void setMode(int mode) { params.mode = mode; } + + void write(FileStorage& fs) const + { + fs << "name" << name_ + << "minDisparity" << params.minDisparity + << "numDisparities" << params.numDisparities + << "blockSize" << params.SADWindowSize + << "speckleWindowSize" << params.speckleWindowSize + << "speckleRange" << params.speckleRange + << "disp12MaxDiff" << params.disp12MaxDiff + << "preFilterCap" << params.preFilterCap + << "uniquenessRatio" << params.uniquenessRatio + << "P1" << params.P1 + << "P2" << params.P2 + << "mode" << params.mode; + } + + void read(const FileNode& fn) + { + FileNode n = fn["name"]; + CV_Assert( n.isString() && String(n) == name_ ); + params.minDisparity = (int)fn["minDisparity"]; + params.numDisparities = (int)fn["numDisparities"]; + params.SADWindowSize = (int)fn["blockSize"]; + params.speckleWindowSize = (int)fn["speckleWindowSize"]; + params.speckleRange = (int)fn["speckleRange"]; + params.disp12MaxDiff = (int)fn["disp12MaxDiff"]; + params.preFilterCap = (int)fn["preFilterCap"]; + params.uniquenessRatio = (int)fn["uniquenessRatio"]; + params.P1 = (int)fn["P1"]; + params.P2 = (int)fn["P2"]; + params.mode = (int)fn["mode"]; + } + + StereoBinarySGBMParams params; + Mat buffer; + static const char* name_; + }; + + const char* StereoBinarySGBMImpl::name_ = "StereoMatcher.SGBM"; + + + Ptr StereoBinarySGBM::create(int minDisparity, int numDisparities, int SADWindowSize, + int P1, int P2, int disp12MaxDiff, + int preFilterCap, int uniquenessRatio, + int speckleWindowSize, int speckleRange, + int mode) + { + return Ptr( + new StereoBinarySGBMImpl(minDisparity, numDisparities, SADWindowSize, + P1, P2, disp12MaxDiff, + preFilterCap, uniquenessRatio, + speckleWindowSize, speckleRange, + mode)); + } + + Rect getValidDisparityROI( Rect roi1, Rect roi2, + int minDisparity, + int numberOfDisparities, + int SADWindowSize ) + { + int SW2 = SADWindowSize/2; + int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1; + + int xmin = std::max(roi1.x, roi2.x + maxD) + SW2; + int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2; + int ymin = std::max(roi1.y, roi2.y) + SW2; + int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2; + + Rect r(xmin, ymin, xmax - xmin, ymax - ymin); + + return r.width > 0 && r.height > 0 ? r : Rect(); + } + + typedef cv::Point_ Point2s; + + template + void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf) + { + using namespace cv; + + int width = img.cols, height = img.rows, npixels = width*height; + size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar)); + if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize ) + _buf.create(1, (int)bufSize, CV_8U); + + uchar* buf = _buf.ptr(); + int i, j, dstep = (int)(img.step/sizeof(T)); + int* labels = (int*)buf; + buf += npixels*sizeof(labels[0]); + Point2s* wbuf = (Point2s*)buf; + buf += npixels*sizeof(wbuf[0]); + uchar* rtype = (uchar*)buf; + int curlabel = 0; + + // clear out label assignments + memset(labels, 0, npixels*sizeof(labels[0])); + + for( i = 0; i < height; i++ ) + { + T* ds = img.ptr(i); + int* ls = labels + width*i; + + for( j = 0; j < width; j++ ) + { + if( ds[j] != newVal ) // not a bad disparity + { + if( ls[j] ) // has a label, check for bad label + { + if( rtype[ls[j]] ) // small region, zero out disparity + ds[j] = (T)newVal; + } + // no label, assign and propagate + else + { + Point2s* ws = wbuf; // initialize wavefront + Point2s p((short)j, (short)i); // current pixel + curlabel++; // next label + int count = 0; // current region size + ls[j] = curlabel; + + // wavefront propagation + while( ws >= wbuf ) // wavefront not empty + { + count++; + // put neighbors onto wavefront + T* dpp = &img.at(p.y, p.x); + T dp = *dpp; + int* lpp = labels + width*p.y + p.x; + + if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff ) + { + lpp[+width] = curlabel; + *ws++ = Point2s(p.x, p.y+1); + } + + if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff ) + { + lpp[-width] = curlabel; + *ws++ = Point2s(p.x, p.y-1); + } + + if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff ) + { + lpp[+1] = curlabel; + *ws++ = Point2s(p.x+1, p.y); + } + + if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff ) + { + lpp[-1] = curlabel; + *ws++ = Point2s(p.x-1, p.y); + } + + // pop most recent and propagate + // NB: could try least recent, maybe better convergence + p = *--ws; + } + + // assign label type + if( count <= maxSpeckleSize ) // speckle region + { + rtype[ls[j]] = 1; // small region label + ds[j] = (T)newVal; + } + else + rtype[ls[j]] = 0; // large region label + } + } + } + } + } + } } -void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize, - double _maxDiff, InputOutputArray __buf ) +void cv::stereo::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize, + double _maxDiff, InputOutputArray __buf ) { - Mat img = _img.getMat(); - int type = img.type(); - Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp; - CV_Assert( type == CV_8UC1 || type == CV_16SC1 ); + Mat img = _img.getMat(); + int type = img.type(); + Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp; + CV_Assert( type == CV_8UC1 || type == CV_16SC1 ); - int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff); + int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff); #if IPP_VERSION_X100 >= 801 - CV_IPP_CHECK() - { - Ipp32s bufsize = 0; - IppiSize roisize = { img.cols, img.rows }; - IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s; - - if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1)) - { - IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize); - Ipp8u * buffer = ippsMalloc_8u(bufsize); - - if ((int)status >= 0) - { - if (type == CV_8UC1) - status = ippiMarkSpeckles_8u_C1IR(img.ptr(), (int)img.step, roisize, - (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer); - else - status = ippiMarkSpeckles_16s_C1IR(img.ptr(), (int)img.step, roisize, - (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer); - } - - if (status >= 0) - { - CV_IMPL_ADD(CV_IMPL_IPP); - return; - } - setIppErrorStatus(); - } - } + CV_IPP_CHECK() + { + Ipp32s bufsize = 0; + IppiSize roisize = { img.cols, img.rows }; + IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s; + + if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1)) + { + IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize); + Ipp8u * buffer = ippsMalloc_8u(bufsize); + + if ((int)status >= 0) + { + if (type == CV_8UC1) + status = ippiMarkSpeckles_8u_C1IR(img.ptr(), (int)img.step, roisize, + (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer); + else + status = ippiMarkSpeckles_16s_C1IR(img.ptr(), (int)img.step, roisize, + (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer); + } + + if (status >= 0) + { + CV_IMPL_ADD(CV_IMPL_IPP); + return; + } + setIppErrorStatus(); + } + } #endif - if (type == CV_8UC1) - filterSpecklesImpl(img, newVal, maxSpeckleSize, maxDiff, _buf); - else - filterSpecklesImpl(img, newVal, maxSpeckleSize, maxDiff, _buf); + if (type == CV_8UC1) + filterSpecklesImpl(img, newVal, maxSpeckleSize, maxDiff, _buf); + else + filterSpecklesImpl(img, newVal, maxSpeckleSize, maxDiff, _buf); } -void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity, - int numberOfDisparities, int disp12MaxDiff ) +void cv::stereo::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity, + int numberOfDisparities, int disp12MaxDiff ) { - Mat disp = _disp.getMat(), cost = _cost.getMat(); - int cols = disp.cols, rows = disp.rows; - int minD = minDisparity, maxD = minDisparity + numberOfDisparities; - int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0); - AutoBuffer _disp2buf(cols*2); - int* disp2buf = _disp2buf; - int* disp2cost = disp2buf + cols; - const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT; - int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; - int costType = cost.type(); - - disp12MaxDiff *= DISP_SCALE; - - CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S && - (costType == CV_16S || costType == CV_32S) && - disp.size() == cost.size() ); - - for( int y = 0; y < rows; y++ ) - { - short* dptr = disp.ptr(y); - - for( x = 0; x < cols; x++ ) - { - disp2buf[x] = INVALID_DISP_SCALED; - disp2cost[x] = INT_MAX; - } - - if( costType == CV_16S ) - { - const short* cptr = cost.ptr(y); - - for( x = minX1; x < maxX1; x++ ) - { - int d = dptr[x], c = cptr[x]; - int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT); - - if( disp2cost[x2] > c ) - { - disp2cost[x2] = c; - disp2buf[x2] = d; - } - } - } - else - { - const int* cptr = cost.ptr(y); - - for( x = minX1; x < maxX1; x++ ) - { - int d = dptr[x], c = cptr[x]; - int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT); - - if( disp2cost[x2] < c ) - { - disp2cost[x2] = c; - disp2buf[x2] = d; - } - } - } - - for( x = minX1; x < maxX1; x++ ) - { - // we round the computed disparity both towards -inf and +inf and check - // if either of the corresponding disparities in disp2 is consistent. - // This is to give the computed disparity a chance to look valid if it is. - int d = dptr[x]; - if( d == INVALID_DISP_SCALED ) - continue; - int d0 = d >> DISP_SHIFT; - int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT; - int x0 = x - d0, x1 = x - d1; - if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) && - (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) ) - dptr[x] = (short)INVALID_DISP_SCALED; - } - } + Mat disp = _disp.getMat(), cost = _cost.getMat(); + int cols = disp.cols, rows = disp.rows; + int minD = minDisparity, maxD = minDisparity + numberOfDisparities; + int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0); + AutoBuffer _disp2buf(cols*2); + int* disp2buf = _disp2buf; + int* disp2cost = disp2buf + cols; + const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT; + int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; + int costType = cost.type(); + + disp12MaxDiff *= DISP_SCALE; + + CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S && + (costType == CV_16S || costType == CV_32S) && + disp.size() == cost.size() ); + + for( int y = 0; y < rows; y++ ) + { + short* dptr = disp.ptr(y); + + for( x = 0; x < cols; x++ ) + { + disp2buf[x] = INVALID_DISP_SCALED; + disp2cost[x] = INT_MAX; + } + + if( costType == CV_16S ) + { + const short* cptr = cost.ptr(y); + + for( x = minX1; x < maxX1; x++ ) + { + int d = dptr[x], c = cptr[x]; + int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT); + + if( disp2cost[x2] > c ) + { + disp2cost[x2] = c; + disp2buf[x2] = d; + } + } + } + else + { + const int* cptr = cost.ptr(y); + + for( x = minX1; x < maxX1; x++ ) + { + int d = dptr[x], c = cptr[x]; + int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT); + + if( disp2cost[x2] < c ) + { + disp2cost[x2] = c; + disp2buf[x2] = d; + } + } + } + + for( x = minX1; x < maxX1; x++ ) + { + // we round the computed disparity both towards -inf and +inf and check + // if either of the corresponding disparities in disp2 is consistent. + // This is to give the computed disparity a chance to look valid if it is. + int d = dptr[x]; + if( d == INVALID_DISP_SCALED ) + continue; + int d0 = d >> DISP_SHIFT; + int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT; + int x0 = x - d0, x1 = x - d1; + if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) && + (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) ) + dptr[x] = (short)INVALID_DISP_SCALED; + } + } } diff --git a/modules/stereo/test/test_precomp.hpp b/modules/stereo/test/test_precomp.hpp index 90a56e85a..6b659c327 100644 --- a/modules/stereo/test/test_precomp.hpp +++ b/modules/stereo/test/test_precomp.hpp @@ -11,10 +11,21 @@ #include #include "opencv2/ts.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/stereo.hpp" #include "opencv2/imgcodecs.hpp" + + +#include "opencv2/stereo.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/features2d.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/core/private.hpp" +#include "opencv2/core/cvdef.h" #include "opencv2/core.hpp" #include "opencv2/highgui.hpp" +#include +#include + + + #endif