From a20c5c8dd974caa55749f18d6587f82bf30c7b26 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 20 Nov 2014 18:03:57 +0300 Subject: [PATCH] Doxygen documentation for all modules --- modules/adas/include/opencv2/adas.hpp | 4 + modules/bgsegm/doc/bgsegm.bib | 17 + modules/bgsegm/include/opencv2/bgsegm.hpp | 92 ++- modules/bioinspired/doc/bioinspired.bib | 47 ++ modules/bioinspired/doc/bioinspired.rst | 2 +- .../doc/{retina => }/images/retinaInput.jpg | Bin .../images/retinaOutput_default.jpg | Bin .../images/retinaOutput_realistic.jpg | Bin modules/bioinspired/doc/retina.markdown | 223 ++++++ .../doc/{retina/index.rst => retina.rst} | 0 .../include/opencv2/bioinspired.hpp | 10 + .../include/opencv2/bioinspired/retina.hpp | 413 +++++++--- .../bioinspired/retinafasttonemapping.hpp | 77 +- .../transientareassegmentationmodule.hpp | 155 ++-- modules/ccalib/include/opencv2/ccalib.hpp | 20 +- modules/cvv/doc/cvv.rst | 4 +- .../doc/{cvv_api/index.rst => cvv_api.rst} | 0 .../doc/{cvv_gui/index.rst => cvv_gui.rst} | 0 .../include/opencv2/cvv/call_meta_data.hpp | 7 + modules/cvv/include/opencv2/cvv/cvv.hpp | 16 + .../cvv/include/opencv2/cvv/debug_mode.hpp | 16 +- modules/cvv/include/opencv2/cvv/dmatch.hpp | 32 +- modules/cvv/include/opencv2/cvv/filter.hpp | 23 +- .../cvv/include/opencv2/cvv/final_show.hpp | 20 +- .../cvv/include/opencv2/cvv/show_image.hpp | 25 +- .../datasets/doc/{datasets => }/ar_hmdb.rst | 0 .../datasets/doc/{datasets => }/ar_sports.rst | 0 modules/datasets/doc/datasets.rst | 88 +- .../doc/{datasets => }/fr_adience.rst | 0 .../datasets/doc/{datasets => }/fr_lfw.rst | 0 .../doc/{datasets => }/gr_chalearn.rst | 0 .../datasets/doc/{datasets => }/gr_skig.rst | 0 .../doc/{datasets => }/hpe_humaneva.rst | 0 .../datasets/doc/{datasets => }/hpe_parse.rst | 0 .../datasets/doc/{datasets => }/ir_affine.rst | 0 .../datasets/doc/{datasets => }/ir_robot.rst | 0 .../datasets/doc/{datasets => }/is_bsds.rst | 0 .../doc/{datasets => }/is_weizmann.rst | 0 .../datasets/doc/{datasets => }/msm_epfl.rst | 0 .../doc/{datasets => }/msm_middlebury.rst | 0 .../doc/{datasets => }/or_imagenet.rst | 0 .../datasets/doc/{datasets => }/or_mnist.rst | 0 .../datasets/doc/{datasets => }/or_sun.rst | 0 .../doc/{datasets => }/pd_caltech.rst | 0 .../doc/{datasets => }/slam_kitti.rst | 0 .../doc/{datasets => }/slam_tumindoor.rst | 0 .../datasets/doc/{datasets => }/tr_chars.rst | 0 .../datasets/doc/{datasets => }/tr_svt.rst | 0 .../include/opencv2/datasets/ar_hmdb.hpp | 5 + .../include/opencv2/datasets/ar_sports.hpp | 5 + .../include/opencv2/datasets/dataset.hpp | 439 ++++++++++ .../include/opencv2/datasets/fr_adience.hpp | 5 + .../include/opencv2/datasets/fr_lfw.hpp | 5 + .../include/opencv2/datasets/gr_chalearn.hpp | 5 + .../include/opencv2/datasets/gr_skig.hpp | 5 + .../include/opencv2/datasets/hpe_humaneva.hpp | 5 + .../include/opencv2/datasets/hpe_parse.hpp | 5 + .../include/opencv2/datasets/ir_affine.hpp | 5 + .../include/opencv2/datasets/ir_robot.hpp | 5 + .../include/opencv2/datasets/is_bsds.hpp | 5 + .../include/opencv2/datasets/is_weizmann.hpp | 5 + .../include/opencv2/datasets/msm_epfl.hpp | 5 + .../opencv2/datasets/msm_middlebury.hpp | 5 + .../include/opencv2/datasets/or_imagenet.hpp | 5 + .../include/opencv2/datasets/or_mnist.hpp | 5 + .../include/opencv2/datasets/or_sun.hpp | 5 + .../include/opencv2/datasets/pd_caltech.hpp | 5 + .../include/opencv2/datasets/slam_kitti.hpp | 5 + .../opencv2/datasets/slam_tumindoor.hpp | 5 + .../include/opencv2/datasets/tr_chars.hpp | 5 + 
.../include/opencv2/datasets/tr_svt.hpp | 5 + .../include/opencv2/datasets/util.hpp | 5 + modules/face/doc/changelog.markdown | 75 ++ modules/face/doc/{facerec => }/etc/at.txt | 0 modules/face/doc/face.bib | 160 ++++ modules/face/doc/face.rst | 2 +- .../face/doc/{facerec => }/facerec_api.rst | 0 .../doc/{facerec => }/facerec_changelog.rst | 0 .../doc/{facerec => }/facerec_tutorial.rst | 0 .../img/at_database_small_sample_size.png | Bin .../img/eigenface_reconstruction_opencv.png | Bin .../{facerec => }/img/eigenfaces_opencv.png | Bin .../img/fisherface_reconstruction_opencv.png | Bin .../{facerec => }/img/fisherfaces_opencv.png | Bin .../face/doc/{facerec => }/img/lbp/lbp.png | Bin .../doc/{facerec => }/img/lbp/lbp_yale.jpg | Bin .../doc/{facerec => }/img/lbp/patterns.png | Bin .../tutorial/facerec_video/facerec_video.png | Bin .../arnie_10_10_200_200.jpg | Bin .../arnie_20_20_200_200.jpg | Bin .../arnie_20_20_70_70.jpg | Bin .../arnie_30_30_200_200.jpg | Bin .../gender_classification/clooney_set.png | Bin .../gender_classification/fisherface_0.png | Bin .../fisherface_reconstruction_0.png | Bin .../tutorial/gender_classification/mean.png | Bin modules/face/doc/{facerec => }/index.rst | 0 .../face/doc/{facerec => }/src/CMakeLists.txt | 0 .../face/doc/{facerec => }/src/create_csv.py | 0 .../face/doc/{facerec => }/src/crop_face.py | 0 .../doc/{facerec => }/src/facerec_demo.cpp | 0 .../{facerec => }/src/facerec_eigenfaces.cpp | 0 .../{facerec => }/src/facerec_fisherfaces.cpp | 0 .../doc/{facerec => }/src/facerec_lbph.cpp | 0 .../{facerec => }/src/facerec_save_load.cpp | 0 .../doc/{facerec => }/src/facerec_video.cpp | 0 modules/face/doc/tutorial.markdown | 699 ++++++++++++++++ .../facerec_gender_classification.rst | 0 .../tutorial/facerec_save_load.rst | 0 .../tutorial/facerec_video_recognition.rst | 0 modules/face/include/opencv2/face.hpp | 8 + modules/face/include/opencv2/face/facerec.hpp | 373 ++++++++- modules/latentsvm/doc/latentsvm.bib | 18 + .../latentsvm/include/opencv2/latentsvm.hpp | 56 ++ .../line_descriptor/doc/line_descriptor.bib | 30 + modules/line_descriptor/doc/tutorial.markdown | 418 ++++++++++ .../include/opencv2/line_descriptor.hpp | 72 ++ .../opencv2/line_descriptor/descriptor.hpp | 549 +++++++++---- .../matlab/include/opencv2/matlab/bridge.hpp | 8 +- modules/matlab/include/opencv2/matlab/map.hpp | 8 + .../matlab/include/opencv2/matlab/mxarray.hpp | 6 + .../include/opencv2/matlab/transpose.hpp | 6 + modules/optflow/doc/optflow.bib | 39 + modules/optflow/include/opencv2/optflow.hpp | 108 ++- .../include/opencv2/optflow/motempl.hpp | 84 +- modules/reg/doc/reg.bib | 10 + modules/reg/include/opencv2/reg/map.hpp | 79 +- modules/reg/include/opencv2/reg/mapaffine.hpp | 3 + modules/reg/include/opencv2/reg/mapper.hpp | 9 +- .../include/opencv2/reg/mappergradaffine.hpp | 3 + .../include/opencv2/reg/mappergradeuclid.hpp | 4 + .../include/opencv2/reg/mappergradproj.hpp | 3 + .../include/opencv2/reg/mappergradshift.hpp | 3 + .../include/opencv2/reg/mappergradsimilar.hpp | 3 + .../reg/include/opencv2/reg/mapperpyramid.hpp | 4 +- modules/reg/include/opencv2/reg/mapprojec.hpp | 3 + modules/reg/include/opencv2/reg/mapshift.hpp | 3 + modules/rgbd/include/opencv2/rgbd.hpp | 10 + modules/rgbd/include/opencv2/rgbd/linemod.hpp | 5 +- modules/saliency/doc/saliency.bib | 24 + modules/saliency/include/opencv2/saliency.hpp | 35 + .../opencv2/saliency/saliencyBaseClasses.hpp | 20 + .../saliency/saliencySpecializedClasses.hpp | 66 +- .../surface_matching/doc/surface_matching.bib | 23 + 
.../include/opencv2/surface_matching.hpp | 324 ++++++++ .../include/opencv2/surface_matching/icp.hpp | 14 +- .../opencv2/surface_matching/pose_3d.hpp | 12 +- .../opencv2/surface_matching/ppf_helpers.hpp | 11 +- .../opencv2/surface_matching/ppf_match_3d.hpp | 20 +- .../opencv2/surface_matching/t_hash_int.hpp | 16 +- modules/text/include/opencv2/text.hpp | 56 ++ .../text/include/opencv2/text/erfilter.hpp | 240 +++--- modules/text/include/opencv2/text/ocr.hpp | 122 ++- modules/tracking/doc/tracking.bib | 69 ++ modules/tracking/include/opencv2/tracking.hpp | 283 +++++++ .../include/opencv2/tracking/feature.hpp | 5 + .../opencv2/tracking/onlineBoosting.hpp | 6 + .../include/opencv2/tracking/onlineMIL.hpp | 6 + .../include/opencv2/tracking/tracker.hpp | 752 +++++++++++------- modules/xfeatures2d/doc/xfeatures2d.bib | 46 ++ .../include/opencv2/xfeatures2d.hpp | 52 +- .../include/opencv2/xfeatures2d/cuda.hpp | 38 + .../include/opencv2/xfeatures2d/nonfree.hpp | 69 +- modules/ximgproc/doc/ximgproc.bib | 48 ++ modules/ximgproc/include/opencv2/ximgproc.hpp | 13 + .../include/opencv2/ximgproc/edge_filter.hpp | 201 ++++- .../include/opencv2/ximgproc/seeds.hpp | 136 +++- .../ximgproc/structured_edge_detection.hpp | 37 +- modules/xobjdetect/doc/xobjdetect.bib | 19 + .../xobjdetect/include/opencv2/xobjdetect.hpp | 194 ++--- .../xphoto/doc/{denoising => }/denoising.rst | 0 .../doc/{inpainting => }/inpainting.rst | 0 .../doc/{colorbalance => }/whitebalance.rst | 0 modules/xphoto/doc/xphoto.bib | 8 + modules/xphoto/doc/xphoto.rst | 6 +- modules/xphoto/include/opencv2/xphoto.hpp | 3 + .../opencv2/xphoto/dct_image_denoising.hpp | 37 +- .../include/opencv2/xphoto/inpainting.hpp | 42 +- .../opencv2/xphoto/simple_color_balance.hpp | 47 +- 179 files changed, 6624 insertions(+), 1182 deletions(-) create mode 100644 modules/bgsegm/doc/bgsegm.bib create mode 100644 modules/bioinspired/doc/bioinspired.bib rename modules/bioinspired/doc/{retina => }/images/retinaInput.jpg (100%) rename modules/bioinspired/doc/{retina => }/images/retinaOutput_default.jpg (100%) rename modules/bioinspired/doc/{retina => }/images/retinaOutput_realistic.jpg (100%) create mode 100644 modules/bioinspired/doc/retina.markdown rename modules/bioinspired/doc/{retina/index.rst => retina.rst} (100%) rename modules/cvv/doc/{cvv_api/index.rst => cvv_api.rst} (100%) rename modules/cvv/doc/{cvv_gui/index.rst => cvv_gui.rst} (100%) rename modules/datasets/doc/{datasets => }/ar_hmdb.rst (100%) rename modules/datasets/doc/{datasets => }/ar_sports.rst (100%) rename modules/datasets/doc/{datasets => }/fr_adience.rst (100%) rename modules/datasets/doc/{datasets => }/fr_lfw.rst (100%) rename modules/datasets/doc/{datasets => }/gr_chalearn.rst (100%) rename modules/datasets/doc/{datasets => }/gr_skig.rst (100%) rename modules/datasets/doc/{datasets => }/hpe_humaneva.rst (100%) rename modules/datasets/doc/{datasets => }/hpe_parse.rst (100%) rename modules/datasets/doc/{datasets => }/ir_affine.rst (100%) rename modules/datasets/doc/{datasets => }/ir_robot.rst (100%) rename modules/datasets/doc/{datasets => }/is_bsds.rst (100%) rename modules/datasets/doc/{datasets => }/is_weizmann.rst (100%) rename modules/datasets/doc/{datasets => }/msm_epfl.rst (100%) rename modules/datasets/doc/{datasets => }/msm_middlebury.rst (100%) rename modules/datasets/doc/{datasets => }/or_imagenet.rst (100%) rename modules/datasets/doc/{datasets => }/or_mnist.rst (100%) rename modules/datasets/doc/{datasets => }/or_sun.rst (100%) rename modules/datasets/doc/{datasets => 
}/pd_caltech.rst (100%) rename modules/datasets/doc/{datasets => }/slam_kitti.rst (100%) rename modules/datasets/doc/{datasets => }/slam_tumindoor.rst (100%) rename modules/datasets/doc/{datasets => }/tr_chars.rst (100%) rename modules/datasets/doc/{datasets => }/tr_svt.rst (100%) create mode 100644 modules/face/doc/changelog.markdown rename modules/face/doc/{facerec => }/etc/at.txt (100%) create mode 100644 modules/face/doc/face.bib rename modules/face/doc/{facerec => }/facerec_api.rst (100%) rename modules/face/doc/{facerec => }/facerec_changelog.rst (100%) rename modules/face/doc/{facerec => }/facerec_tutorial.rst (100%) rename modules/face/doc/{facerec => }/img/at_database_small_sample_size.png (100%) rename modules/face/doc/{facerec => }/img/eigenface_reconstruction_opencv.png (100%) rename modules/face/doc/{facerec => }/img/eigenfaces_opencv.png (100%) rename modules/face/doc/{facerec => }/img/fisherface_reconstruction_opencv.png (100%) rename modules/face/doc/{facerec => }/img/fisherfaces_opencv.png (100%) rename modules/face/doc/{facerec => }/img/lbp/lbp.png (100%) rename modules/face/doc/{facerec => }/img/lbp/lbp_yale.jpg (100%) rename modules/face/doc/{facerec => }/img/lbp/patterns.png (100%) rename modules/face/doc/{facerec => }/img/tutorial/facerec_video/facerec_video.png (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/arnie_10_10_200_200.jpg (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/arnie_20_20_200_200.jpg (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/arnie_20_20_70_70.jpg (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/arnie_30_30_200_200.jpg (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/clooney_set.png (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/fisherface_0.png (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/fisherface_reconstruction_0.png (100%) rename modules/face/doc/{facerec => }/img/tutorial/gender_classification/mean.png (100%) rename modules/face/doc/{facerec => }/index.rst (100%) rename modules/face/doc/{facerec => }/src/CMakeLists.txt (100%) rename modules/face/doc/{facerec => }/src/create_csv.py (100%) rename modules/face/doc/{facerec => }/src/crop_face.py (100%) rename modules/face/doc/{facerec => }/src/facerec_demo.cpp (100%) rename modules/face/doc/{facerec => }/src/facerec_eigenfaces.cpp (100%) rename modules/face/doc/{facerec => }/src/facerec_fisherfaces.cpp (100%) rename modules/face/doc/{facerec => }/src/facerec_lbph.cpp (100%) rename modules/face/doc/{facerec => }/src/facerec_save_load.cpp (100%) rename modules/face/doc/{facerec => }/src/facerec_video.cpp (100%) create mode 100644 modules/face/doc/tutorial.markdown rename modules/face/doc/{facerec => }/tutorial/facerec_gender_classification.rst (100%) rename modules/face/doc/{facerec => }/tutorial/facerec_save_load.rst (100%) rename modules/face/doc/{facerec => }/tutorial/facerec_video_recognition.rst (100%) create mode 100644 modules/latentsvm/doc/latentsvm.bib create mode 100644 modules/line_descriptor/doc/line_descriptor.bib create mode 100644 modules/line_descriptor/doc/tutorial.markdown create mode 100644 modules/optflow/doc/optflow.bib create mode 100644 modules/reg/doc/reg.bib create mode 100644 modules/saliency/doc/saliency.bib create mode 100644 modules/surface_matching/doc/surface_matching.bib create mode 100644 modules/tracking/doc/tracking.bib create mode 
100644 modules/xfeatures2d/doc/xfeatures2d.bib create mode 100644 modules/ximgproc/doc/ximgproc.bib create mode 100644 modules/xobjdetect/doc/xobjdetect.bib rename modules/xphoto/doc/{denoising => }/denoising.rst (100%) rename modules/xphoto/doc/{inpainting => }/inpainting.rst (100%) rename modules/xphoto/doc/{colorbalance => }/whitebalance.rst (100%) create mode 100644 modules/xphoto/doc/xphoto.bib diff --git a/modules/adas/include/opencv2/adas.hpp b/modules/adas/include/opencv2/adas.hpp index da0786918..d41ca0054 100644 --- a/modules/adas/include/opencv2/adas.hpp +++ b/modules/adas/include/opencv2/adas.hpp @@ -38,3 +38,7 @@ or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. */ + +/** @defgroup adas Advanced Driver Assistance +*/ + diff --git a/modules/bgsegm/doc/bgsegm.bib b/modules/bgsegm/doc/bgsegm.bib new file mode 100644 index 000000000..e3df27da7 --- /dev/null +++ b/modules/bgsegm/doc/bgsegm.bib @@ -0,0 +1,17 @@ +@incollection{KB2001, + title={An improved adaptive background mixture model for real-time tracking with shadow detection}, + author={KaewTraKulPong, Pakorn and Bowden, Richard}, + booktitle={Video-Based Surveillance Systems}, + pages={135--144}, + year={2002}, + publisher={Springer} +} + +@inproceedings{Gold2012, + title={Visual tracking of human visitors under variable-lighting conditions for a responsive audio art installation}, + author={Godbehere, Andrew B and Matsukawa, Akihiro and Goldberg, Ken}, + booktitle={American Control Conference (ACC), 2012}, + pages={4305--4312}, + year={2012}, + organization={IEEE} +} diff --git a/modules/bgsegm/include/opencv2/bgsegm.hpp b/modules/bgsegm/include/opencv2/bgsegm.hpp index ae544f18a..ff1e15357 100644 --- a/modules/bgsegm/include/opencv2/bgsegm.hpp +++ b/modules/bgsegm/include/opencv2/bgsegm.hpp @@ -44,21 +44,21 @@ the use of this software, even if advised of the possibility of such damage. #ifdef __cplusplus +/** @defgroup bgsegm Improved Background-Foreground Segmentation Methods +*/ + namespace cv { namespace bgsegm { -/*! - Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm +//! @addtogroup bgsegm +//! @{ - The class implements the following algorithm: - "An improved adaptive background mixture model for real-time tracking with shadow detection" - P. KadewTraKuPong and R. Bowden, - Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001." - http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf +/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm. -*/ +The class implements the algorithm described in @cite KB2001. + */ class CV_EXPORTS_W BackgroundSubtractorMOG : public BackgroundSubtractor { public: @@ -75,54 +75,118 @@ public: CV_WRAP virtual void setNoiseSigma(double noiseSigma) = 0; }; +/** @brief Creates mixture-of-gaussian background subtractor + +@param history Length of the history. +@param nmixtures Number of Gaussian mixtures. +@param backgroundRatio Background ratio. +@param noiseSigma Noise strength (standard deviation of the brightness or each color channel). 0 +means some automatic value. + */ CV_EXPORTS_W Ptr createBackgroundSubtractorMOG(int history=200, int nmixtures=5, double backgroundRatio=0.7, double noiseSigma=0); -/** - * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1) - * images of the same size, where 255 indicates Foreground and 0 represents Background. 
- * This class implements an algorithm described in "Visual Tracking of Human Visitors under - * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere, - * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012. + +/** @brief Background Subtractor module based on the algorithm given in @cite Gold2012. + + Takes a series of images and returns a sequence of mask (8UC1) + images of the same size, where 255 indicates Foreground and 0 represents Background. + This class implements an algorithm described in "Visual Tracking of Human Visitors under + Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere, + A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012. */ class CV_EXPORTS_W BackgroundSubtractorGMG : public BackgroundSubtractor { public: + /** @brief Returns total number of distinct colors to maintain in histogram. + */ CV_WRAP virtual int getMaxFeatures() const = 0; + /** @brief Sets total number of distinct colors to maintain in histogram. + */ CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0; + /** @brief Returns the learning rate of the algorithm. + + It lies between 0.0 and 1.0. It determines how quickly features are "forgotten" from + histograms. + */ CV_WRAP virtual double getDefaultLearningRate() const = 0; + /** @brief Sets the learning rate of the algorithm. + */ CV_WRAP virtual void setDefaultLearningRate(double lr) = 0; + /** @brief Returns the number of frames used to initialize background model. + */ CV_WRAP virtual int getNumFrames() const = 0; + /** @brief Sets the number of frames used to initialize background model. + */ CV_WRAP virtual void setNumFrames(int nframes) = 0; + /** @brief Returns the parameter used for quantization of color-space. + + It is the number of discrete levels in each channel to be used in histograms. + */ CV_WRAP virtual int getQuantizationLevels() const = 0; + /** @brief Sets the parameter used for quantization of color-space + */ CV_WRAP virtual void setQuantizationLevels(int nlevels) = 0; + /** @brief Returns the prior probability that each individual pixel is a background pixel. + */ CV_WRAP virtual double getBackgroundPrior() const = 0; + /** @brief Sets the prior probability that each individual pixel is a background pixel. + */ CV_WRAP virtual void setBackgroundPrior(double bgprior) = 0; + /** @brief Returns the kernel radius used for morphological operations + */ CV_WRAP virtual int getSmoothingRadius() const = 0; + /** @brief Sets the kernel radius used for morphological operations + */ CV_WRAP virtual void setSmoothingRadius(int radius) = 0; + /** @brief Returns the value of decision threshold. + + Decision value is the value above which pixel is determined to be FG. + */ CV_WRAP virtual double getDecisionThreshold() const = 0; + /** @brief Sets the value of decision threshold. + */ CV_WRAP virtual void setDecisionThreshold(double thresh) = 0; + /** @brief Returns the status of background model update + */ CV_WRAP virtual bool getUpdateBackgroundModel() const = 0; + /** @brief Sets the status of background model update + */ CV_WRAP virtual void setUpdateBackgroundModel(bool update) = 0; + /** @brief Returns the minimum value taken on by pixels in image sequence. Usually 0. + */ CV_WRAP virtual double getMinVal() const = 0; + /** @brief Sets the minimum value taken on by pixels in image sequence. + */ CV_WRAP virtual void setMinVal(double val) = 0; + /** @brief Returns the maximum value taken on by pixels in image sequence. e.g. 
1.0 or 255. + */ CV_WRAP virtual double getMaxVal() const = 0; + /** @brief Sets the maximum value taken on by pixels in image sequence. + */ CV_WRAP virtual void setMaxVal(double val) = 0; }; +/** @brief Creates a GMG Background Subtractor + +@param initializationFrames number of frames used to initialize the background models. +@param decisionThreshold Threshold value, above which it is marked foreground, else background. + */ CV_EXPORTS_W Ptr createBackgroundSubtractorGMG(int initializationFrames=120, double decisionThreshold=0.8); +//! @} + } } diff --git a/modules/bioinspired/doc/bioinspired.bib b/modules/bioinspired/doc/bioinspired.bib new file mode 100644 index 000000000..4f603244c --- /dev/null +++ b/modules/bioinspired/doc/bioinspired.bib @@ -0,0 +1,47 @@ +@article{Benoit2010, + title={Using human visual system modeling for bio-inspired low level image processing}, + author={Benoit, Alexandre and Caplier, Alice and Durette, Barth{\'e}l{\'e}my and H{\'e}rault, Jeanny}, + journal={Computer vision and Image understanding}, + volume={114}, + number={7}, + pages={758--773}, + year={2010}, + publisher={Elsevier} +} + +@inproceedings{Strat2013, + title={Retina enhanced SIFT descriptors for video indexing}, + author={Strat, Sabin Tiberius and Benoit, Alexandre and Lambert, Patrick}, + booktitle={Content-Based Multimedia Indexing (CBMI), 2013 11th International Workshop on}, + pages={201--206}, + year={2013}, + organization={IEEE} +} + +@book{Herault2010, + title={Vision: Images, Signals and Neural Networks-Models of Neural Processing in Visual Perception}, + author={Jeanny, Herault}, + year={2010}, + publisher={World Scientific} +} + +@inproceedings{Chaix2007, + title={Efficient demosaicing through recursive filtering}, + author={De Lavar{\`e}ne, Brice Chaix and Alleysson, David and Durette, Barth{\'e}l{\'e}my and H{\'e}rault, Jeanny}, + booktitle={Image Processing, 2007. ICIP 2007. IEEE International Conference on}, + volume={2}, + pages={II--189}, + year={2007}, + organization={IEEE} +} + +@article{Meylan2007, + title={Model of retinal local adaptation for the tone mapping of color filter array images}, + author={Meylan, Laurence and Alleysson, David and S{\"u}sstrunk, Sabine}, + journal={JOSA A}, + volume={24}, + number={9}, + pages={2807--2816}, + year={2007}, + publisher={Optical Society of America} +} diff --git a/modules/bioinspired/doc/bioinspired.rst b/modules/bioinspired/doc/bioinspired.rst index 6bffcdcf2..0e964596b 100644 --- a/modules/bioinspired/doc/bioinspired.rst +++ b/modules/bioinspired/doc/bioinspired.rst @@ -7,4 +7,4 @@ The module provides biological visual systems models (human visual system and ot .. 
toctree:: :maxdepth: 2 - Human retina documentation + Human retina documentation diff --git a/modules/bioinspired/doc/retina/images/retinaInput.jpg b/modules/bioinspired/doc/images/retinaInput.jpg similarity index 100% rename from modules/bioinspired/doc/retina/images/retinaInput.jpg rename to modules/bioinspired/doc/images/retinaInput.jpg diff --git a/modules/bioinspired/doc/retina/images/retinaOutput_default.jpg b/modules/bioinspired/doc/images/retinaOutput_default.jpg similarity index 100% rename from modules/bioinspired/doc/retina/images/retinaOutput_default.jpg rename to modules/bioinspired/doc/images/retinaOutput_default.jpg diff --git a/modules/bioinspired/doc/retina/images/retinaOutput_realistic.jpg b/modules/bioinspired/doc/images/retinaOutput_realistic.jpg similarity index 100% rename from modules/bioinspired/doc/retina/images/retinaOutput_realistic.jpg rename to modules/bioinspired/doc/images/retinaOutput_realistic.jpg diff --git a/modules/bioinspired/doc/retina.markdown b/modules/bioinspired/doc/retina.markdown new file mode 100644 index 000000000..4115f88f5 --- /dev/null +++ b/modules/bioinspired/doc/retina.markdown @@ -0,0 +1,223 @@ +Retina : a bio-mimetic human retina model {#bioinspired_retina} +========================================= + +Retina +------ + +**Note** : do not forget that the retina model is included in the following namespace : +*cv::bioinspired*. + +### Introduction + +Class which provides the main controls to the Gipsa/Listic labs human retina model. This is a non +separable spatio-temporal filter modelling the two main retina information channels : + +- foveal vision for detailed color vision : the parvocellular pathway. +- peripheral vision for sensitive transient signal detection (motion and events) : the + magnocellular pathway. + +From a general point of view, this filter whitens the image spectrum and corrects luminance thanks +to local adaptation. Another important property is its ability to filter out spatio-temporal noise +while enhancing details. This model originates from Jeanny Herault's work @cite Herault2010. It has been +involved in Alexandre Benoit's PhD and his current research @cite Benoit2010, @cite Strat2013 (he +currently maintains this module within OpenCV). It includes the work of Jeanny's other PhD students, +such as @cite Chaix2007, and the log polar transformations of Barthelemy Durette described in Jeanny's +book. + +**NOTES :** + +- For ease of use in computer vision applications, the two retina channels are applied + homogeneously on all the input images. This does not follow the real retina topology, but this + can still be done using the log sampling capabilities proposed within the class. +- An extended retina description and example code are given in the tutorial/contrib section for complementary + explanations. + +### Preliminary illustration + +As a preliminary presentation, let's start with a visual example. We propose to apply the filter on +a low quality color jpeg image with backlight problems. Here is the considered input... *"Well, my +eyes were able to see more than this strange black shadow..."* + +![a low quality color jpeg image with backlight problems.](images/retinaInput.jpg) + +Below, the retina foveal model applied on the entire image with default parameters. Here contours +are enforced, halo effects are voluntarily visible with this configuration. See the parameters discussion +below and increase horizontalCellsGain near 1 to remove them. + +![the retina foveal model applied on the entire image with default parameters. 
Here contours are enforced, luminance is corrected and halo effects are voluntarily visible with this configuration; increase horizontalCellsGain near 1 to remove them.](images/retinaOutput_default.jpg) + +Below, a second retina foveal model output applied on the entire image with a parameters setup +focused on naturalness perception. *"Hey, I now recognize my cat, looking at the mountains at the +end of the day!"*. Here contours are enforced, luminance is corrected but halos are avoided with +this configuration. The backlight effect is corrected and highlight details are still preserved. +Then, even on a low quality jpeg image, if some luminance information remains, the retina is able to +reconstruct a proper visual signal. Such a configuration is also useful for High Dynamic Range +(*HDR*) image compression to 8-bit images, as discussed in @cite Benoit2010 and in the demonstration +code discussed below. As shown at the end of the page, the parameter changes from the defaults are : + +- horizontalCellsGain=0.3 +- photoreceptorsLocalAdaptationSensitivity=ganglioncellsSensitivity=0.89. + +![the retina foveal model applied on the entire image with 'naturalness' parameters. Here contours are enforced but halos are avoided with this configuration, horizontalCellsGain is 0.3 and photoreceptorsLocalAdaptationSensitivity=ganglioncellsSensitivity=0.89.](images/retinaOutput_realistic.jpg) + +As observed in this preliminary demo, the retina can be set up with various parameters. By +default, as shown in the figure above, the retina strongly reduces mean luminance energy and +enforces all details of the visual scene. Luminance energy and halo effects can be modulated +(from exaggerated to cancelled, as shown in the two examples). In order to use your own parameters, you can +use the *write(String fs)* method at least once; it will write a proper XML file with all +default parameters. Then, tweak it on your own and reload them at any time using the method +*setup(String fs)*. These methods update a *Retina::RetinaParameters* member structure that is +described hereafter. XML parameters file samples are shown at the end of the page. + +Here is an overview of the abstract Retina interface; allocate one instance with the *createRetina* +functions : + + namespace cv{namespace bioinspired{ + + class Retina : public Algorithm + { + public: + // parameters setup instance + struct RetinaParameters; // this class is detailed later + + // main method for input frame processing (all-purpose method, can also perform High Dynamic Range tone mapping) + void run (InputArray inputImage); + + // specific method aiming at correcting luminance only (faster High Dynamic Range tone mapping) + void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage); + + // output buffers retrieval methods + // -> foveal color vision details channel with luminance and noise correction + void getParvo (OutputArray retinaOutput_parvo); + void getParvoRAW (OutputArray retinaOutput_parvo);// retrieve original output buffers without any normalisation + const Mat getParvoRAW () const;// retrieve original output buffers without any normalisation + // -> peripheral monochrome motion and events (transient information) channel + void getMagno (OutputArray retinaOutput_magno); + void getMagnoRAW (OutputArray retinaOutput_magno); // retrieve original output buffers without any normalisation + const Mat getMagnoRAW () const;// retrieve original output buffers without any normalisation + + // reset retina buffers...
equivalent to closing your eyes for some seconds + void clearBuffers (); + + // retreive input and output buffers sizes + Size getInputSize (); + Size getOutputSize (); + + // setup methods with specific parameters specification of global xml config file loading/write + void setup (String retinaParameterFile="", const bool applyDefaultSetupOnFailure=true); + void setup (FileStorage &fs, const bool applyDefaultSetupOnFailure=true); + void setup (RetinaParameters newParameters); + struct Retina::RetinaParameters getParameters (); + const String printSetup (); + virtual void write (String fs) const; + virtual void write (FileStorage &fs) const; + void setupOPLandIPLParvoChannel (const bool colorMode=true, const bool normaliseOutput=true, const float photoreceptorsLocalAdaptationSensitivity=0.7, const float photoreceptorsTemporalConstant=0.5, const float photoreceptorsSpatialConstant=0.53, const float horizontalCellsGain=0, const float HcellsTemporalConstant=1, const float HcellsSpatialConstant=7, const float ganglionCellsSensitivity=0.7); + void setupIPLMagnoChannel (const bool normaliseOutput=true, const float parasolCells_beta=0, const float parasolCells_tau=0, const float parasolCells_k=7, const float amacrinCellsTemporalCutFrequency=1.2, const float V0CompressionParameter=0.95, const float localAdaptintegration_tau=0, const float localAdaptintegration_k=7); + void setColorSaturation (const bool saturateColors=true, const float colorSaturationValue=4.0); + void activateMovingContoursProcessing (const bool activate); + void activateContoursProcessing (const bool activate); + }; + + // Allocators + cv::Ptr createRetina (Size inputSize); + cv::Ptr createRetina (Size inputSize, const bool colorMode, RETINA_COLORSAMPLINGMETHOD colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0); + }} // cv and bioinspired namespaces end + +### Description + +Class which allows the [Gipsa](http://www.gipsa-lab.inpg.fr) (preliminary work) / +[Listic](http://www.listic.univ-savoie.fr) (code maintainer and user) labs retina model to be used. +This class allows human retina spatio-temporal image processing to be applied on still images, +images sequences and video sequences. Briefly, here are the main human retina model properties: + +- spectral whithening (mid-frequency details enhancement) +- high frequency spatio-temporal noise reduction (temporal noise and high frequency spatial noise + are minimized) +- low frequency luminance reduction (luminance range compression) : high luminance regions do not + hide details in darker regions anymore +- local logarithmic luminance compression allows details to be enhanced even in low light + conditions + +Use : this model can be used basically for spatio-temporal video effects but also in the aim of : + +- performing texture analysis with enhanced signal to noise ratio and enhanced details robust + against input images luminance ranges (check out the parvocellular retina channel output, by + using the provided **getParvo** methods) +- performing motion analysis also taking benefit of the previously cited properties (check out the + magnocellular retina channel output, by using the provided **getMagno** methods) +- general image/video sequence description using either one or both channels. An example of the + use of Retina in a Bag of Words approach is given in @cite Strat2013. 
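+As a complement to the list above, here is a minimal usage sketch (for illustration only : it assumes a color image *myPicture.jpg* is
+available on disk and that the bioinspired and highgui modules are built; error handling is omitted) :
+
+    // minimal sketch : allocate a retina matching the input size, process one frame and display both channels
+    #include "opencv2/bioinspired.hpp"
+    #include "opencv2/imgcodecs.hpp"
+    #include "opencv2/highgui.hpp"
+
+    int main()
+    {
+        cv::Mat input = cv::imread("myPicture.jpg"); // hypothetical test image
+        cv::Ptr<cv::bioinspired::Retina> retina = cv::bioinspired::createRetina(input.size());
+        retina->run(input); // call repeatedly when processing video frames
+        cv::Mat parvo, magno;
+        retina->getParvo(parvo); // foveal details/color channel
+        retina->getMagno(magno); // peripheral transient/motion channel (single channel)
+        cv::imshow("parvo", parvo);
+        cv::imshow("magno", magno);
+        cv::waitKey(0);
+        return 0;
+    }
+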
+ +Literature +---------- + +For more information, refer to the following papers : + +- Model description : + +[Benoit2010] Benoit A., Caplier A., Durette B., Herault, J., "Using Human Visual System Modeling For Bio-Inspired Low Level Image Processing", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773. DOI + +- Model use in a Bag of Words approach : + +[Strat2013] Strat S., Benoit A., Lambert P., "Retina enhanced SIFT descriptors for video indexing", CBMI2013, Veszprém, Hungary, 2013. + +- Please have a look at the reference work of Jeanny Herault that you can read in his book : + +[Herault2010] Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891. + +This retina filter code includes the research contributions of phd/research collegues from which +code has been redrawn by the author : + +- take a look at the *retinacolor.hpp* module to discover Brice Chaix de Lavarene phD color + mosaicing/demosaicing and his reference paper: + +[Chaix2007] B. Chaix de Lavarene, D. Alleysson, B. Durette, J. Herault (2007). "Efficient demosaicing through recursive filtering", IEEE International Conference on Image Processing ICIP 2007 + +- take a look at *imagelogpolprojection.hpp* to discover retina spatial log sampling which + originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is + also proposed and originates from Jeanny's discussions. More informations in the above cited + Jeanny Heraults's book. + +- Meylan&al work on HDR tone mapping that is implemented as a specific method within the model : + +[Meylan2007] L. Meylan , D. Alleysson, S. Susstrunk, "A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images", Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816 + +Demos and experiments ! +----------------------- + +**NOTE : Complementary to the following examples, have a look at the Retina tutorial in the +tutorial/contrib section for complementary explanations.** + +Take a look at the provided C++ examples provided with OpenCV : + +- **samples/cpp/retinademo.cpp** shows how to use the retina module for details enhancement (Parvo channel output) and transient maps observation (Magno channel output). You can play with images, video sequences and webcam video. + Typical uses are (provided your OpenCV installation is situated in folder + *OpenCVReleaseFolder*) + + - image processing : **OpenCVReleaseFolder/bin/retinademo -image myPicture.jpg** + - video processing : **OpenCVReleaseFolder/bin/retinademo -video myMovie.avi** + - webcam processing: **OpenCVReleaseFolder/bin/retinademo -video** + + **Note :** This demo generates the file *RetinaDefaultParameters.xml* which contains the + default parameters of the retina. Then, rename this as *RetinaSpecificParameters.xml*, adjust + the parameters the way you want and reload the program to check the effect. + +- **samples/cpp/OpenEXRimages\_HDR\_Retina\_toneMapping.cpp** shows how to use the retina to + perform High Dynamic Range (HDR) luminance compression + + Then, take a HDR image using bracketing with your camera and generate an OpenEXR image and + then process it using the demo. 
+ + Typical use, supposing that you have the OpenEXR image such as *memorial.exr* (present in the + samples/cpp/ folder) + +- **OpenCVReleaseFolder/bin/OpenEXRimages\_HDR\_Retina\_toneMapping memorial.exr [optional: + 'fast']** + + Note that some sliders are made available to allow you to play with luminance compression. + + If not using the 'fast' option, then, tone mapping is performed using the full retina model + @cite Benoit2010. It includes spectral whitening that allows luminance energy to be reduced. + When using the 'fast' option, then, a simpler method is used, it is an adaptation of the + algorithm presented in @cite Meylan2007. This method gives also good results and is faster to + process but it sometimes requires some more parameters adjustement. diff --git a/modules/bioinspired/doc/retina/index.rst b/modules/bioinspired/doc/retina.rst similarity index 100% rename from modules/bioinspired/doc/retina/index.rst rename to modules/bioinspired/doc/retina.rst diff --git a/modules/bioinspired/include/opencv2/bioinspired.hpp b/modules/bioinspired/include/opencv2/bioinspired.hpp index 9bbdca726..9c7e23bd2 100644 --- a/modules/bioinspired/include/opencv2/bioinspired.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired.hpp @@ -47,4 +47,14 @@ #include "opencv2/bioinspired/retina.hpp" #include "opencv2/bioinspired/retinafasttonemapping.hpp" #include "opencv2/bioinspired/transientareassegmentationmodule.hpp" + +/** @defgroup bioinspired Biologically inspired vision models and derivated tools + +The module provides biological visual systems models (human visual system and others). It also +provides derivated objects that take advantage of those bio-inspired models. + +@ref bioinspired_retina + +*/ + #endif diff --git a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp index 783047c7f..0274837ba 100644 --- a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp @@ -65,12 +65,11 @@ #ifndef __OPENCV_BIOINSPIRED_RETINA_HPP__ #define __OPENCV_BIOINSPIRED_RETINA_HPP__ -/* - * Retina.hpp - * - * Created on: Jul 19, 2011 - * Author: Alexandre Benoit - */ +/** +@file +@date Jul 19, 2011 +@author Alexandre Benoit +*/ #include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support @@ -78,42 +77,104 @@ namespace cv{ namespace bioinspired{ +//! @addtogroup bioinspired +//! @{ + enum { RETINA_COLOR_RANDOM, //!< each pixel position is either R, G or B in a random choice RETINA_COLOR_DIAGONAL,//!< color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR... RETINA_COLOR_BAYER//!< standard bayer sampling }; -/** - * a wrapper class which allows the Gipsa/Listic Labs model to be used with OpenCV. - * This retina model allows spatio-temporal image processing (applied on still images, video sequences). 
- * As a summary, these are the retina model properties: - * => It applies a spectral whithening (mid-frequency details enhancement) - * => high frequency spatio-temporal noise reduction - * => low frequency luminance to be reduced (luminance range compression) - * => local logarithmic luminance compression allows details to be enhanced in low light conditions - * - * USE : this model can be used basically for spatio-temporal video effects but also for : - * _using the getParvo method output matrix : texture analysiswith enhanced signal to noise ratio and enhanced details robust against input images luminance ranges - * _using the getMagno method output matrix : motion analysis also with the previously cited properties - * - * for more information, reer to the following papers : - * Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011 - * Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891. - * - * The retina filter includes the research contributions of phd/research collegues from which code has been redrawn by the author : - * _take a look at the retinacolor.hpp module to discover Brice Chaix de Lavarene color mosaicing/demosaicing and the reference paper: - * ====> B. Chaix de Lavarene, D. Alleysson, B. Durette, J. Herault (2007). "Efficient demosaicing through recursive filtering", IEEE International Conference on Image Processing ICIP 2007 - * _take a look at imagelogpolprojection.hpp to discover retina spatial log sampling which originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is also proposed and originates from Jeanny's discussions. - * ====> more informations in the above cited Jeanny Heraults's book. +/** @brief class which allows the Gipsa/Listic Labs model to be used with OpenCV. + +This retina model allows spatio-temporal image processing (applied on still images, video sequences). +As a summary, these are the retina model properties: +- It applies a spectral whithening (mid-frequency details enhancement) +- high frequency spatio-temporal noise reduction +- low frequency luminance to be reduced (luminance range compression) +- local logarithmic luminance compression allows details to be enhanced in low light conditions + +USE : this model can be used basically for spatio-temporal video effects but also for : + _using the getParvo method output matrix : texture analysiswith enhanced signal to noise ratio and enhanced details robust against input images luminance ranges + _using the getMagno method output matrix : motion analysis also with the previously cited properties + +for more information, reer to the following papers : +Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011 +Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891. 
+ +The retina filter includes the research contributions of phd/research collegues from which code has been redrawn by the author : +take a look at the retinacolor.hpp module to discover Brice Chaix de Lavarene color mosaicing/demosaicing and the reference paper: +B. Chaix de Lavarene, D. Alleysson, B. Durette, J. Herault (2007). "Efficient demosaicing through recursive filtering", IEEE International Conference on Image Processing ICIP 2007 +take a look at imagelogpolprojection.hpp to discover retina spatial log sampling which originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is also proposed and originates from Jeanny's discussions. +more informations in the above cited Jeanny Heraults's book. */ class CV_EXPORTS_W Retina : public Algorithm { public: - // parameters structure for better clarity, check explenations on the comments of methods : setupOPLandIPLParvoChannel and setupIPLMagnoChannel + /** @brief parameters structure + + for better clarity, check explenations on the comments of methods : setupOPLandIPLParvoChannel and setupIPLMagnoChannel + + Here is the default configuration file of the retina module. It gives results such as the first + retina output shown on the top of this page. + + @code{xml} + + + + 1 + 1 + 7.5e-01 + 9.0e-01 + 5.3e-01 + 0.01 + 0.5 + 7. + 7.5e-01 + + 1 + 0. + 0. + 7. + 2.0e+00 + 9.5e-01 + 0. + 7. + + @endcode + + Here is the 'realistic" setup used to obtain the second retina output shown on the top of this page. + + @code{xml} + + + + 1 + 1 + 8.9e-01 + 9.0e-01 + 5.3e-01 + 0.3 + 0.5 + 7. + 8.9e-01 + + 1 + 0. + 0. + 7. + 2.0e+00 + 9.5e-01 + 0. + 7. + + @endcode + */ struct CV_EXPORTS_W RetinaParameters{ - struct CV_EXPORTS_W OPLandIplParvoParameters{ // Outer Plexiform Layer (OPL) and Inner Plexiform Layer Parvocellular (IplParvo) parameters + //! Outer Plexiform Layer (OPL) and Inner Plexiform Layer Parvocellular (IplParvo) parameters + struct CV_EXPORTS_W OPLandIplParvoParameters{ OPLandIplParvoParameters():colorMode(true), normaliseOutput(true), photoreceptorsLocalAdaptationSensitivity(0.75f), @@ -126,7 +187,8 @@ public: CV_PROP_RW bool colorMode, normaliseOutput; CV_PROP_RW float photoreceptorsLocalAdaptationSensitivity, photoreceptorsTemporalConstant, photoreceptorsSpatialConstant, horizontalCellsGain, hcellsTemporalConstant, hcellsSpatialConstant, ganglionCellsSensitivity; }; - struct CV_EXPORTS_W IplMagnoParameters{ // Inner Plexiform Layer Magnocellular channel (IplMagno) + //! 
Inner Plexiform Layer Magnocellular channel (IplMagno) + struct CV_EXPORTS_W IplMagnoParameters{ IplMagnoParameters(): normaliseOutput(true), parasolCells_beta(0.f), @@ -143,170 +205,267 @@ public: CV_PROP_RW IplMagnoParameters IplMagno; }; - /** - * retreive retina input buffer size - */ + /** @brief Retreive retina input buffer size + @return the retina input buffer size + */ CV_WRAP virtual Size getInputSize()=0; - /** - * retreive retina output buffer size - */ + /** @brief Retreive retina output buffer size that can be different from the input if a spatial log + transformation is applied + @return the retina output buffer size + */ CV_WRAP virtual Size getOutputSize()=0; - /** - * try to open an XML retina parameters file to adjust current retina instance setup - * => if the xml file does not exist, then default setup is applied - * => warning, Exceptions are thrown if read XML file is not valid - * @param retinaParameterFile : the parameters filename - * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error + /** @brief Try to open an XML retina parameters file to adjust current retina instance setup + + - if the xml file does not exist, then default setup is applied + - warning, Exceptions are thrown if read XML file is not valid + @param retinaParameterFile the parameters filename + @param applyDefaultSetupOnFailure set to true if an error must be thrown on error + You can retreive the current parameers structure using method Retina::getParameters and update + it before running method Retina::setup */ CV_WRAP virtual void setup(String retinaParameterFile="", const bool applyDefaultSetupOnFailure=true)=0; - /** - * try to open an XML retina parameters file to adjust current retina instance setup - * => if the xml file does not exist, then default setup is applied - * => warning, Exceptions are thrown if read XML file is not valid - * @param fs : the open Filestorage which contains retina parameters - * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error - */ + /** @overload + @param fs the open Filestorage which contains retina parameters + @param applyDefaultSetupOnFailure set to true if an error must be thrown on error + */ CV_WRAP virtual void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure=true)=0; - /** - * try to open an XML retina parameters file to adjust current retina instance setup - * => if the xml file does not exist, then default setup is applied - * => warning, Exceptions are thrown if read XML file is not valid - * @param newParameters : a parameters structures updated with the new target configuration - */ + /** @overload + @param newParameters a parameters structures updated with the new target configuration. 
+ */ CV_WRAP virtual void setup(RetinaParameters newParameters)=0; /** - * @return the current parameters setup + @return the current parameters setup */ CV_WRAP virtual RetinaParameters getParameters()=0; - /** - * parameters setup display method - * @return a string which contains formatted parameters information + /** @brief Outputs a string showing the used parameters setup + @return a string which contains formated parameters information */ CV_WRAP virtual const String printSetup()=0; - /** - * write xml/yml formated parameters information - * @param fs : the filename of the xml file that will be open and writen with formatted parameters information + /** @brief Write xml/yml formated parameters information + @param fs the filename of the xml file that will be open and writen with formatted parameters + information */ CV_WRAP virtual void write( String fs ) const=0; - /** - * write xml/yml formated parameters information - * @param fs : a cv::Filestorage object ready to be filled - */ + /** @overload */ CV_WRAP virtual void write( FileStorage& fs ) const=0; /** - * setup the OPL and IPL parvo channels (see biologocal model) - * OPL is referred as Outer Plexiform Layer of the retina, it allows the spatio-temporal filtering which withens the spectrum and reduces spatio-temporal noise while attenuating global luminance (low frequency energy) - * IPL parvo is the OPL next processing stage, it refers to Inner Plexiform layer of the retina, it allows high contours sensitivity in foveal vision. - * for more informations, please have a look at the paper Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011 - * @param colorMode : specifies if (true) color is processed of not (false) to then processing gray level image - * @param normaliseOutput : specifies if (true) output is rescaled between 0 and 255 of not (false) - * @param photoreceptorsLocalAdaptationSensitivity: the photoreceptors sensitivity renage is 0-1 (more log compression effect when value increases) - * @param photoreceptorsTemporalConstant: the time constant of the first order low pass filter of the photoreceptors, use it to cut high temporal frequencies (noise or fast motion), unit is frames, typical value is 1 frame - * @param photoreceptorsSpatialConstant: the spatial constant of the first order low pass filter of the photoreceptors, use it to cut high spatial frequencies (noise or thick contours), unit is pixels, typical value is 1 pixel - * @param horizontalCellsGain: gain of the horizontal cells network, if 0, then the mean value of the output is zero, if the parameter is near 1, then, the luminance is not filtered and is still reachable at the output, typicall value is 0 - * @param HcellsTemporalConstant: the time constant of the first order low pass filter of the horizontal cells, use it to cut low temporal frequencies (local luminance variations), unit is frames, typical value is 1 frame, as the photoreceptors - * @param HcellsSpatialConstant: the spatial constant of the first order low pass filter of the horizontal cells, use it to cut low spatial frequencies (local luminance), unit is pixels, typical value is 5 pixel, this value is also used for local contrast computing when computing the local contrast adaptation at the ganglion cells level (Inner Plexiform Layer parvocellular channel model) - * @param 
ganglionCellsSensitivity: the compression strengh of the ganglion cells local adaptation output, set a value between 160 and 250 for best results, a high value increases more the low value sensitivity... and the output saturates faster, recommended value: 230 + setup the OPL and IPL parvo channels (see biologocal model) + OPL is referred as Outer Plexiform Layer of the retina, it allows the spatio-temporal filtering which withens the spectrum and reduces spatio-temporal noise while attenuating global luminance (low frequency energy) + IPL parvo is the OPL next processing stage, it refers to Inner Plexiform layer of the retina, it allows high contours sensitivity in foveal vision. + for more informations, please have a look at the paper Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011 + @param colorMode : specifies if (true) color is processed of not (false) to then processing gray level image + @param normaliseOutput : specifies if (true) output is rescaled between 0 and 255 of not (false) + @param photoreceptorsLocalAdaptationSensitivity: the photoreceptors sensitivity renage is 0-1 (more log compression effect when value increases) + @param photoreceptorsTemporalConstant: the time constant of the first order low pass filter of the photoreceptors, use it to cut high temporal frequencies (noise or fast motion), unit is frames, typical value is 1 frame + @param photoreceptorsSpatialConstant: the spatial constant of the first order low pass filter of the photoreceptors, use it to cut high spatial frequencies (noise or thick contours), unit is pixels, typical value is 1 pixel + @param horizontalCellsGain: gain of the horizontal cells network, if 0, then the mean value of the output is zero, if the parameter is near 1, then, the luminance is not filtered and is still reachable at the output, typicall value is 0 + @param HcellsTemporalConstant: the time constant of the first order low pass filter of the horizontal cells, use it to cut low temporal frequencies (local luminance variations), unit is frames, typical value is 1 frame, as the photoreceptors + @param HcellsSpatialConstant: the spatial constant of the first order low pass filter of the horizontal cells, use it to cut low spatial frequencies (local luminance), unit is pixels, typical value is 5 pixel, this value is also used for local contrast computing when computing the local contrast adaptation at the ganglion cells level (Inner Plexiform Layer parvocellular channel model) + @param ganglionCellsSensitivity: the compression strengh of the ganglion cells local adaptation output, set a value between 160 and 250 for best results, a high value increases more the low value sensitivity... and the output saturates faster, recommended value: 230 + */ + + /** @brief Setup the OPL and IPL parvo channels (see biologocal model) + + OPL is referred as Outer Plexiform Layer of the retina, it allows the spatio-temporal filtering + which withens the spectrum and reduces spatio-temporal noise while attenuating global luminance + (low frequency energy) IPL parvo is the OPL next processing stage, it refers to a part of the + Inner Plexiform layer of the retina, it allows high contours sensitivity in foveal vision. See + reference papers for more informations. 
+ @param colorMode specifies if (true) color is processed of not (false) to then processing gray + level image + @param normaliseOutput specifies if (true) output is rescaled between 0 and 255 of not (false) + @param photoreceptorsLocalAdaptationSensitivity the photoreceptors sensitivity renage is 0-1 + (more log compression effect when value increases) + @param photoreceptorsTemporalConstant the time constant of the first order low pass filter of + the photoreceptors, use it to cut high temporal frequencies (noise or fast motion), unit is + frames, typical value is 1 frame + @param photoreceptorsSpatialConstant the spatial constant of the first order low pass filter of + the photoreceptors, use it to cut high spatial frequencies (noise or thick contours), unit is + pixels, typical value is 1 pixel + @param horizontalCellsGain gain of the horizontal cells network, if 0, then the mean value of + the output is zero, if the parameter is near 1, then, the luminance is not filtered and is + still reachable at the output, typicall value is 0 + @param HcellsTemporalConstant the time constant of the first order low pass filter of the + horizontal cells, use it to cut low temporal frequencies (local luminance variations), unit is + frames, typical value is 1 frame, as the photoreceptors + @param HcellsSpatialConstant the spatial constant of the first order low pass filter of the + horizontal cells, use it to cut low spatial frequencies (local luminance), unit is pixels, + typical value is 5 pixel, this value is also used for local contrast computing when computing + the local contrast adaptation at the ganglion cells level (Inner Plexiform Layer parvocellular + channel model) + @param ganglionCellsSensitivity the compression strengh of the ganglion cells local adaptation + output, set a value between 0.6 and 1 for best results, a high value increases more the low + value sensitivity... and the output saturates faster, recommended value: 0.7 */ CV_WRAP virtual void setupOPLandIPLParvoChannel(const bool colorMode=true, const bool normaliseOutput = true, const float photoreceptorsLocalAdaptationSensitivity=0.7, const float photoreceptorsTemporalConstant=0.5, const float photoreceptorsSpatialConstant=0.53, const float horizontalCellsGain=0, const float HcellsTemporalConstant=1, const float HcellsSpatialConstant=7, const float ganglionCellsSensitivity=0.7)=0; - /** - * set parameters values for the Inner Plexiform Layer (IPL) magnocellular channel - * this channel processes signals outpint from OPL processing stage in peripheral vision, it allows motion information enhancement. It is decorrelated from the details channel. See reference paper for more details. 
- /**
- * set parameters values for the Inner Plexiform Layer (IPL) magnocellular channel
- * this channel processes signals outpint from OPL processing stage in peripheral vision, it allows motion information enhancement. It is decorrelated from the details channel. See reference paper for more details.
- * @param normaliseOutput : specifies if (true) output is rescaled between 0 and 255 of not (false)
- * @param parasolCells_beta: the low pass filter gain used for local contrast adaptation at the IPL level of the retina (for ganglion cells local adaptation), typical value is 0
- * @param parasolCells_tau: the low pass filter time constant used for local contrast adaptation at the IPL level of the retina (for ganglion cells local adaptation), unit is frame, typical value is 0 (immediate response)
- * @param parasolCells_k: the low pass filter spatial constant used for local contrast adaptation at the IPL level of the retina (for ganglion cells local adaptation), unit is pixels, typical value is 5
- * @param amacrinCellsTemporalCutFrequency: the time constant of the first order high pass fiter of the magnocellular way (motion information channel), unit is frames, tipicall value is 5
- * @param V0CompressionParameter: the compression strengh of the ganglion cells local adaptation output, set a value between 160 and 250 for best results, a high value increases more the low value sensitivity... and the output saturates faster, recommended value: 200
- * @param localAdaptintegration_tau: specifies the temporal constant of the low pas filter involved in the computation of the local "motion mean" for the local adaptation computation
- * @param localAdaptintegration_k: specifies the spatial constant of the low pas filter involved in the computation of the local "motion mean" for the local adaptation computation
+ /** @brief Set parameters values for the Inner Plexiform Layer (IPL) magnocellular channel
+
+ this channel processes signals output from the OPL processing stage in peripheral vision, it allows
+ motion information enhancement. It is decorrelated from the details channel. See reference
+ papers for more details.
+
+ @param normaliseOutput specifies if (true) output is rescaled between 0 and 255 or not (false)
+ @param parasolCells_beta the low pass filter gain used for local contrast adaptation at the
+ IPL level of the retina (for ganglion cells local adaptation), typical value is 0
+ @param parasolCells_tau the low pass filter time constant used for local contrast adaptation
+ at the IPL level of the retina (for ganglion cells local adaptation), unit is frame, typical
+ value is 0 (immediate response)
+ @param parasolCells_k the low pass filter spatial constant used for local contrast adaptation
+ at the IPL level of the retina (for ganglion cells local adaptation), unit is pixels, typical
+ value is 5
+ @param amacrinCellsTemporalCutFrequency the time constant of the first order high pass filter of
+ the magnocellular way (motion information channel), unit is frames, typical value is 1.2
+ @param V0CompressionParameter the compression strength of the ganglion cells local adaptation
+ output, set a value between 0.6 and 1 for best results, a high value increases the low
+ value sensitivity more and the output saturates faster, recommended value: 0.95
+ @param localAdaptintegration_tau specifies the temporal constant of the low pass filter
+ involved in the computation of the local "motion mean" for the local adaptation computation
+ @param localAdaptintegration_k specifies the spatial constant of the low pass filter involved
+ in the computation of the local "motion mean" for the local adaptation computation
*/ CV_WRAP virtual void setupIPLMagnoChannel(const bool normaliseOutput = true, const float parasolCells_beta=0, const float parasolCells_tau=0, const float parasolCells_k=7, const float amacrinCellsTemporalCutFrequency=1.2, const float V0CompressionParameter=0.95, const float localAdaptintegration_tau=0, const float localAdaptintegration_k=7)=0;
- /**
- * method which allows retina to be applied on an input image, after run, encapsulated retina module is ready to deliver its outputs using dedicated acccessors, see getParvo and getMagno methods
- * @param inputImage : the input cv::Mat image to be processed, can be gray level or BGR coded in any format (from 8bit to 16bits)
+ /** @brief Method which allows retina to be applied on an input image,
+
+ after run, the encapsulated retina module is ready to deliver its outputs using dedicated
+ accessors, see the getParvo and getMagno methods
+ @param inputImage the input Mat image to be processed, can be gray level or BGR coded in any
+ format (from 8bit to 16bits)
*/ CV_WRAP virtual void run(InputArray inputImage)=0;
- /**
- * method that applies a luminance correction (initially High Dynamic Range (HDR) tone mapping) using only the 2 local adaptation stages of the retina parvo channel : photoreceptors level and ganlion cells level. Spatio temporal filtering is applied but limited to temporal smoothing and eventually high frequencies attenuation. This is a lighter method than the one available using the regular run method. It is then faster but it does not include complete temporal filtering nor retina spectral whitening. Then, it can have a more limited effect on images with a very high dynamic range. This is an adptation of the original still image HDR tone mapping algorithm of David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
- * -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
- @param inputImage the input image to process RGB or gray levels
- @param outputToneMappedImage the output tone mapped image
+ /** @brief Method which processes an image with the aim of correcting its luminance: correct
+ backlight problems, enhance details in shadows.
+
+ This method is designed to perform High Dynamic Range image tone mapping (compress \>8bit/pixel
+ images to 8bit/pixel). This is a simplified version of the Retina Parvocellular model
+ (simplified version of the run/getParvo methods call) since it does not include the
+ spatio-temporal filter modelling the Outer Plexiform Layer of the retina that performs spectral
+ whitening and many other things. However, it works well for tone mapping and is faster.
+
+ Check the demos and experiments section to see examples and the way to perform tone mapping
+ using the original retina model and this method.
+
+ @param inputImage the input image to process (should be coded in float format : CV_32F,
+ CV_32FC1, CV_32FC3, CV_32FC4, the 4th channel won't be considered).
+ @param outputToneMappedImage the output 8bit/channel tone mapped image (CV_8U or CV_8UC3 format). */ CV_WRAP virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)=0; - /** - * accessor of the details channel of the retina (models foveal vision) - * @param retinaOutput_parvo : the output buffer (reallocated if necessary), this output is rescaled for standard 8bits image processing use in OpenCV + /** @brief Accessor of the details channel of the retina (models foveal vision). + + Warning, getParvoRAW methods return buffers that are not rescaled within range [0;255] while + the non RAW method allows a normalized matrix to be retrieved. + + @param retinaOutput_parvo the output buffer (reallocated if necessary), format can be : + - a Mat, this output is rescaled for standard 8bits image processing use in OpenCV + - RAW methods actually return a 1D matrix (encoding is R1, R2, ... Rn, G1, G2, ..., Gn, B1, + B2, ...Bn), this output is the original retina filter model output, without any + quantification or rescaling. + @see getParvoRAW */ CV_WRAP virtual void getParvo(OutputArray retinaOutput_parvo)=0; - /** - * accessor of the details channel of the retina (models foveal vision) - * @param retinaOutput_parvo : a cv::Mat header filled with the internal parvo buffer of the retina module. This output is the original retina filter model output, without any quantification or rescaling + /** @brief Accessor of the details channel of the retina (models foveal vision). + @see getParvo */ CV_WRAP virtual void getParvoRAW(OutputArray retinaOutput_parvo)=0; - /** - * accessor of the motion channel of the retina (models peripheral vision) - * @param retinaOutput_magno : the output buffer (reallocated if necessary), this output is rescaled for standard 8bits image processing use in OpenCV + /** @brief Accessor of the motion channel of the retina (models peripheral vision). + + Warning, getMagnoRAW methods return buffers that are not rescaled within range [0;255] while + the non RAW method allows a normalized matrix to be retrieved. + @param retinaOutput_magno the output buffer (reallocated if necessary), format can be : + - a Mat, this output is rescaled for standard 8bits image processing use in OpenCV + - RAW methods actually return a 1D matrix (encoding is M1, M2,... Mn), this output is the + original retina filter model output, without any quantification or rescaling. + @see getMagnoRAW */ CV_WRAP virtual void getMagno(OutputArray retinaOutput_magno)=0; - /** - * accessor of the motion channel of the retina (models peripheral vision) - * @param retinaOutput_magno : a cv::Mat header filled with the internal retina magno buffer of the retina module. This output is the original retina filter model output, without any quantification or rescaling + /** @brief Accessor of the motion channel of the retina (models peripheral vision). + @see getMagno */ CV_WRAP virtual void getMagnoRAW(OutputArray retinaOutput_magno)=0; - // original API level data accessors : get buffers addresses from a Mat header, similar to getParvoRAW and getMagnoRAW... + /** @overload */ CV_WRAP virtual const Mat getMagnoRAW() const=0; + /** @overload */ CV_WRAP virtual const Mat getParvoRAW() const=0; - /** - * activate color saturation as the final step of the color demultiplexing process - * -> this saturation is a sigmoide function applied to each channel of the demultiplexed image. 
- * @param saturateColors: boolean that activates color saturation (if true) or desactivate (if false)
- * @param colorSaturationValue: the saturation factor
+ /** @brief Activate color saturation as the final step of the color demultiplexing process -\> this
+ saturation is a sigmoid function applied to each channel of the demultiplexed image.
+ @param saturateColors boolean that activates color saturation (if true) or deactivates it (if false)
+ @param colorSaturationValue the saturation factor : a simple factor applied on the chrominance
+ buffers
*/ CV_WRAP virtual void setColorSaturation(const bool saturateColors=true, const float colorSaturationValue=4.0)=0;
- /**
- * clear all retina buffers (equivalent to opening the eyes after a long period of eye close ;o)
+ /** @brief Clears all retina buffers
+
+ (equivalent to opening the eyes after a long period of eye closure ;o) watch out for the temporal
+ transition occurring just after this method call.
*/ CV_WRAP virtual void clearBuffers()=0;
- /**
- * Activate/desactivate the Magnocellular pathway processing (motion information extraction), by default, it is activated
- * @param activate: true if Magnocellular output should be activated, false if not
- */
+ /** @brief Activate/deactivate the Magnocellular pathway processing (motion information extraction), by
+ default, it is activated
+ @param activate true if Magnocellular output should be activated, false if not; if activated,
+ the Magnocellular output can be retrieved using the **getMagno** methods
+ */ CV_WRAP virtual void activateMovingContoursProcessing(const bool activate)=0;
- /**
- * Activate/desactivate the Parvocellular pathway processing (contours information extraction), by default, it is activated
- * @param activate: true if Parvocellular (contours information extraction) output should be activated, false if not
- */
+ /** @brief Activate/deactivate the Parvocellular pathway processing (contours information extraction), by
+ default, it is activated
+ @param activate true if Parvocellular (contours information extraction) output should be
+ activated, false if not; if activated, the Parvocellular output can be retrieved using the
+ Retina::getParvo methods
+ */ CV_WRAP virtual void activateContoursProcessing(const bool activate)=0; };
+
+//! @relates bioinspired::Retina
+//! @{
+
+/** @overload */ CV_EXPORTS_W Ptr<Retina> createRetina(Size inputSize);
+/** @brief Constructors from standardized interfaces : retrieve a smart pointer to a Retina instance
+
+@param inputSize the input frame size
+@param colorMode the chosen processing mode : with or without color processing
+@param colorSamplingMethod specifies which kind of color sampling will be used :
+- cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice
+- cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
+- cv::bioinspired::RETINA_COLOR_BAYER: standard Bayer sampling
+@param useRetinaLogSampling activate retina log sampling, if true, the 2 following parameters can
+be used
+@param reductionFactor only useful if param useRetinaLogSampling=true, specifies the reduction
+factor of the output frame (as the center (fovea) is high resolution and corners can be
+underscaled, a reduction of the output is allowed without precision loss)
+@param samplingStrenght only useful if param useRetinaLogSampling=true, specifies the strength of
+the log scale that is applied
+ */ CV_EXPORTS_W Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
#ifdef HAVE_OPENCV_OCL
Ptr<Retina> createRetina_OCL(Size inputSize);
Ptr<Retina> createRetina_OCL(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const double reductionFactor=1.0, const double samplingStrenght=10.0);
#endif
+
+//! @}
+
+//! @}
+
} }
#endif /* __OPENCV_BIOINSPIRED_RETINA_HPP__ */
diff --git a/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp b/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp index 8ee19d833..c65709d0e 100644 --- a/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp @@ -67,11 +67,10 @@ #ifndef __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__ #define __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__
-/*
- * retinafasttonemapping.hpp
- *
- * Created on: May 26, 2013
- * Author: Alexandre Benoit
+/**
+@file
+@date May 26, 2013
+@author Alexandre Benoit
*/ #include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support @@ -79,43 +78,61 @@ namespace cv{ namespace bioinspired{
-/**
- * a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV.
- * This algorithm is already implemented in thre Retina class (retina::applyFastToneMapping) but used it does not require all the retina model to be allocated. This allows a light memory use for low memory devices (smartphones, etc.
- * As a summary, these are the model properties:
- * => 2 stages of local luminance adaptation with a different local neighborhood for each.
- * => first stage models the retina photorecetors local luminance adaptation
- * => second stage models th ganglion cells local information adaptation
- * => compared to the initial publication, this class uses spatio-temporal low pass filters instead of spatial only filters.
- * ====> this can help noise robustness and temporal stability for video sequence use cases.
- * for more information, read to the following papers :
- * Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
- * regarding spatio-temporal filter and the bigger retina model :
- * Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
- */
+//! @addtogroup bioinspired
+//! @{
+
+/** @brief a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV.
+
+This algorithm is already implemented in the Retina class (retina::applyFastToneMapping) but, unlike Retina, using it does not require the whole retina model to be allocated. This allows a light memory footprint for low memory devices (smartphones, etc.).
+As a summary, these are the model properties:
+- 2 stages of local luminance adaptation with a different local neighborhood for each.
+- first stage models the retina photoreceptors local luminance adaptation
+- second stage models the ganglion cells local information adaptation
+- compared to the initial publication, this class uses spatio-temporal low pass filters instead of spatial only filters.
+  this can help noise robustness and temporal stability for video sequence use cases.
+
+for more information, refer to the following papers :
+Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816.
+Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+regarding the spatio-temporal filter and the bigger retina model :
+Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing), By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+*/
class CV_EXPORTS_W RetinaFastToneMapping : public Algorithm { public:
- /**
- * method that applies a luminance correction (initially High Dynamic Range (HDR) tone mapping) using only the 2 local adaptation stages of the retina parvocellular channel : photoreceptors level and ganlion cells level. Spatio temporal filtering is applied but limited to temporal smoothing and eventually high frequencies attenuation. This is a lighter method than the one available using the regular retina::run method. It is then faster but it does not include complete temporal filtering nor retina spectral whitening. Then, it can have a more limited effect on images with a very high dynamic range. This is an adptation of the original still image HDR tone mapping algorithm of David Alleyson, Sabine Susstruck and Laurence Meylan's work, please cite:
- * -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
- @param inputImage the input image to process RGB or gray levels
- @param outputToneMappedImage the output tone mapped image
- */
+ /** @brief applies a luminance correction (initially High Dynamic Range (HDR) tone mapping)
+
+ using only the 2 local adaptation stages of the retina parvocellular channel : photoreceptors
+ level and ganglion cells level. Spatio temporal filtering is applied but limited to temporal
+ smoothing and eventually high frequencies attenuation. This is a lighter method than the one
+ available using the regular retina::run method. It is then faster but it does not include
+ complete temporal filtering nor retina spectral whitening. Then, it can have a more limited
+ effect on images with a very high dynamic range. This is an adaptation of the original still
+ image HDR tone mapping algorithm of David Alleysson, Sabine Susstrunk and Laurence Meylan's
+ work, please cite: -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local
+ Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of
+ America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+
+ @param inputImage the input image to process RGB or gray levels
+ @param outputToneMappedImage the output tone mapped image
+ */ CV_WRAP virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)=0;
- /**
- * setup method that updates tone mapping behaviors by adjusing the local luminance computation area
- * @param photoreceptorsNeighborhoodRadius the first stage local adaptation area
- * @param ganglioncellsNeighborhoodRadius the second stage local adaptation area
- * @param meanLuminanceModulatorK the factor applied to modulate the meanLuminance information (default is 1, see reference paper)
+ /** @brief updates tone mapping behaviors by adjusting the local luminance computation area
+
+ @param photoreceptorsNeighborhoodRadius the first stage local adaptation area
+ @param ganglioncellsNeighborhoodRadius the second stage local adaptation area
+ @param meanLuminanceModulatorK the factor applied to modulate the meanLuminance information
+ (default is 1, see reference paper)
*/ CV_WRAP virtual void setup(const float photoreceptorsNeighborhoodRadius=3.f, const float ganglioncellsNeighborhoodRadius=1.f, const float meanLuminanceModulatorK=1.f)=0; };
+//! @relates bioinspired::RetinaFastToneMapping
CV_EXPORTS_W Ptr<RetinaFastToneMapping> createRetinaFastToneMapping(Size inputSize);
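+
+/* For illustration only: a minimal tone mapping sketch; `hdrInput` is an assumed, already-loaded
+   floating point image (e.g. CV_32FC3), and the setup values restate the documented defaults.
+@code{.cpp}
+cv::Ptr<cv::bioinspired::RetinaFastToneMapping> toneMapper =
+    cv::bioinspired::createRetinaFastToneMapping(hdrInput.size());
+toneMapper->setup(3.f, 1.f, 1.f);   // photoreceptors radius, ganglion cells radius, mean luminance modulation
+cv::Mat ldrOutput;
+toneMapper->applyFastToneMapping(hdrInput, ldrOutput);
+@endcode
+*/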
+//! @}
+
} }
#endif /* __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__ */
diff --git a/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp b/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp index 4dcb7d280..829ba8bd0 100755 --- a/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp @@ -63,18 +63,10 @@ #define SEGMENTATIONMODULE_HPP_
/**
- * @class TransientAreasSegmentationModule
- * @brief class which provides a transient/moving areas segmentation module
- * -> perform a locally adapted segmentation by using the retina magno input data
- * @author Alexandre BENOIT, benoit.alexandre.vision@gmail.com
- * Release date 2007-2013
- * Based on Alexandre BENOIT thesis: "Le système visuel humain au secours de la vision par ordinateur"
- * -> 3 spatio temporal filters are used:
- * a first one which filters the noise and local variations of the input motion energy
- * a second (more powerfull low pass spatial filter) which gives the neighborhood motion energy
- * -> the segmentation consists in the comparison of these both outputs, if the local motion energy is higher to the neighborhood otion energy, then the area is considered as moving and is segmented
- * => a stronger third low pass filter helps decision by providing a smooth information about the "motion context" in a wider area
- */
+@file
+@date 2007-2013
+@author Alexandre BENOIT, benoit.alexandre.vision@gmail.com
+*/
#include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support @@ -83,108 +75,125 @@ namespace cv namespace bioinspired {
+//! @addtogroup bioinspired
+//! @{
+
+/** @brief class which provides a transient/moving areas segmentation module
+
+perform a locally adapted segmentation by using the retina magno input data. Based on Alexandre
+BENOIT thesis: "Le système visuel humain au secours de la vision par ordinateur"
+
+3 spatio temporal filters are used:
+- a first one which filters the noise and local variations of the input motion energy
+- a second (more powerful low pass spatial filter) which gives the neighborhood motion energy; the
+segmentation consists in the comparison of these two outputs, if the local motion energy is higher
+than the neighborhood motion energy, then the area is considered as moving and is segmented
+- a stronger third low pass filter helps the decision by providing smooth information about the
+"motion context" in a wider area
+ */
+
class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm { public:
- // parameters structure
+ //!
parameters structure struct CV_EXPORTS_W SegmentationParameters{ SegmentationParameters(): thresholdON(100), thresholdOFF(100), - // local energy filtering parameters : the aim is to smooth local textures and residual noise - localEnergy_temporalConstant(0.5), // the time constant of the first order low pass filter, use it to cut high temporal frequencies (noise or fast motion), unit is frames, typical value is 0.5 frame - localEnergy_spatialConstant(5), // the spatial constant of the first order low pass filter, use it to cut high spatial frequencies (noise or thick contours), unit is pixels, typical value is 5 pixel - // local neighborhood energy filtering parameters : the aim is to get information about the energy neighborhood to perform a center surround energy analysis + localEnergy_temporalConstant(0.5), + localEnergy_spatialConstant(5), neighborhoodEnergy_temporalConstant(1), neighborhoodEnergy_spatialConstant(15), - // context neighborhood energy filtering parameters : the aim is to get information about the energy on a wide neighborhood area to filtered out local effects contextEnergy_temporalConstant(1), contextEnergy_spatialConstant(75){};// default setup - CV_PROP_RW float thresholdON, thresholdOFF; - CV_PROP_RW float localEnergy_temporalConstant, localEnergy_spatialConstant; - CV_PROP_RW float neighborhoodEnergy_temporalConstant, neighborhoodEnergy_spatialConstant; - CV_PROP_RW float contextEnergy_temporalConstant, contextEnergy_spatialConstant; + CV_PROP_RW float thresholdON; + CV_PROP_RW float thresholdOFF; + //! the time constant of the first order low pass filter, use it to cut high temporal frequencies (noise or fast motion), unit is frames, typical value is 0.5 frame + CV_PROP_RW float localEnergy_temporalConstant; + //! the spatial constant of the first order low pass filter, use it to cut high spatial frequencies (noise or thick contours), unit is pixels, typical value is 5 pixel + CV_PROP_RW float localEnergy_spatialConstant; + //! local neighborhood energy filtering parameters : the aim is to get information about the energy neighborhood to perform a center surround energy analysis + CV_PROP_RW float neighborhoodEnergy_temporalConstant; + CV_PROP_RW float neighborhoodEnergy_spatialConstant; + //! 
context neighborhood energy filtering parameters : the aim is to get information about the energy on a wide neighborhood area to filtered out local effects + CV_PROP_RW float contextEnergy_temporalConstant; + CV_PROP_RW float contextEnergy_spatialConstant; }; - /** - * @return the sze of the manage input and output images - */ + /** @brief return the sze of the manage input and output images + */ CV_WRAP virtual Size getSize()=0; - /** - * try to open an XML segmentation parameters file to adjust current segmentation instance setup - * => if the xml file does not exist, then default setup is applied - * => warning, Exceptions are thrown if read XML file is not valid - * @param segmentationParameterFile : the parameters filename - * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error + /** @brief try to open an XML segmentation parameters file to adjust current segmentation instance setup + + - if the xml file does not exist, then default setup is applied + - warning, Exceptions are thrown if read XML file is not valid + @param segmentationParameterFile : the parameters filename + @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error */ CV_WRAP virtual void setup(String segmentationParameterFile="", const bool applyDefaultSetupOnFailure=true)=0; - /** - * try to open an XML segmentation parameters file to adjust current segmentation instance setup - * => if the xml file does not exist, then default setup is applied - * => warning, Exceptions are thrown if read XML file is not valid - * @param fs : the open Filestorage which contains segmentation parameters - * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error - */ + /** @brief try to open an XML segmentation parameters file to adjust current segmentation instance setup + + - if the xml file does not exist, then default setup is applied + - warning, Exceptions are thrown if read XML file is not valid + @param fs : the open Filestorage which contains segmentation parameters + @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error + */ CV_WRAP virtual void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure=true)=0; - /** - * try to open an XML segmentation parameters file to adjust current segmentation instance setup - * => if the xml file does not exist, then default setup is applied - * => warning, Exceptions are thrown if read XML file is not valid - * @param newParameters : a parameters structures updated with the new target configuration + /** @brief try to open an XML segmentation parameters file to adjust current segmentation instance setup + + - if the xml file does not exist, then default setup is applied + - warning, Exceptions are thrown if read XML file is not valid + @param newParameters : a parameters structures updated with the new target configuration */ CV_WRAP virtual void setup(SegmentationParameters newParameters)=0; - /** - * @return the current parameters setup - */ + /** @brief return the current parameters setup + */ CV_WRAP virtual SegmentationParameters getParameters()=0; - /** - * parameters setup display method - * @return a string which contains formatted parameters information - */ + /** @brief parameters setup display method + @return a string which contains formatted parameters information + */ CV_WRAP virtual const String printSetup()=0; - /** - * write xml/yml formated parameters information - * @param fs : the filename of the xml file that will be open and writen with 
formatted parameters information - */ + /** @brief write xml/yml formated parameters information + @param fs : the filename of the xml file that will be open and writen with formatted parameters information + */ CV_WRAP virtual void write( String fs ) const=0; - /** - * write xml/yml formated parameters information - * @param fs : a cv::Filestorage object ready to be filled - */ + /** @brief write xml/yml formated parameters information + @param fs : a cv::Filestorage object ready to be filled + */ CV_WRAP virtual void write( cv::FileStorage& fs ) const=0; - /** - * main processing method, get result using methods getSegmentationPicture() - * @param inputToSegment : the image to process, it must match the instance buffer size ! - * @param channelIndex : the channel to process in case of multichannel images - */ + /** @brief main processing method, get result using methods getSegmentationPicture() + @param inputToSegment : the image to process, it must match the instance buffer size ! + @param channelIndex : the channel to process in case of multichannel images + */ CV_WRAP virtual void run(InputArray inputToSegment, const int channelIndex=0)=0; - /** - * access function - * @return the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose - */ + /** @brief access function + @return the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose + */ CV_WRAP virtual void getSegmentationPicture(OutputArray transientAreas)=0; - /** - * cleans all the buffers of the instance - */ + /** @brief cleans all the buffers of the instance + */ CV_WRAP virtual void clearAllBuffers()=0; }; - /** allocator - * @param inputSize : size of the images input to segment (output will be the same size) - */ +/** @brief allocator +@param inputSize : size of the images input to segment (output will be the same size) +@relates bioinspired::TransientAreasSegmentationModule + */ CV_EXPORTS_W Ptr createTransientAreasSegmentationModule(Size inputSize); +//! @} + }} // namespaces end : cv and bioinspired diff --git a/modules/ccalib/include/opencv2/ccalib.hpp b/modules/ccalib/include/opencv2/ccalib.hpp index 53285e2be..79df5989c 100644 --- a/modules/ccalib/include/opencv2/ccalib.hpp +++ b/modules/ccalib/include/opencv2/ccalib.hpp @@ -49,8 +49,14 @@ #include +/** @defgroup ccalib Custom Calibration Pattern for 3D reconstruction +*/ + namespace cv{ namespace ccalib{ +//! @addtogroup ccalib +//! @{ + class CV_EXPORTS CustomPattern : public Algorithm { public: @@ -66,11 +72,11 @@ public: bool isInitialized(); void getPatternPoints(OutputArray original_points); - /* + /**< Returns a vector of the original points. */ double getPixelSize(); - /* + /**< Get the pixel size of the pattern */ @@ -86,7 +92,7 @@ public: Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags = 0, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON)); - /* + /**< Calls the calirateCamera function with the same inputs. */ @@ -94,7 +100,7 @@ public: OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); - /* + /**< Uses solvePnP to find the rotation and translation of the pattern with respect to the camera frame. 
*/ @@ -105,13 +111,13 @@ public: bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); - /* + /**< Uses solvePnPRansac() */ void drawOrientation(InputOutputArray image, InputArray tvec, InputArray rvec, InputArray cameraMatrix, InputArray distCoeffs, double axis_length = 3, int axis_width = 2); - /* + /**< pattern_corners -> projected over the image position of the edges of the pattern. */ @@ -144,6 +150,8 @@ private: void refineKeypointsPos(const Mat& img, std::vector& kp); }; +//! @} + }} // namespace ccalib, cv #endif diff --git a/modules/cvv/doc/cvv.rst b/modules/cvv/doc/cvv.rst index 943849aaa..e60378f67 100644 --- a/modules/cvv/doc/cvv.rst +++ b/modules/cvv/doc/cvv.rst @@ -7,5 +7,5 @@ The module provides an interactive GUI to debug and incrementally design compute .. toctree:: :maxdepth: 2 - CVV API Documentation - CVV GUI Documentation + CVV API Documentation + CVV GUI Documentation diff --git a/modules/cvv/doc/cvv_api/index.rst b/modules/cvv/doc/cvv_api.rst similarity index 100% rename from modules/cvv/doc/cvv_api/index.rst rename to modules/cvv/doc/cvv_api.rst diff --git a/modules/cvv/doc/cvv_gui/index.rst b/modules/cvv/doc/cvv_gui.rst similarity index 100% rename from modules/cvv/doc/cvv_gui/index.rst rename to modules/cvv/doc/cvv_gui.rst diff --git a/modules/cvv/include/opencv2/cvv/call_meta_data.hpp b/modules/cvv/include/opencv2/cvv/call_meta_data.hpp index c5156c986..5323dec99 100644 --- a/modules/cvv/include/opencv2/cvv/call_meta_data.hpp +++ b/modules/cvv/include/opencv2/cvv/call_meta_data.hpp @@ -7,6 +7,10 @@ namespace cvv { + +//! @addtogroup cvv +//! @{ + namespace impl { @@ -49,6 +53,9 @@ struct CallMetaData const bool isKnown; }; } + +//! @} + } // namespaces #ifdef __GNUC__ diff --git a/modules/cvv/include/opencv2/cvv/cvv.hpp b/modules/cvv/include/opencv2/cvv/cvv.hpp index 720d2127e..ef9e29b0f 100644 --- a/modules/cvv/include/opencv2/cvv/cvv.hpp +++ b/modules/cvv/include/opencv2/cvv/cvv.hpp @@ -1,6 +1,22 @@ +/** +@defgroup cvv GUI for Interactive Visual Debugging of Computer Vision Programs + +Namespace for all functions is **cvv**, i.e. *cvv::showImage()*. + +Compilation: + +- For development, i.e. for cvv GUI to show up, compile your code using cvv with + *g++ -DCVVISUAL_DEBUGMODE*. +- For release, i.e. cvv calls doing nothing, compile your code without above flag. + +See cvv tutorial for a commented example application using cvv. + +*/ + #include #include #include #include #include #include + diff --git a/modules/cvv/include/opencv2/cvv/debug_mode.hpp b/modules/cvv/include/opencv2/cvv/debug_mode.hpp index bec9f3f38..e23da4d01 100644 --- a/modules/cvv/include/opencv2/cvv/debug_mode.hpp +++ b/modules/cvv/include/opencv2/cvv/debug_mode.hpp @@ -10,6 +10,9 @@ namespace cvv { +//! @addtogroup cvv +//! @{ + namespace impl { @@ -24,22 +27,25 @@ static inline bool &getDebugFlag() } // namespace impl -/** - * @brief Returns whether debug-mode is active for this TU and thread. - */ +/** @brief Returns whether debug-mode is active for this TU and thread. +*/ static inline bool debugMode() { return impl::getDebugFlag(); } -/** - * @brief Set the debug-mode for this TU and thread. 
+/** @brief Enable or disable cvv for current translation unit and thread + +(disabled this way has higher - but still low - overhead compared to using the compile flags). +@param active */ static inline void setDebugFlag(bool active) { impl::getDebugFlag() = active; } +//! @} + } // namespace cvv #endif diff --git a/modules/cvv/include/opencv2/cvv/dmatch.hpp b/modules/cvv/include/opencv2/cvv/dmatch.hpp index 91b033ed2..597bced31 100644 --- a/modules/cvv/include/opencv2/cvv/dmatch.hpp +++ b/modules/cvv/include/opencv2/cvv/dmatch.hpp @@ -9,9 +9,16 @@ #include "call_meta_data.hpp" #include "debug_mode.hpp" +#ifdef CV_DOXYGEN +#define CVVISUAL_DEBUGMODE +#endif + namespace cvv { +//! @addtogroup cvv +//! @{ + namespace impl { void debugDMatch(cv::InputArray img1, std::vector keypoints1, @@ -22,6 +29,22 @@ void debugDMatch(cv::InputArray img1, std::vector keypoints1, } // namespace impl #ifdef CVVISUAL_DEBUGMODE +/** @brief Add a filled in DMatch \ to debug GUI. + +The matches can are visualized for interactive inspection in different GUI views (one similar to an +interactive :draw_matches:drawMatches\<\>). + +@param img1 First image used in DMatch \. +@param keypoints1 Keypoints of first image. +@param img2 Second image used in DMatch. +@param keypoints2 Keypoints of second image. +@param matches +@param data See showImage +@param description See showImage +@param view See showImage +@param useTrainDescriptor Use DMatch \'s train descriptor index instead of query +descriptor index. + */ static inline void debugDMatch(cv::InputArray img1, std::vector keypoints1, cv::InputArray img2, std::vector keypoints2, @@ -36,6 +59,7 @@ debugDMatch(cv::InputArray img1, std::vector keypoints1, data, description, view, useTrainDescriptor); } } +/** @overload */ static inline void debugDMatch(cv::InputArray img1, std::vector keypoints1, cv::InputArray img2, std::vector keypoints2, @@ -52,9 +76,6 @@ debugDMatch(cv::InputArray img1, std::vector keypoints1, } } #else -/** - * @brief Debug a set of matches between two images. - */ static inline void debugDMatch(cv::InputArray, std::vector, cv::InputArray, std::vector, std::vector, @@ -63,9 +84,6 @@ static inline void debugDMatch(cv::InputArray, std::vector, bool = true) { } -/** - * Dito. - */ static inline void debugDMatch(cv::InputArray, std::vector, cv::InputArray, std::vector, std::vector, @@ -75,6 +93,8 @@ static inline void debugDMatch(cv::InputArray, std::vector, } #endif +//! @} + } // namespace cvv #endif diff --git a/modules/cvv/include/opencv2/cvv/filter.hpp b/modules/cvv/include/opencv2/cvv/filter.hpp index 8e6da2479..07cc3b1db 100644 --- a/modules/cvv/include/opencv2/cvv/filter.hpp +++ b/modules/cvv/include/opencv2/cvv/filter.hpp @@ -8,9 +8,16 @@ #include "call_meta_data.hpp" #include "debug_mode.hpp" +#ifdef CV_DOXYGEN +#define CVVISUAL_DEBUGMODE +#endif + namespace cvv { +//! @addtogroup cvv +//! @{ + namespace impl { // implementation outside API @@ -20,6 +27,11 @@ void debugFilter(cv::InputArray original, cv::InputArray result, } // namespace impl #ifdef CVVISUAL_DEBUGMODE +/** + * @brief Use the debug-framework to compare two images (from which the second + * is intended to be the result of + * a filter applied to the first). 
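+ *
+ * For illustration, a minimal hedged sketch (the `original` image and the description string
+ * are placeholders):
+ * @code{.cpp}
+ * cv::Mat smoothed;
+ * cv::GaussianBlur(original, smoothed, cv::Size(9, 9), 2.0);
+ * cvv::debugFilter(original, smoothed, CVVISUAL_LOCATION, "Gaussian blur 9x9");
+ * @endcode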
+ */ static inline void debugFilter(cv::InputArray original, cv::InputArray result, impl::CallMetaData metaData = impl::CallMetaData(), @@ -31,6 +43,7 @@ debugFilter(cv::InputArray original, cv::InputArray result, view); } } +/** @overload */ static inline void debugFilter(cv::InputArray original, cv::InputArray result, impl::CallMetaData metaData, const ::std::string &description, @@ -43,20 +56,12 @@ static inline void debugFilter(cv::InputArray original, cv::InputArray result, } } #else -/** - * @brief Use the debug-framework to compare two images (from which the second - * is intended to be the result of - * a filter applied to the first). - */ static inline void debugFilter(cv::InputArray, cv::InputArray, impl::CallMetaData = impl::CallMetaData(), const char * = nullptr, const char * = nullptr) { } -/** - * Dito. - */ static inline void debugFilter(cv::InputArray, cv::InputArray, impl::CallMetaData, const ::std::string &, const ::std::string &) @@ -64,6 +69,8 @@ static inline void debugFilter(cv::InputArray, cv::InputArray, } #endif +//! @} + } // namespace cvv #endif diff --git a/modules/cvv/include/opencv2/cvv/final_show.hpp b/modules/cvv/include/opencv2/cvv/final_show.hpp index cb53b2862..29df5eec2 100644 --- a/modules/cvv/include/opencv2/cvv/final_show.hpp +++ b/modules/cvv/include/opencv2/cvv/final_show.hpp @@ -6,22 +6,18 @@ namespace cvv { +//! @addtogroup cvv +//! @{ + namespace impl { void finalShow(); } -/** - * @brief Passes the control to the debug-window for a last time. - * - * This function must be called once if there was any prior debug-call. After that all debug-data - * are freed. - * - * If there was no prior call it may be called once in which case it returns - * without opening a window. - * - * In either case no further debug-calls must be made (undefined behaviour!!). - * +/** @brief Passes the control to the debug-window for a last time. + +This function **must** be called *once* *after* all cvv calls if any. As an alternative create an +instance of FinalShowCaller, which calls finalShow() in its destructor (RAII-style). */ inline void finalShow() { @@ -48,6 +44,8 @@ public: } }; +//! @} + } #endif diff --git a/modules/cvv/include/opencv2/cvv/show_image.hpp b/modules/cvv/include/opencv2/cvv/show_image.hpp index e42ffd038..bf56e7e49 100644 --- a/modules/cvv/include/opencv2/cvv/show_image.hpp +++ b/modules/cvv/include/opencv2/cvv/show_image.hpp @@ -8,9 +8,16 @@ #include "call_meta_data.hpp" #include "debug_mode.hpp" +#ifdef CV_DOXYGEN +#define CVVISUAL_DEBUGMODE +#endif + namespace cvv { +//! @addtogroup cvv +//! @{ + namespace impl { // implementation outside API @@ -19,6 +26,15 @@ void showImage(cv::InputArray img, const CallMetaData &data, } // namespace impl #ifdef CVVISUAL_DEBUGMODE +/** @brief Add a single image to debug GUI (similar to imshow \<\>). + +@param img Image to show in debug GUI. +@param metaData Properly initialized CallMetaData struct, i.e. information about file, line and +function name for GUI. Use CVVISUAL_LOCATION macro. +@param description Human readable description to provide context to image. +@param view Preselect view that will be used to visualize this image in GUI. Other views can still +be selected in GUI later on. 
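+
+ For illustration, a minimal hedged sketch (the image content and the description are
+ placeholders):
+ @code{.cpp}
+ cv::Mat frame(480, 640, CV_8UC3, cv::Scalar::all(0));   // placeholder image
+ cvv::showImage(frame, CVVISUAL_LOCATION, "placeholder frame");
+ // ... further cvv debug calls ...
+ cvv::finalShow();   // must be called once after all cvv calls (see final_show.hpp)
+ @endcode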
+ */ static inline void showImage(cv::InputArray img, impl::CallMetaData metaData = impl::CallMetaData(), const char *description = nullptr, @@ -29,6 +45,7 @@ static inline void showImage(cv::InputArray img, impl::showImage(img, metaData, description, view); } } +/** @overload */ static inline void showImage(cv::InputArray img, impl::CallMetaData metaData, const ::std::string &description, const ::std::string &view = "") @@ -40,23 +57,19 @@ static inline void showImage(cv::InputArray img, impl::CallMetaData metaData, } } #else -/** - * Use the debug-framework to show a single image. - */ static inline void showImage(cv::InputArray, impl::CallMetaData = impl::CallMetaData(), const char * = nullptr, const char * = nullptr) { } -/** - * Dito. - */ static inline void showImage(cv::InputArray, impl::CallMetaData, const ::std::string &, const ::std::string &) { } #endif +//! @} + } // namespace cvv #endif diff --git a/modules/datasets/doc/datasets/ar_hmdb.rst b/modules/datasets/doc/ar_hmdb.rst similarity index 100% rename from modules/datasets/doc/datasets/ar_hmdb.rst rename to modules/datasets/doc/ar_hmdb.rst diff --git a/modules/datasets/doc/datasets/ar_sports.rst b/modules/datasets/doc/ar_sports.rst similarity index 100% rename from modules/datasets/doc/datasets/ar_sports.rst rename to modules/datasets/doc/ar_sports.rst diff --git a/modules/datasets/doc/datasets.rst b/modules/datasets/doc/datasets.rst index cb4cedfd0..09d9e5038 100644 --- a/modules/datasets/doc/datasets.rst +++ b/modules/datasets/doc/datasets.rst @@ -15,105 +15,105 @@ It is planned to have: .. toctree:: :hidden: - datasets/ar_hmdb - datasets/ar_sports - datasets/fr_adience - datasets/fr_lfw - datasets/gr_chalearn - datasets/gr_skig - datasets/hpe_humaneva - datasets/hpe_parse - datasets/ir_affine - datasets/ir_robot - datasets/is_bsds - datasets/is_weizmann - datasets/msm_epfl - datasets/msm_middlebury - datasets/or_imagenet - datasets/or_mnist - datasets/or_sun - datasets/pd_caltech - datasets/slam_kitti - datasets/slam_tumindoor - datasets/tr_chars - datasets/tr_svt + ar_hmdb + ar_sports + fr_adience + fr_lfw + gr_chalearn + gr_skig + hpe_humaneva + hpe_parse + ir_affine + ir_robot + is_bsds + is_weizmann + msm_epfl + msm_middlebury + or_imagenet + or_mnist + or_sun + pd_caltech + slam_kitti + slam_tumindoor + tr_chars + tr_svt Action Recognition ------------------ - :doc:`datasets/ar_hmdb` [#f1]_ + :doc:`ar_hmdb` [#f1]_ - :doc:`datasets/ar_sports` + :doc:`ar_sports` Face Recognition ---------------- - :doc:`datasets/fr_adience` + :doc:`fr_adience` - :doc:`datasets/fr_lfw` [#f1]_ + :doc:`fr_lfw` [#f1]_ Gesture Recognition ------------------- - :doc:`datasets/gr_chalearn` + :doc:`gr_chalearn` - :doc:`datasets/gr_skig` + :doc:`gr_skig` Human Pose Estimation --------------------- - :doc:`datasets/hpe_humaneva` + :doc:`hpe_humaneva` - :doc:`datasets/hpe_parse` + :doc:`hpe_parse` Image Registration ------------------ - :doc:`datasets/ir_affine` + :doc:`ir_affine` - :doc:`datasets/ir_robot` + :doc:`ir_robot` Image Segmentation ------------------ - :doc:`datasets/is_bsds` + :doc:`is_bsds` - :doc:`datasets/is_weizmann` + :doc:`is_weizmann` Multiview Stereo Matching ------------------------- - :doc:`datasets/msm_epfl` + :doc:`msm_epfl` - :doc:`datasets/msm_middlebury` + :doc:`msm_middlebury` Object Recognition ------------------ - :doc:`datasets/or_imagenet` + :doc:`or_imagenet` - :doc:`datasets/or_mnist` [#f2]_ + :doc:`or_mnist` [#f2]_ - :doc:`datasets/or_sun` + :doc:`or_sun` Pedestrian Detection -------------------- - 
:doc:`datasets/pd_caltech` [#f2]_ + :doc:`pd_caltech` [#f2]_ SLAM ---- - :doc:`datasets/slam_kitti` + :doc:`slam_kitti` - :doc:`datasets/slam_tumindoor` + :doc:`slam_tumindoor` Text Recognition ---------------- - :doc:`datasets/tr_chars` + :doc:`tr_chars` - :doc:`datasets/tr_svt` [#f1]_ + :doc:`tr_svt` [#f1]_ *Footnotes* diff --git a/modules/datasets/doc/datasets/fr_adience.rst b/modules/datasets/doc/fr_adience.rst similarity index 100% rename from modules/datasets/doc/datasets/fr_adience.rst rename to modules/datasets/doc/fr_adience.rst diff --git a/modules/datasets/doc/datasets/fr_lfw.rst b/modules/datasets/doc/fr_lfw.rst similarity index 100% rename from modules/datasets/doc/datasets/fr_lfw.rst rename to modules/datasets/doc/fr_lfw.rst diff --git a/modules/datasets/doc/datasets/gr_chalearn.rst b/modules/datasets/doc/gr_chalearn.rst similarity index 100% rename from modules/datasets/doc/datasets/gr_chalearn.rst rename to modules/datasets/doc/gr_chalearn.rst diff --git a/modules/datasets/doc/datasets/gr_skig.rst b/modules/datasets/doc/gr_skig.rst similarity index 100% rename from modules/datasets/doc/datasets/gr_skig.rst rename to modules/datasets/doc/gr_skig.rst diff --git a/modules/datasets/doc/datasets/hpe_humaneva.rst b/modules/datasets/doc/hpe_humaneva.rst similarity index 100% rename from modules/datasets/doc/datasets/hpe_humaneva.rst rename to modules/datasets/doc/hpe_humaneva.rst diff --git a/modules/datasets/doc/datasets/hpe_parse.rst b/modules/datasets/doc/hpe_parse.rst similarity index 100% rename from modules/datasets/doc/datasets/hpe_parse.rst rename to modules/datasets/doc/hpe_parse.rst diff --git a/modules/datasets/doc/datasets/ir_affine.rst b/modules/datasets/doc/ir_affine.rst similarity index 100% rename from modules/datasets/doc/datasets/ir_affine.rst rename to modules/datasets/doc/ir_affine.rst diff --git a/modules/datasets/doc/datasets/ir_robot.rst b/modules/datasets/doc/ir_robot.rst similarity index 100% rename from modules/datasets/doc/datasets/ir_robot.rst rename to modules/datasets/doc/ir_robot.rst diff --git a/modules/datasets/doc/datasets/is_bsds.rst b/modules/datasets/doc/is_bsds.rst similarity index 100% rename from modules/datasets/doc/datasets/is_bsds.rst rename to modules/datasets/doc/is_bsds.rst diff --git a/modules/datasets/doc/datasets/is_weizmann.rst b/modules/datasets/doc/is_weizmann.rst similarity index 100% rename from modules/datasets/doc/datasets/is_weizmann.rst rename to modules/datasets/doc/is_weizmann.rst diff --git a/modules/datasets/doc/datasets/msm_epfl.rst b/modules/datasets/doc/msm_epfl.rst similarity index 100% rename from modules/datasets/doc/datasets/msm_epfl.rst rename to modules/datasets/doc/msm_epfl.rst diff --git a/modules/datasets/doc/datasets/msm_middlebury.rst b/modules/datasets/doc/msm_middlebury.rst similarity index 100% rename from modules/datasets/doc/datasets/msm_middlebury.rst rename to modules/datasets/doc/msm_middlebury.rst diff --git a/modules/datasets/doc/datasets/or_imagenet.rst b/modules/datasets/doc/or_imagenet.rst similarity index 100% rename from modules/datasets/doc/datasets/or_imagenet.rst rename to modules/datasets/doc/or_imagenet.rst diff --git a/modules/datasets/doc/datasets/or_mnist.rst b/modules/datasets/doc/or_mnist.rst similarity index 100% rename from modules/datasets/doc/datasets/or_mnist.rst rename to modules/datasets/doc/or_mnist.rst diff --git a/modules/datasets/doc/datasets/or_sun.rst b/modules/datasets/doc/or_sun.rst similarity index 100% rename from modules/datasets/doc/datasets/or_sun.rst rename to 
modules/datasets/doc/or_sun.rst diff --git a/modules/datasets/doc/datasets/pd_caltech.rst b/modules/datasets/doc/pd_caltech.rst similarity index 100% rename from modules/datasets/doc/datasets/pd_caltech.rst rename to modules/datasets/doc/pd_caltech.rst diff --git a/modules/datasets/doc/datasets/slam_kitti.rst b/modules/datasets/doc/slam_kitti.rst similarity index 100% rename from modules/datasets/doc/datasets/slam_kitti.rst rename to modules/datasets/doc/slam_kitti.rst diff --git a/modules/datasets/doc/datasets/slam_tumindoor.rst b/modules/datasets/doc/slam_tumindoor.rst similarity index 100% rename from modules/datasets/doc/datasets/slam_tumindoor.rst rename to modules/datasets/doc/slam_tumindoor.rst diff --git a/modules/datasets/doc/datasets/tr_chars.rst b/modules/datasets/doc/tr_chars.rst similarity index 100% rename from modules/datasets/doc/datasets/tr_chars.rst rename to modules/datasets/doc/tr_chars.rst diff --git a/modules/datasets/doc/datasets/tr_svt.rst b/modules/datasets/doc/tr_svt.rst similarity index 100% rename from modules/datasets/doc/datasets/tr_svt.rst rename to modules/datasets/doc/tr_svt.rst diff --git a/modules/datasets/include/opencv2/datasets/ar_hmdb.hpp b/modules/datasets/include/opencv2/datasets/ar_hmdb.hpp index cd2b278b7..894158340 100644 --- a/modules/datasets/include/opencv2/datasets/ar_hmdb.hpp +++ b/modules/datasets/include/opencv2/datasets/ar_hmdb.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_ar +//! @{ + struct AR_hmdbObj : public Object { int id; @@ -69,6 +72,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/ar_sports.hpp b/modules/datasets/include/opencv2/datasets/ar_sports.hpp index 9d047a14f..7f51405fa 100644 --- a/modules/datasets/include/opencv2/datasets/ar_sports.hpp +++ b/modules/datasets/include/opencv2/datasets/ar_sports.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_ar +//! @{ + struct AR_sportsObj : public Object { std::string videoUrl; @@ -68,6 +71,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/dataset.hpp b/modules/datasets/include/opencv2/datasets/dataset.hpp index f9e94bffd..6ac524b64 100644 --- a/modules/datasets/include/opencv2/datasets/dataset.hpp +++ b/modules/datasets/include/opencv2/datasets/dataset.hpp @@ -47,11 +47,448 @@ #include +/** @defgroup datasets Framework for working with different datasets + +The datasets module includes classes for working with different datasets: load data, evaluate +different algorithms on them, contains benchmarks, etc. + +It is planned to have: + +- basic: loading code for all datasets to help start work with them. +- next stage: quick benchmarks for all datasets to show how to solve them using OpenCV and +implement evaluation code. +- finally: implement on OpenCV state-of-the-art algorithms, which solve these tasks. + +@{ +@defgroup datasets_ar Action Recognition + +### HMDB: A Large Human Motion Database + +Implements loading dataset: + +"HMDB: A Large Human Motion Database": + +Usage: +-# From link above download dataset files: `hmdb51_org.rar` & `test_train_splits.rar`. +-# Unpack them. Unpack all archives from directory: `hmdb51_org/` and remove them. 
+-# To load data run: +~~~ +./opencv/build/bin/example_datasets_ar_hmdb -p=/home/user/path_to_unpacked_folders/ +~~~ + +#### Benchmark + +For this dataset was implemented benchmark with accuracy: 0.107407 (using precomputed HOG/HOF +"STIP" features from site, averaging for 3 splits) + +To run this benchmark execute: +~~~ +./opencv/build/bin/example_datasets_ar_hmdb_benchmark -p=/home/user/path_to_unpacked_folders/ +~~~ + +@note +Precomputed features should be unpacked in the same folder: `/home/user/path_to_unpacked_folders/hmdb51_org_stips/`. +Also unpack all archives from directory: `hmdb51_org_stips/` and remove them. + +### Sports-1M %Dataset + +Implements loading dataset: + +"Sports-1M Dataset": + +Usage: +-# From link above download dataset files (`git clone https://code.google.com/p/sports-1m-dataset/`). +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_ar_sports -p=/home/user/path_to_downloaded_folders/ +~~~ + +@defgroup datasets_fr Face Recognition + +### Adience + +Implements loading dataset: + +"Adience": + +Usage: +-# From link above download any dataset file: `faces.tar.gz\aligned.tar.gz` and files with splits: +`fold_0_data.txt-fold_4_data.txt`, `fold_frontal_0_data.txt-fold_frontal_4_data.txt`. (For +face recognition task another splits should be created) +-# Unpack dataset file to some folder and place split files into the same folder. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_fr_adience -p=/home/user/path_to_created_folder/ +~~~ + +### Labeled Faces in the Wild + +Implements loading dataset: + +"Labeled Faces in the Wild": + +Usage: +-# From link above download any dataset file: +`lfw.tgz\lfwa.tar.gz\lfw-deepfunneled.tgz\lfw-funneled.tgz` and files with pairs: 10 test +splits: `pairs.txt` and developer train split: `pairsDevTrain.txt`. +-# Unpack dataset file and place `pairs.txt` and `pairsDevTrain.txt` in created folder. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_fr_lfw -p=/home/user/path_to_unpacked_folder/lfw2/ +~~~ + +#### Benchmark + +For this dataset was implemented benchmark with accuracy: 0.623833 +- 0.005223 (train split: +`pairsDevTrain.txt`, dataset: lfwa) + +To run this benchmark execute: +~~~ +./opencv/build/bin/example_datasets_fr_lfw_benchmark -p=/home/user/path_to_unpacked_folder/lfw2/ +~~~ + +@defgroup datasets_gr Gesture Recognition + +### ChaLearn Looking at People + +Implements loading dataset: + +"ChaLearn Looking at People": + +Usage +-# Follow instruction from site above, download files for dataset "Track 3: Gesture Recognition": +`Train1.zip`-`Train5.zip`, `Validation1.zip`-`Validation3.zip` (Register on site: www.codalab.org and +accept the terms and conditions of competition: + There are three mirrors for +downloading dataset files. When I downloaded data only mirror: "Universitat Oberta de Catalunya" +works). +-# Unpack train archives `Train1.zip`-`Train5.zip` to folder `Train/`, validation archives +`Validation1.zip`-`Validation3.zip` to folder `Validation/` +-# Unpack all archives in `Train/` & `Validation/` in the folders with the same names, for example: +`Sample0001.zip` to `Sample0001/` +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_gr_chalearn -p=/home/user/path_to_unpacked_folders/ +~~~ + +### Sheffield Kinect Gesture Dataset + +Implements loading dataset: + +"Sheffield Kinect Gesture Dataset": + +Usage: +-# From link above download dataset files: `subject1_dep.7z`-`subject6_dep.7z`, `subject1_rgb.7z`-`subject6_rgb.7z`. +-# Unpack them. 
+-# To load data run: +~~~ +./opencv/build/bin/example_datasets_gr_skig -p=/home/user/path_to_unpacked_folders/ +~~~ + +@defgroup datasets_hpe Human Pose Estimation + +### HumanEva Dataset + +Implements loading dataset: + +"HumanEva Dataset": + +Usage: +-# From link above download dataset files for `HumanEva-I` (tar) & `HumanEva-II`. +-# Unpack them to `HumanEva_1` & `HumanEva_2` accordingly. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_hpe_humaneva -p=/home/user/path_to_unpacked_folders/ +~~~ + +### PARSE Dataset + +Implements loading dataset: + +"PARSE Dataset": + +Usage: +-# From link above download dataset file: `people.zip`. +-# Unpack it. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_hpe_parse -p=/home/user/path_to_unpacked_folder/people_all/ +~~~ + +@defgroup datasets_ir Image Registration + +### Affine Covariant Regions Datasets + +Implements loading dataset: + +"Affine Covariant Regions Datasets": + +Usage: +-# From link above download dataset files: +`bark\bikes\boat\graf\leuven\trees\ubc\wall.tar.gz`. +-# Unpack them. +-# To load data, for example, for "bark", run: +``` +./opencv/build/bin/example_datasets_ir_affine -p=/home/user/path_to_unpacked_folder/bark/ +``` + +### Robot Data Set + +Implements loading dataset: + +"Robot Data Set, Point Feature Data Set – 2010": + +Usage: +-# From link above download dataset files: `SET001_6.tar.gz`-`SET055_60.tar.gz` +-# Unpack them to one folder. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_ir_robot -p=/home/user/path_to_unpacked_folder/ +~~~ + +@defgroup datasets_is Image Segmentation + +### The Berkeley Segmentation Dataset and Benchmark + +Implements loading dataset: + +"The Berkeley Segmentation Dataset and Benchmark": + +Usage: +-# From link above download dataset files: `BSDS300-human.tgz` & `BSDS300-images.tgz`. +-# Unpack them. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_is_bsds -p=/home/user/path_to_unpacked_folder/BSDS300/ +~~~ + +### Weizmann Segmentation Evaluation Database + +Implements loading dataset: + +"Weizmann Segmentation Evaluation Database": + +Usage: +-# From link above download dataset files: `Weizmann_Seg_DB_1obj.ZIP` & `Weizmann_Seg_DB_2obj.ZIP`. +-# Unpack them. +-# To load data, for example, for `1 object` dataset, run: +~~~ +./opencv/build/bin/example_datasets_is_weizmann -p=/home/user/path_to_unpacked_folder/1obj/ +~~~ + +@defgroup datasets_msm Multiview Stereo Matching + +### EPFL Multi-View Stereo + +Implements loading dataset: + +"EPFL Multi-View Stereo": + +Usage: +-# From link above download dataset files: +`castle_dense\castle_dense_large\castle_entry\fountain\herzjesu_dense\herzjesu_dense_large_bounding\cameras\images\p.tar.gz`. +-# Unpack them in separate folder for each object. For example, for "fountain", in folder `fountain/` : +`fountain_dense_bounding.tar.gz -> bounding/`, +`fountain_dense_cameras.tar.gz -> camera/`, +`fountain_dense_images.tar.gz -> png/`, +`fountain_dense_p.tar.gz -> P/` +-# To load data, for example, for "fountain", run: +~~~ +./opencv/build/bin/example_datasets_msm_epfl -p=/home/user/path_to_unpacked_folder/fountain/ +~~~ + +### Stereo – Middlebury Computer Vision + +Implements loading dataset: + +"Stereo – Middlebury Computer Vision": + +Usage: +-# From link above download dataset files: +`dino\dinoRing\dinoSparseRing\temple\templeRing\templeSparseRing.zip` +-# Unpack them. 
+-# To load data, for example the "temple" dataset, run:
+~~~
+./opencv/build/bin/example_datasets_msm_middlebury -p=/home/user/path_to_unpacked_folder/temple/
+~~~
+
+@defgroup datasets_or Object Recognition
+
+### ImageNet
+
+Implements loading dataset: "ImageNet":
+
+Usage:
+-# From link above download dataset files:
+`ILSVRC2010_images_train.tar\ILSVRC2010_images_test.tar\ILSVRC2010_images_val.tar` & devkit:
+`ILSVRC2010_devkit-1.0.tar.gz` (loading is implemented for the 2010 release, as only this release
+has ground truth for the test data; the structure of ILSVRC2014 is similar).
+-# Unpack them to: `some_folder/train/`, `some_folder/test/`, `some_folder/val` &
+`some_folder/ILSVRC2010_validation_ground_truth.txt`,
+`some_folder/ILSVRC2010_test_ground_truth.txt`.
+-# Create the label file `some_folder/labels.txt`, for example with the Python script below (each
+row has the format `synset,labelID,description`, e.g. "n07751451,18,plum").
+-# Unpack all tar files in `train/`.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_or_imagenet -p=/home/user/some_folder/
+~~~
+
+Python script to parse `meta.mat`:
+~~~{py}
+    import scipy.io
+    meta_mat = scipy.io.loadmat("devkit-1.0/data/meta.mat")
+
+    # build synset -> labelID and synset -> description maps from the devkit metadata
+    labels_dic = dict((m[0][1][0], m[0][0][0][0]-1) for m in meta_mat['synsets'])
+    label_names_dic = dict((m[0][1][0], m[0][2][0]) for m in meta_mat['synsets'])
+
+    for label in labels_dic.keys():
+        print "{0},{1},{2}".format(label, labels_dic[label], label_names_dic[label])
+~~~
+
+### MNIST
+
+Implements loading dataset:
+
+"MNIST":
+
+Usage:
+-# From link above download dataset files:
+`t10k-images-idx3-ubyte.gz`, `t10k-labels-idx1-ubyte.gz`, `train-images-idx3-ubyte.gz`, `train-labels-idx1-ubyte.gz`.
+-# Unpack them.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_or_mnist -p=/home/user/path_to_unpacked_files/
+~~~
+
+### SUN Database
+
+Implements loading dataset:
+
+"SUN Database, Scene Recognition Benchmark. SUN397":
+
+Usage:
+-# From link above download dataset file: `SUN397.tar` & file with splits: `Partitions.zip`.
+-# Unpack `SUN397.tar` into folder: `SUN397/` & `Partitions.zip` into folder: `SUN397/Partitions/`.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_or_sun -p=/home/user/path_to_unpacked_files/SUN397/
+~~~
+
+@defgroup datasets_pd Pedestrian Detection
+
+### Caltech Pedestrian Detection Benchmark
+
+Implements loading dataset:
+
+"Caltech Pedestrian Detection Benchmark":
+
+@note This is a first version of the Caltech Pedestrian dataset loading. The code that unpacks all
+frames from the seq files is commented out because their number is huge, so currently only meta
+information is loaded, without the data. Ground truth is not processed either, as it first needs to
+be converted from the mat files.
+
+Usage:
+-# From link above download dataset files: `set00.tar`-`set10.tar`.
+-# Unpack them into a separate folder.
+-# To load data run:
+~~~
+./opencv/build/bin/example_datasets_pd_caltech -p=/home/user/path_to_unpacked_folders/
+~~~
+
+@defgroup datasets_slam SLAM
+
+### KITTI Vision Benchmark
+
+Implements loading dataset:
+
+"KITTI Vision Benchmark":
+
+Usage:
+-# From link above download "Odometry" dataset files:
+`data_odometry_gray\data_odometry_color\data_odometry_velodyne\data_odometry_poses\data_odometry_calib.zip`.
+-# Unpack `data_odometry_poses.zip`; it creates the folder `dataset/poses/`. After that unpack
+`data_odometry_gray.zip`, `data_odometry_color.zip`, `data_odometry_velodyne.zip`. The folder
+`dataset/sequences/` will be created with subfolders `00/..21/`.
Each of these folders will contain: +`image_0/`, `image_1/`, `image_2/`, `image_3/`, `velodyne/` and files `calib.txt` & `times.txt`. +These two last files will be replaced after unpacking `data_odometry_calib.zip` at the end. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_slam_kitti -p=/home/user/path_to_unpacked_folder/dataset/ +~~~ + +### TUMindoor Dataset + +Implements loading dataset: + +"TUMindoor Dataset": + +Usage: +-# From link above download dataset files: `dslr\info\ladybug\pointcloud.tar.bz2` for each dataset: +`11-11-28 (1st floor)\11-12-13 (1st floor N1)\11-12-17a (4th floor)\11-12-17b (3rd floor)\11-12-17c (Ground I)\11-12-18a (Ground II)\11-12-18b (2nd floor)` +-# Unpack them in separate folder for each dataset. +`dslr.tar.bz2 -> dslr/`, +`info.tar.bz2 -> info/`, +`ladybug.tar.bz2 -> ladybug/`, +`pointcloud.tar.bz2 -> pointcloud/`. +-# To load each dataset run: +~~~ +./opencv/build/bin/example_datasets_slam_tumindoor -p=/home/user/path_to_unpacked_folders/ +~~~ + +@defgroup datasets_tr Text Recognition + +### The Chars74K Dataset + +Implements loading dataset: + +"The Chars74K Dataset": + +Usage: +-# From link above download dataset files: +`EnglishFnt\EnglishHnd\EnglishImg\KannadaHnd\KannadaImg.tgz`, `ListsTXT.tgz`. +-# Unpack them. +-# Move `.m` files from folder `ListsTXT/` to appropriate folder. For example, +`English/list_English_Img.m` for `EnglishImg.tgz`. +-# To load data, for example "EnglishImg", run: +~~~ +./opencv/build/bin/example_datasets_tr_chars -p=/home/user/path_to_unpacked_folder/English/ +~~~ + +### The Street View Text Dataset + +Implements loading dataset: + +"The Street View Text Dataset": + +Usage: +-# From link above download dataset file: `svt.zip`. +-# Unpack it. +-# To load data run: +~~~ +./opencv/build/bin/example_datasets_tr_svt -p=/home/user/path_to_unpacked_folder/svt/svt1/ +~~~ + +#### Benchmark + +For this dataset was implemented benchmark with accuracy (mean f1): 0.217 + +To run benchmark execute: +~~~ +./opencv/build/bin/example_datasets_tr_svt_benchmark -p=/home/user/path_to_unpacked_folders/svt/svt1/ +~~~ + +@} + +*/ + namespace cv { namespace datasets { +//! @addtogroup datasets +//! @{ + struct Object { }; @@ -79,6 +516,8 @@ private: std::vector< Ptr > empty; }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/fr_adience.hpp b/modules/datasets/include/opencv2/datasets/fr_adience.hpp index 7d58b0958..c84bce1c2 100644 --- a/modules/datasets/include/opencv2/datasets/fr_adience.hpp +++ b/modules/datasets/include/opencv2/datasets/fr_adience.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_fr +//! @{ + enum genderType { male = 0, @@ -87,6 +90,8 @@ public: std::vector paths; }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/fr_lfw.hpp b/modules/datasets/include/opencv2/datasets/fr_lfw.hpp index 8e013554e..7065da7a6 100644 --- a/modules/datasets/include/opencv2/datasets/fr_lfw.hpp +++ b/modules/datasets/include/opencv2/datasets/fr_lfw.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_fr +//! @{ + struct FR_lfwObj : public Object { std::string image1, image2; @@ -68,6 +71,8 @@ public: static Ptr create(); }; +//! 
@} + } } diff --git a/modules/datasets/include/opencv2/datasets/gr_chalearn.hpp b/modules/datasets/include/opencv2/datasets/gr_chalearn.hpp index 3f79f9542..a8eaa6cc4 100644 --- a/modules/datasets/include/opencv2/datasets/gr_chalearn.hpp +++ b/modules/datasets/include/opencv2/datasets/gr_chalearn.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_gr +//! @{ + struct groundTruth { int gestureID, initialFrame, lastFrame; @@ -85,6 +88,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/gr_skig.hpp b/modules/datasets/include/opencv2/datasets/gr_skig.hpp index b4b34808d..9c862243f 100644 --- a/modules/datasets/include/opencv2/datasets/gr_skig.hpp +++ b/modules/datasets/include/opencv2/datasets/gr_skig.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_gr +//! @{ + enum actionType { circle = 1, @@ -107,6 +110,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp b/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp index 6a093f0b5..5366e0d61 100644 --- a/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp +++ b/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_hpe +//! @{ + struct HPE_humanevaObj : public Object { char person; // 1..4 @@ -79,6 +82,8 @@ public: static Ptr create(int num=humaneva_1); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/hpe_parse.hpp b/modules/datasets/include/opencv2/datasets/hpe_parse.hpp index 5f027a1dc..7629e2caf 100644 --- a/modules/datasets/include/opencv2/datasets/hpe_parse.hpp +++ b/modules/datasets/include/opencv2/datasets/hpe_parse.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_hpe +//! @{ + struct HPE_parseObj : public Object { std::string name; @@ -67,6 +70,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/ir_affine.hpp b/modules/datasets/include/opencv2/datasets/ir_affine.hpp index 1ee51a2a9..3b04a4b8a 100644 --- a/modules/datasets/include/opencv2/datasets/ir_affine.hpp +++ b/modules/datasets/include/opencv2/datasets/ir_affine.hpp @@ -55,6 +55,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_ir +//! @{ + struct IR_affineObj : public Object { std::string imageName; @@ -69,6 +72,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/ir_robot.hpp b/modules/datasets/include/opencv2/datasets/ir_robot.hpp index ac25b6f3c..0acfe0aca 100644 --- a/modules/datasets/include/opencv2/datasets/ir_robot.hpp +++ b/modules/datasets/include/opencv2/datasets/ir_robot.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_ir +//! @{ + // calibration matrix from calibrationFile.mat // 2.8290e+03 0.0000e+00 8.0279e+02 // 0.0000e+00 2.8285e+03 6.1618e+02 @@ -78,6 +81,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/is_bsds.hpp b/modules/datasets/include/opencv2/datasets/is_bsds.hpp index 72802e7e8..7357a6747 100644 --- a/modules/datasets/include/opencv2/datasets/is_bsds.hpp +++ b/modules/datasets/include/opencv2/datasets/is_bsds.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_is +//! @{ + struct IS_bsdsObj : public Object { std::string name; @@ -67,6 +70,8 @@ public: static Ptr create(); }; +//! 
@} + } } diff --git a/modules/datasets/include/opencv2/datasets/is_weizmann.hpp b/modules/datasets/include/opencv2/datasets/is_weizmann.hpp index f374282b1..5daa42097 100644 --- a/modules/datasets/include/opencv2/datasets/is_weizmann.hpp +++ b/modules/datasets/include/opencv2/datasets/is_weizmann.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_is +//! @{ + struct IS_weizmannObj : public Object { std::string imageName; @@ -70,6 +73,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/msm_epfl.hpp b/modules/datasets/include/opencv2/datasets/msm_epfl.hpp index e1785b0b2..a08fc4bff 100644 --- a/modules/datasets/include/opencv2/datasets/msm_epfl.hpp +++ b/modules/datasets/include/opencv2/datasets/msm_epfl.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_msm +//! @{ + struct cameraParam { Matx33d mat1; @@ -79,6 +82,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/msm_middlebury.hpp b/modules/datasets/include/opencv2/datasets/msm_middlebury.hpp index 7af024e96..2fd67bf1b 100644 --- a/modules/datasets/include/opencv2/datasets/msm_middlebury.hpp +++ b/modules/datasets/include/opencv2/datasets/msm_middlebury.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_msm +//! @{ + struct MSM_middleburyObj : public Object { std::string imageName; @@ -70,6 +73,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/or_imagenet.hpp b/modules/datasets/include/opencv2/datasets/or_imagenet.hpp index 56564adba..26a8f6355 100644 --- a/modules/datasets/include/opencv2/datasets/or_imagenet.hpp +++ b/modules/datasets/include/opencv2/datasets/or_imagenet.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_or +//! @{ + struct OR_imagenetObj : public Object { int id; @@ -68,6 +71,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/or_mnist.hpp b/modules/datasets/include/opencv2/datasets/or_mnist.hpp index 9c946ffa3..ff6bd609e 100644 --- a/modules/datasets/include/opencv2/datasets/or_mnist.hpp +++ b/modules/datasets/include/opencv2/datasets/or_mnist.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_or +//! @{ + struct OR_mnistObj : public Object { char label; // 0..9 @@ -68,6 +71,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/or_sun.hpp b/modules/datasets/include/opencv2/datasets/or_sun.hpp index 09db30b8e..059c0d4c6 100644 --- a/modules/datasets/include/opencv2/datasets/or_sun.hpp +++ b/modules/datasets/include/opencv2/datasets/or_sun.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_or +//! @{ + struct OR_sunObj : public Object { int label; @@ -70,6 +73,8 @@ public: std::vector paths; }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/pd_caltech.hpp b/modules/datasets/include/opencv2/datasets/pd_caltech.hpp index 69a99359d..9ff727809 100644 --- a/modules/datasets/include/opencv2/datasets/pd_caltech.hpp +++ b/modules/datasets/include/opencv2/datasets/pd_caltech.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_pd +//! @{ + struct PD_caltechObj : public Object { //double groundTrue[][]; @@ -78,6 +81,8 @@ public: static Ptr create(); }; +//! 
@} + } } diff --git a/modules/datasets/include/opencv2/datasets/slam_kitti.hpp b/modules/datasets/include/opencv2/datasets/slam_kitti.hpp index b001e8db7..1b7c408d7 100644 --- a/modules/datasets/include/opencv2/datasets/slam_kitti.hpp +++ b/modules/datasets/include/opencv2/datasets/slam_kitti.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_slam +//! @{ + struct pose { double elem[12]; @@ -76,6 +79,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/slam_tumindoor.hpp b/modules/datasets/include/opencv2/datasets/slam_tumindoor.hpp index d743cd646..758dd1306 100644 --- a/modules/datasets/include/opencv2/datasets/slam_tumindoor.hpp +++ b/modules/datasets/include/opencv2/datasets/slam_tumindoor.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_slam +//! @{ + enum imageType { LEFT = 0, @@ -76,6 +79,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/tr_chars.hpp b/modules/datasets/include/opencv2/datasets/tr_chars.hpp index 6b7f81885..c213bff19 100644 --- a/modules/datasets/include/opencv2/datasets/tr_chars.hpp +++ b/modules/datasets/include/opencv2/datasets/tr_chars.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_tr +//! @{ + struct TR_charsObj : public Object { std::string imgName; @@ -68,6 +71,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/tr_svt.hpp b/modules/datasets/include/opencv2/datasets/tr_svt.hpp index 23b583d92..6c2d533c0 100644 --- a/modules/datasets/include/opencv2/datasets/tr_svt.hpp +++ b/modules/datasets/include/opencv2/datasets/tr_svt.hpp @@ -54,6 +54,9 @@ namespace cv namespace datasets { +//! @addtogroup datasets_tr +//! @{ + struct tag { std::string value; @@ -75,6 +78,8 @@ public: static Ptr create(); }; +//! @} + } } diff --git a/modules/datasets/include/opencv2/datasets/util.hpp b/modules/datasets/include/opencv2/datasets/util.hpp index b1520b075..316de3a7a 100644 --- a/modules/datasets/include/opencv2/datasets/util.hpp +++ b/modules/datasets/include/opencv2/datasets/util.hpp @@ -57,12 +57,17 @@ namespace cv namespace datasets { +//! @addtogroup datasets +//! @{ + void CV_EXPORTS split(const std::string &s, std::vector &elems, char delim); void CV_EXPORTS createDirectory(const std::string &path); void CV_EXPORTS getDirList(const std::string &dirName, std::vector &fileNames); +//! @} + } } diff --git a/modules/face/doc/changelog.markdown b/modules/face/doc/changelog.markdown new file mode 100644 index 000000000..d5543c3c2 --- /dev/null +++ b/modules/face/doc/changelog.markdown @@ -0,0 +1,75 @@ +Face module changelog {#face_changelog} +===================== + +Release 0.05 +------------ + +This library is now included in the official OpenCV distribution (from 2.4 on). The +cv::FaceRecognizer is now an Algorithm, which better fits into the overall OpenCV API. + +To reduce the confusion on user side and minimize my work, libfacerec and OpenCV have been +synchronized and are now based on the same interfaces and implementation. + +The library now has an extensive documentation: + +- The API is explained in detail and with a lot of code examples. +- The face recognition guide I had written for Python and GNU Octave/MATLAB has been adapted to + the new OpenCV C++ cv::FaceRecognizer. +- A tutorial for gender classification with Fisherfaces. +- A tutorial for face recognition in videos (e.g. webcam). 
+ +### Release highlights + +- There are no single highlights to pick from, this release is a highlight itself. + +Release 0.04 +------------ + +This version is fully Windows-compatible and works with OpenCV 2.3.1. Several bugfixes, but none +influenced the recognition rate. + +### Release highlights + +- A whole lot of exceptions with meaningful error messages. +- A tutorial for Windows users: + [](http://bytefish.de/blog/opencv_visual_studio_and_libfacerec) + +Release 0.03 +------------ + +Reworked the library to provide separate implementations in cpp files, because it's the preferred +way of contributing OpenCV libraries. This means the library is not header-only anymore. Slight API +changes were done, please see the documentation for details. + +### Release highlights + +- New Unit Tests (for LBP Histograms) make the library more robust. +- Added more documentation. + +Release 0.02 +------------ + +Reworked the library to provide separate implementations in cpp files, because it's the preferred +way of contributing OpenCV libraries. This means the library is not header-only anymore. Slight API +changes were done, please see the documentation for details. + +### Release highlights + +- New Unit Tests (for LBP Histograms) make the library more robust. +- Added a documentation and changelog in reStructuredText. + +Release 0.01 +------------ + +Initial release as header-only library. + +### Release highlights + +- Colormaps for OpenCV to enhance the visualization. +- Face Recognition algorithms implemented: + - Eigenfaces @cite TP91 + - Fisherfaces @cite BHK97 + - Local Binary Patterns Histograms @cite AHP04 +- Added persistence facilities to store the models with a common API. +- Unit Tests (using [gtest](http://code.google.com/p/googletest/)). +- Providing a CMakeLists.txt to enable easy cross-platform building. diff --git a/modules/face/doc/facerec/etc/at.txt b/modules/face/doc/etc/at.txt similarity index 100% rename from modules/face/doc/facerec/etc/at.txt rename to modules/face/doc/etc/at.txt diff --git a/modules/face/doc/face.bib b/modules/face/doc/face.bib new file mode 100644 index 000000000..1b9f4a7e2 --- /dev/null +++ b/modules/face/doc/face.bib @@ -0,0 +1,160 @@ +@incollection{AHP04, + title={Face recognition with local binary patterns}, + author={Ahonen, Timo and Hadid, Abdenour and Pietik{\"a}inen, Matti}, + booktitle={Computer vision-eccv 2004}, + pages={469--481}, + year={2004}, + publisher={Springer} +} + +@article{BHK97, + title={Eigenfaces vs. fisherfaces: Recognition using class specific linear projection}, + author={Belhumeur, Peter N. 
and Hespanha, Jo{\~a}o P and Kriegman, David}, + journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume={19}, + number={7}, + pages={711--720}, + year={1997}, + publisher={IEEE} +} + +@inproceedings{Bru92, + title={Face recognition through geometrical features}, + author={Brunelli, Roberto and Poggio, Tomaso}, + booktitle={Computer Vision—ECCV'92}, + pages={792--800}, + year={1992}, + organization={Springer} +} + +@book{Duda01, + title={Pattern classification}, + author={Duda, Richard O and Hart, Peter E and Stork, David G}, + year={2012}, + publisher={John Wiley \& Sons} +} + +@article{Fisher36, + title={The use of multiple measurements in taxonomic problems}, + author={Fisher, Ronald A}, + journal={Annals of eugenics}, + volume={7}, + number={2}, + pages={179--188}, + year={1936}, + publisher={Wiley Online Library} +} + +@article{GBK01, + title={From few to many: Illumination cone models for face recognition under variable lighting and pose}, + author={Georghiades, Athinodoros S. and Belhumeur, Peter N. and Kriegman, David}, + journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume={23}, + number={6}, + pages={643--660}, + year={2001}, + publisher={IEEE} +} + +@article{Kanade73, + title={Picture processing system by computer complex and recognition of human faces}, + author={Kanade, Takeo}, + year={1974} +} + +@article{KM01, + title={Pca versus lda}, + author={Mart{\'\i}nez, Aleix M and Kak, Avinash C}, + journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume={23}, + number={2}, + pages={228--233}, + year={2001}, + publisher={IEEE} +} + +@article{Lee05, + title={Acquiring linear subspaces for face recognition under variable lighting}, + author={Lee, Kuang-Chih and Ho, Jeffrey and Kriegman, David}, + journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume={27}, + number={5}, + pages={684--698}, + year={2005}, + publisher={IEEE} +} + +@incollection{Messer06, + title={Performance characterisation of face recognition algorithms and their sensitivity to severe illumination changes}, + author={Messer, Kieron and Kittler, Josef and Short, James and Heusch, Guillaume and Cardinaux, Fabien and Marcel, Sebastien and Rodriguez, Yann and Shan, Shiguang and Su, Yu and Gao, Wen and others}, + booktitle={Advances in Biometrics}, + pages={1--11}, + year={2005}, + publisher={Springer} +} + +@article{RJ91, + title={Small sample size effects in statistical pattern recognition: Recommendations for practitioners}, + author={Raudys, Sarunas J and Jain, Anil K.}, + journal={IEEE Transactions on pattern analysis and machine intelligence}, + volume={13}, + number={3}, + pages={252--264}, + year={1991}, + publisher={IEEE Computer Society} +} + +@article{Tan10, + title={Enhanced local texture feature sets for face recognition under difficult lighting conditions}, + author={Tan, Xiaoyang and Triggs, Bill}, + journal={Image Processing, IEEE Transactions on}, + volume={19}, + number={6}, + pages={1635--1650}, + year={2010}, + publisher={IEEE} +} + +@article{TP91, + title={Eigenfaces for recognition}, + author={Turk, Matthew and Pentland, Alex}, + journal={Journal of cognitive neuroscience}, + volume={3}, + number={1}, + pages={71--86}, + year={1991}, + publisher={MIT Press} +} + +@article{Tu06, + title={Newborns' face recognition: Role of inner and outer facial features}, + author={Turati, Chiara and Macchi Cassia, Viola and Simion, Francesca and Leo, Irene}, + journal={Child development}, + volume={77}, 
+ number={2}, + pages={297--311}, + year={2006}, + publisher={Wiley Online Library} +} + +@article{Wiskott97, + title={Face recognition by elastic bunch graph matching}, + author={Wiskott, Laurenz and Fellous, J-M and Kuiger, N and Von Der Malsburg, Christoph}, + journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume={19}, + number={7}, + pages={775--779}, + year={1997}, + publisher={IEEE} +} + +@article{Zhao03, + title={Face recognition: A literature survey}, + author={Zhao, Wenyi and Chellappa, Rama and Phillips, P Jonathon and Rosenfeld, Azriel}, + journal={Acm Computing Surveys (CSUR)}, + volume={35}, + number={4}, + pages={399--458}, + year={2003}, + publisher={ACM} +} diff --git a/modules/face/doc/face.rst b/modules/face/doc/face.rst index 36a70aa9b..d4b21ac3c 100755 --- a/modules/face/doc/face.rst +++ b/modules/face/doc/face.rst @@ -7,4 +7,4 @@ The module contains some recently added functionality that has not been stabiliz .. toctree:: :maxdepth: 2 - FaceRecognizer Documentation + FaceRecognizer Documentation diff --git a/modules/face/doc/facerec/facerec_api.rst b/modules/face/doc/facerec_api.rst similarity index 100% rename from modules/face/doc/facerec/facerec_api.rst rename to modules/face/doc/facerec_api.rst diff --git a/modules/face/doc/facerec/facerec_changelog.rst b/modules/face/doc/facerec_changelog.rst similarity index 100% rename from modules/face/doc/facerec/facerec_changelog.rst rename to modules/face/doc/facerec_changelog.rst diff --git a/modules/face/doc/facerec/facerec_tutorial.rst b/modules/face/doc/facerec_tutorial.rst similarity index 100% rename from modules/face/doc/facerec/facerec_tutorial.rst rename to modules/face/doc/facerec_tutorial.rst diff --git a/modules/face/doc/facerec/img/at_database_small_sample_size.png b/modules/face/doc/img/at_database_small_sample_size.png similarity index 100% rename from modules/face/doc/facerec/img/at_database_small_sample_size.png rename to modules/face/doc/img/at_database_small_sample_size.png diff --git a/modules/face/doc/facerec/img/eigenface_reconstruction_opencv.png b/modules/face/doc/img/eigenface_reconstruction_opencv.png similarity index 100% rename from modules/face/doc/facerec/img/eigenface_reconstruction_opencv.png rename to modules/face/doc/img/eigenface_reconstruction_opencv.png diff --git a/modules/face/doc/facerec/img/eigenfaces_opencv.png b/modules/face/doc/img/eigenfaces_opencv.png similarity index 100% rename from modules/face/doc/facerec/img/eigenfaces_opencv.png rename to modules/face/doc/img/eigenfaces_opencv.png diff --git a/modules/face/doc/facerec/img/fisherface_reconstruction_opencv.png b/modules/face/doc/img/fisherface_reconstruction_opencv.png similarity index 100% rename from modules/face/doc/facerec/img/fisherface_reconstruction_opencv.png rename to modules/face/doc/img/fisherface_reconstruction_opencv.png diff --git a/modules/face/doc/facerec/img/fisherfaces_opencv.png b/modules/face/doc/img/fisherfaces_opencv.png similarity index 100% rename from modules/face/doc/facerec/img/fisherfaces_opencv.png rename to modules/face/doc/img/fisherfaces_opencv.png diff --git a/modules/face/doc/facerec/img/lbp/lbp.png b/modules/face/doc/img/lbp/lbp.png similarity index 100% rename from modules/face/doc/facerec/img/lbp/lbp.png rename to modules/face/doc/img/lbp/lbp.png diff --git a/modules/face/doc/facerec/img/lbp/lbp_yale.jpg b/modules/face/doc/img/lbp/lbp_yale.jpg similarity index 100% rename from modules/face/doc/facerec/img/lbp/lbp_yale.jpg rename to 
modules/face/doc/img/lbp/lbp_yale.jpg diff --git a/modules/face/doc/facerec/img/lbp/patterns.png b/modules/face/doc/img/lbp/patterns.png similarity index 100% rename from modules/face/doc/facerec/img/lbp/patterns.png rename to modules/face/doc/img/lbp/patterns.png diff --git a/modules/face/doc/facerec/img/tutorial/facerec_video/facerec_video.png b/modules/face/doc/img/tutorial/facerec_video/facerec_video.png similarity index 100% rename from modules/face/doc/facerec/img/tutorial/facerec_video/facerec_video.png rename to modules/face/doc/img/tutorial/facerec_video/facerec_video.png diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/arnie_10_10_200_200.jpg b/modules/face/doc/img/tutorial/gender_classification/arnie_10_10_200_200.jpg similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/arnie_10_10_200_200.jpg rename to modules/face/doc/img/tutorial/gender_classification/arnie_10_10_200_200.jpg diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/arnie_20_20_200_200.jpg b/modules/face/doc/img/tutorial/gender_classification/arnie_20_20_200_200.jpg similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/arnie_20_20_200_200.jpg rename to modules/face/doc/img/tutorial/gender_classification/arnie_20_20_200_200.jpg diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/arnie_20_20_70_70.jpg b/modules/face/doc/img/tutorial/gender_classification/arnie_20_20_70_70.jpg similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/arnie_20_20_70_70.jpg rename to modules/face/doc/img/tutorial/gender_classification/arnie_20_20_70_70.jpg diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/arnie_30_30_200_200.jpg b/modules/face/doc/img/tutorial/gender_classification/arnie_30_30_200_200.jpg similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/arnie_30_30_200_200.jpg rename to modules/face/doc/img/tutorial/gender_classification/arnie_30_30_200_200.jpg diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/clooney_set.png b/modules/face/doc/img/tutorial/gender_classification/clooney_set.png similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/clooney_set.png rename to modules/face/doc/img/tutorial/gender_classification/clooney_set.png diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/fisherface_0.png b/modules/face/doc/img/tutorial/gender_classification/fisherface_0.png similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/fisherface_0.png rename to modules/face/doc/img/tutorial/gender_classification/fisherface_0.png diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/fisherface_reconstruction_0.png b/modules/face/doc/img/tutorial/gender_classification/fisherface_reconstruction_0.png similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/fisherface_reconstruction_0.png rename to modules/face/doc/img/tutorial/gender_classification/fisherface_reconstruction_0.png diff --git a/modules/face/doc/facerec/img/tutorial/gender_classification/mean.png b/modules/face/doc/img/tutorial/gender_classification/mean.png similarity index 100% rename from modules/face/doc/facerec/img/tutorial/gender_classification/mean.png rename to modules/face/doc/img/tutorial/gender_classification/mean.png diff --git 
a/modules/face/doc/facerec/index.rst b/modules/face/doc/index.rst similarity index 100% rename from modules/face/doc/facerec/index.rst rename to modules/face/doc/index.rst diff --git a/modules/face/doc/facerec/src/CMakeLists.txt b/modules/face/doc/src/CMakeLists.txt similarity index 100% rename from modules/face/doc/facerec/src/CMakeLists.txt rename to modules/face/doc/src/CMakeLists.txt diff --git a/modules/face/doc/facerec/src/create_csv.py b/modules/face/doc/src/create_csv.py similarity index 100% rename from modules/face/doc/facerec/src/create_csv.py rename to modules/face/doc/src/create_csv.py diff --git a/modules/face/doc/facerec/src/crop_face.py b/modules/face/doc/src/crop_face.py similarity index 100% rename from modules/face/doc/facerec/src/crop_face.py rename to modules/face/doc/src/crop_face.py diff --git a/modules/face/doc/facerec/src/facerec_demo.cpp b/modules/face/doc/src/facerec_demo.cpp similarity index 100% rename from modules/face/doc/facerec/src/facerec_demo.cpp rename to modules/face/doc/src/facerec_demo.cpp diff --git a/modules/face/doc/facerec/src/facerec_eigenfaces.cpp b/modules/face/doc/src/facerec_eigenfaces.cpp similarity index 100% rename from modules/face/doc/facerec/src/facerec_eigenfaces.cpp rename to modules/face/doc/src/facerec_eigenfaces.cpp diff --git a/modules/face/doc/facerec/src/facerec_fisherfaces.cpp b/modules/face/doc/src/facerec_fisherfaces.cpp similarity index 100% rename from modules/face/doc/facerec/src/facerec_fisherfaces.cpp rename to modules/face/doc/src/facerec_fisherfaces.cpp diff --git a/modules/face/doc/facerec/src/facerec_lbph.cpp b/modules/face/doc/src/facerec_lbph.cpp similarity index 100% rename from modules/face/doc/facerec/src/facerec_lbph.cpp rename to modules/face/doc/src/facerec_lbph.cpp diff --git a/modules/face/doc/facerec/src/facerec_save_load.cpp b/modules/face/doc/src/facerec_save_load.cpp similarity index 100% rename from modules/face/doc/facerec/src/facerec_save_load.cpp rename to modules/face/doc/src/facerec_save_load.cpp diff --git a/modules/face/doc/facerec/src/facerec_video.cpp b/modules/face/doc/src/facerec_video.cpp similarity index 100% rename from modules/face/doc/facerec/src/facerec_video.cpp rename to modules/face/doc/src/facerec_video.cpp diff --git a/modules/face/doc/tutorial.markdown b/modules/face/doc/tutorial.markdown new file mode 100644 index 000000000..a518483d4 --- /dev/null +++ b/modules/face/doc/tutorial.markdown @@ -0,0 +1,699 @@ +Face Recognition with OpenCV {#face_tutorial} +============================ + +[TOC] + +Introduction {#face_tutorial_intro} +============ + +[OpenCV (Open Source Computer Vision)](http://opencv.org) is a popular computer vision library +started by [Intel](http://www.intel.com) in 1999. The cross-platform library sets its focus on +real-time image processing and includes patent-free implementations of the latest computer vision +algorithms. In 2008 [Willow Garage](http://www.willowgarage.com) took over support and OpenCV 2.3.1 +now comes with a programming interface to C, C++, [Python](http://www.python.org) and +[Android](http://www.android.com). OpenCV is released under a BSD license so it is used in academic +projects and commercial products alike. + +OpenCV 2.4 now comes with the very new FaceRecognizer class for face recognition, so you can start +experimenting with face recognition right away. This document is the guide I've wished for, when I +was working myself into face recognition. 
It shows you how to perform face recognition with +FaceRecognizer in OpenCV (with full source code listings) and gives you an introduction into the +algorithms behind. I'll also show how to create the visualizations you can find in many +publications, because a lot of people asked for. + +The currently available algorithms are: + +- Eigenfaces (see createEigenFaceRecognizer) +- Fisherfaces (see createFisherFaceRecognizer) +- Local Binary Patterns Histograms (see createLBPHFaceRecognizer) + +You don't need to copy and paste the source code examples from this page, because they are available +in the src folder coming with this documentation. If you have built OpenCV with the samples turned +on, chances are good you have them compiled already! Although it might be interesting for very +advanced users, I've decided to leave the implementation details out as I am afraid they confuse new +users. + +All code in this document is released under the [BSD +license](http://www.opensource.org/licenses/bsd-license), so feel free to use it for your projects. + +Face Recognition {#face_tutorial_facerec} +---------------- + +Face recognition is an easy task for humans. Experiments in @cite Tu06 have shown, that even one to +three day old babies are able to distinguish between known faces. So how hard could it be for a +computer? It turns out we know little about human recognition to date. Are inner features (eyes, +nose, mouth) or outer features (head shape, hairline) used for a successful face recognition? How do +we analyze an image and how does the brain encode it? It was shown by [David +Hubel](http://en.wikipedia.org/wiki/David_H._Hubel) and [Torsten +Wiesel](http://en.wikipedia.org/wiki/Torsten_Wiesel), that our brain has specialized nerve cells +responding to specific local features of a scene, such as lines, edges, angles or movement. Since we +don't see the world as scattered pieces, our visual cortex must somehow combine the different +sources of information into useful patterns. Automatic face recognition is all about extracting +those meaningful features from an image, putting them into a useful representation and performing +some kind of classification on them. + +Face recognition based on the geometric features of a face is probably the most intuitive approach +to face recognition. One of the first automated face recognition systems was described in +@cite Kanade73 : marker points (position of eyes, ears, nose, ...) were used to build a feature vector +(distance between the points, angle between them, ...). The recognition was performed by calculating +the euclidean distance between feature vectors of a probe and reference image. Such a method is +robust against changes in illumination by its nature, but has a huge drawback: the accurate +registration of the marker points is complicated, even with state of the art algorithms. Some of the +latest work on geometric face recognition was carried out in @cite Bru92. A 22-dimensional feature +vector was used and experiments on large datasets have shown, that geometrical features alone my not +carry enough information for face recognition. + +The Eigenfaces method described in @cite TP91 took a holistic approach to face recognition: A facial +image is a point from a high-dimensional image space and a lower-dimensional representation is +found, where classification becomes easy. The lower-dimensional subspace is found with Principal +Component Analysis, which identifies the axes with maximum variance. 
While this kind of +transformation is optimal from a reconstruction standpoint, it doesn't take any class labels into +account. Imagine a situation where the variance is generated from external sources, let it be light. +The axes with maximum variance do not necessarily contain any discriminative information at all, +hence a classification becomes impossible. So a class-specific projection with a Linear Discriminant +Analysis was applied to face recognition in @cite BHK97. The basic idea is to minimize the variance +within a class, while maximizing the variance between the classes at the same time. + +Recently various methods for a local feature extraction emerged. To avoid the high-dimensionality of +the input data only local regions of an image are described, the extracted features are (hopefully) +more robust against partial occlusion, illumation and small sample size. Algorithms used for a local +feature extraction are Gabor Wavelets (@cite Wiskott97), Discrete Cosinus Transform (@cite Messer06) and +Local Binary Patterns (@cite AHP04). It's still an open research question what's the best way to +preserve spatial information when applying a local feature extraction, because spatial information +is potentially useful information. + +Face Database {#face_tutorial_facedb} +------------- + +Let's get some data to experiment with first. I don't want to do a toy example here. We are doing +face recognition, so you'll need some face images! You can either create your own dataset or start +with one of the available face databases, +[](http://face-rec.org/databases) gives you an up-to-date overview. +Three interesting databases are (parts of the description are quoted from +[](http://face-rec.org)): + +- [AT&T Facedatabase](http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html) The AT&T + Facedatabase, sometimes also referred to as *ORL Database of Faces*, contains ten different + images of each of 40 distinct subjects. For some subjects, the images were taken at different + times, varying the lighting, facial expressions (open / closed eyes, smiling / not smiling) and + facial details (glasses / no glasses). All the images were taken against a dark homogeneous + background with the subjects in an upright, frontal position (with tolerance for some side + movement). +- [Yale Facedatabase A](http://vision.ucsd.edu/content/yale-face-database), also known as + Yalefaces. The AT&T Facedatabase is good for initial tests, but it's a fairly easy database. The + Eigenfaces method already has a 97% recognition rate on it, so you won't see any great + improvements with other algorithms. The Yale Facedatabase A (also known as Yalefaces) is a more + appropriate dataset for initial experiments, because the recognition problem is harder. The + database consists of 15 people (14 male, 1 female) each with 11 grayscale images sized + \f$320 \times 243\f$ pixel. There are changes in the light conditions (center light, left light, + right light), facial expressions (happy, normal, sad, sleepy, surprised, wink) and glasses + (glasses, no-glasses). + + The original images are not cropped and aligned. Please look into the @ref face_appendix for a + Python script, that does the job for you. + +- [Extended Yale Facedatabase B](http://vision.ucsd.edu/~leekc/ExtYaleDatabase/ExtYaleB.html) The + Extended Yale Facedatabase B contains 2414 images of 38 different people in its cropped version. 
+ The focus of this database is set on extracting features that are robust to illumination, the + images have almost no variation in emotion/occlusion/... . I personally think, that this dataset + is too large for the experiments I perform in this document. You better use the [AT&T + Facedatabase](http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html) for intial + testing. A first version of the Yale Facedatabase B was used in @cite BHK97 to see how the + Eigenfaces and Fisherfaces method perform under heavy illumination changes. @cite Lee05 used the + same setup to take 16128 images of 28 people. The Extended Yale Facedatabase B is the merge of + the two databases, which is now known as Extended Yalefacedatabase B. + +### Preparing the data {#face_tutorial_prepare} + +Once we have acquired some data, we'll need to read it in our program. In the demo applications I +have decided to read the images from a very simple CSV file. Why? Because it's the simplest +platform-independent approach I can think of. However, if you know a simpler solution please ping me +about it. Basically all the CSV file needs to contain are lines composed of a filename followed by a +; followed by the label (as *integer number*), making up a line like this: + +~~~ +/path/to/image.ext;0 +~~~ + +Let's dissect the line. /path/to/image.ext is the path to an image, probably something like this if +you are in Windows: C:/faces/person0/image0.jpg. Then there is the separator ; and finally we assign +the label 0 to the image. Think of the label as the subject (the person) this image belongs to, so +same subjects (persons) should have the same label. + +Download the AT&T Facedatabase from AT&T Facedatabase and the corresponding CSV file from at.txt, +which looks like this (file is without ... of course): + +~~~ +./at/s1/1.pgm;0 +./at/s1/2.pgm;0 +... +./at/s2/1.pgm;1 +./at/s2/2.pgm;1 +... +./at/s40/1.pgm;39 +./at/s40/2.pgm;39 +~~~ + +Imagine I have extracted the files to D:/data/at and have downloaded the CSV file to D:/data/at.txt. +Then you would simply need to Search & Replace ./ with D:/data/. You can do that in an editor of +your choice, every sufficiently advanced editor can do this. Once you have a CSV file with valid +filenames and labels, you can run any of the demos by passing the path to the CSV file as parameter: + +~~~ +facerec_demo.exe D:/data/at.txt +~~~ + +Please, see @ref face_tutorial_appendix_csv for details on creating CSV file. + +Eigenfaces {#face_tutorial_eigenfaces} +---------- + +The problem with the image representation we are given is its high dimensionality. Two-dimensional +\f$p \times q\f$ grayscale images span a \f$m = pq\f$-dimensional vector space, so an image with +\f$100 \times 100\f$ pixels lies in a \f$10,000\f$-dimensional image space already. The question is: Are all +dimensions equally useful for us? We can only make a decision if there's any variance in data, so +what we are looking for are the components that account for most of the information. The Principal +Component Analysis (PCA) was independently proposed by [Karl +Pearson](http://en.wikipedia.org/wiki/Karl_Pearson) (1901) and [Harold +Hotelling](http://en.wikipedia.org/wiki/Harold_Hotelling) (1933) to turn a set of possibly +correlated variables into a smaller set of uncorrelated variables. The idea is, that a +high-dimensional dataset is often described by correlated variables and therefore only a few +meaningful dimensions account for most of the information. 
The PCA method finds the directions with
+the greatest variance in the data, called principal components.
+
+### Algorithmic Description of Eigenfaces method {#face_tutorial_eigenfaces_algo}
+
+Let \f$X = \{ x_{1}, x_{2}, \ldots, x_{n} \}\f$ be a random vector with observations \f$x_i \in R^{d}\f$.
+
+1. Compute the mean \f$\mu\f$
+
+\f[\mu = \frac{1}{n} \sum_{i=1}^{n} x_{i}\f]
+
+2. Compute the covariance matrix \f$S\f$
+
+\f[S = \frac{1}{n} \sum_{i=1}^{n} (x_{i} - \mu) (x_{i} - \mu)^{T}\f]
+
+3. Compute the eigenvalues \f$\lambda_{i}\f$ and eigenvectors \f$v_{i}\f$ of \f$S\f$
+
+\f[S v_{i} = \lambda_{i} v_{i}, i=1,2,\ldots,n\f]
+
+4. Order the eigenvectors by their eigenvalues, in descending order. The \f$k\f$ principal components are the
+   eigenvectors corresponding to the \f$k\f$ largest eigenvalues.
+
+The \f$k\f$ principal components of the observed vector \f$x\f$ are then given by:
+
+\f[y = W^{T} (x - \mu)\f]
+
+where \f$W = (v_{1}, v_{2}, \ldots, v_{k})\f$.
+
+The reconstruction from the PCA basis is given by:
+
+\f[x = W y + \mu\f]
+
+where \f$W = (v_{1}, v_{2}, \ldots, v_{k})\f$.
+
+The Eigenfaces method then performs face recognition by:
+
+- Projecting all training samples into the PCA subspace.
+- Projecting the query image into the PCA subspace.
+- Finding the nearest neighbor between the projected training images and the projected query
+  image.
+
+Still there's one problem left to solve. Imagine we are given \f$400\f$ images sized \f$100 \times 100\f$
+pixels. The Principal Component Analysis requires the eigendecomposition of the covariance matrix
+\f$S = X X^{T}\f$, where \f${size}(X) = 10000 \times 400\f$ in our example. You would end up with a
+\f$10000 \times 10000\f$ matrix, roughly \f$0.8 GB\f$. Solving this problem isn't feasible, so we'll need
+to apply a trick. From your linear algebra lessons you know that an \f$M \times N\f$ matrix with
+\f$M > N\f$ can only have \f$N - 1\f$ non-zero eigenvalues. So it's possible to take the eigenvalue
+decomposition of \f$X^{T} X\f$, which is only of size \f$N \times N\f$, instead:
+
+\f[X^{T} X v_{i} = \lambda_{i} v_{i}\f]
+
+and get the original eigenvectors of \f$S = X X^{T}\f$ with a left multiplication of the data matrix:
+
+\f[X X^{T} (X v_{i}) = \lambda_{i} (X v_{i})\f]
+
+The resulting eigenvectors are orthogonal; to get orthonormal eigenvectors they need to be
+normalized to unit length. I don't want to turn this into a publication, so please look into
+@cite Duda01 for the derivation and proof of the equations.
+
+### Eigenfaces in OpenCV {#face_tutorial_eigenfaces_use}
+
+For the first source code example, I'll go through it with you. I am first giving you the whole
+source code listing, and after this we'll look at the most important lines in detail. Please note:
+every source code listing is commented in detail, so you should have no problems following it.
+
+The source code for this demo application is also available in the src folder coming with this
+documentation:
+
+@include src/facerec_eigenfaces.cpp
+
+I've used the jet colormap, so you can see how the grayscale values are distributed within the
+specific Eigenfaces. You can see that the Eigenfaces do not only encode facial features, but also
+the illumination in the images (see the left light in Eigenface \#4, right light in Eigenface \#5):
+
+![image](img/eigenfaces_opencv.png)
+
+We've already seen that we can reconstruct a face from its lower dimensional approximation. So
+let's see how many Eigenfaces are needed for a good reconstruction. I'll do a subplot with
I'll do a subplot with +\f$10,30,\ldots,310\f$ Eigenfaces: + +~~~{cpp} +// Display or save the image reconstruction at some predefined steps: +for(int num_components = 10; num_components < 300; num_components+=15) { + // slice the eigenvectors from the model + Mat evs = Mat(W, Range::all(), Range(0, num_components)); + Mat projection = subspaceProject(evs, mean, images[0].reshape(1,1)); + Mat reconstruction = subspaceReconstruct(evs, mean, projection); + // Normalize the result: + reconstruction = norm_0_255(reconstruction.reshape(1, images[0].rows)); + // Display or save: + if(argc == 2) { + imshow(format("eigenface_reconstruction_%d", num_components), reconstruction); + } else { + imwrite(format("%s/eigenface_reconstruction_%d.png", output_folder.c_str(), num_components), reconstruction); + } +} +~~~ + +10 Eigenvectors are obviously not sufficient for a good image reconstruction, 50 Eigenvectors may +already be sufficient to encode important facial features. You'll get a good reconstruction with +approximately 300 Eigenvectors for the AT&T Facedatabase. There are rule of thumbs how many +Eigenfaces you should choose for a successful face recognition, but it heavily depends on the input +data. @cite Zhao03 is the perfect point to start researching for this: + +![image](img/eigenface_reconstruction_opencv.png) + +Fisherfaces {#face_tutorial_fisherfaces} +----------- + +The Principal Component Analysis (PCA), which is the core of the Eigenfaces method, finds a linear +combination of features that maximizes the total variance in data. While this is clearly a powerful +way to represent data, it doesn't consider any classes and so a lot of discriminative information +*may* be lost when throwing components away. Imagine a situation where the variance in your data is +generated by an external source, let it be the light. The components identified by a PCA do not +necessarily contain any discriminative information at all, so the projected samples are smeared +together and a classification becomes impossible (see +[](http://www.bytefish.de/wiki/pca_lda_with_gnu_octave) +for an example). + +The Linear Discriminant Analysis performs a class-specific dimensionality reduction and was invented +by the great statistician [Sir R. A. Fisher](http://en.wikipedia.org/wiki/Ronald_Fisher). He +successfully used it for classifying flowers in his 1936 paper *The use of multiple measurements in +taxonomic problems* @cite Fisher36. In order to find the combination of features that separates best +between classes the Linear Discriminant Analysis maximizes the ratio of between-classes to +within-classes scatter, instead of maximizing the overall scatter. The idea is simple: same classes +should cluster tightly together, while different classes are as far away as possible from each other +in the lower-dimensional representation. This was also recognized by +[Belhumeur](http://www.cs.columbia.edu/~belhumeur/), [Hespanha](http://www.ece.ucsb.edu/~hespanha/) +and [Kriegman](http://cseweb.ucsd.edu/~kriegman/) and so they applied a Discriminant Analysis to +face recognition in @cite BHK97. 
+
+### Algorithmic Description of Fisherfaces method {#face_tutorial_fisherfaces_algo}
+
+Let \f$X\f$ be a random vector with samples drawn from \f$c\f$ classes:
+
+\f[\begin{align*}
+    X & = & \{X_1,X_2,\ldots,X_c\} \\
+    X_i & = & \{x_1, x_2, \ldots, x_n\}
+\end{align*}\f]
+
+The scatter matrices \f$S_{B}\f$ and \f$S_{W}\f$ are calculated as:
+
+\f[\begin{align*}
+    S_{B} & = & \sum_{i=1}^{c} N_{i} (\mu_i - \mu)(\mu_i - \mu)^{T} \\
+    S_{W} & = & \sum_{i=1}^{c} \sum_{x_{j} \in X_{i}} (x_j - \mu_i)(x_j - \mu_i)^{T}
+\end{align*}\f]
+
+where \f$\mu\f$ is the total mean:
+
+\f[\mu = \frac{1}{N} \sum_{i=1}^{N} x_i\f]
+
+and \f$\mu_i\f$ is the mean of class \f$i \in \{1,\ldots,c\}\f$:
+
+\f[\mu_i = \frac{1}{|X_i|} \sum_{x_j \in X_i} x_j\f]
+
+Fisher's classic algorithm now looks for a projection \f$W\f$ that maximizes the class separability
+criterion:
+
+\f[W_{opt} = \operatorname{arg\,max}_{W} \frac{|W^T S_B W|}{|W^T S_W W|}\f]
+
+Following @cite BHK97, a solution for this optimization problem is given by solving the generalized
+eigenvalue problem:
+
+\f[\begin{align*}
+    S_{B} v_{i} & = & \lambda_{i} S_{W} v_{i} \nonumber \\
+    S_{W}^{-1} S_{B} v_{i} & = & \lambda_{i} v_{i}
+\end{align*}\f]
+
+There's one problem left to solve: The rank of \f$S_{W}\f$ is at most \f$(N-c)\f$, with \f$N\f$ samples and \f$c\f$
+classes. In pattern recognition problems the number of samples \f$N\f$ is almost always smaller than the
+dimension of the input data (the number of pixels), so the scatter matrix \f$S_{W}\f$ becomes singular
+(see @cite RJ91). In @cite BHK97 this was solved by performing a Principal Component Analysis on the
+data and projecting the samples into the \f$(N-c)\f$-dimensional space. A Linear Discriminant Analysis
+was then performed on the reduced data, because \f$S_{W}\f$ isn't singular anymore.
+
+The optimization problem can then be rewritten as:
+
+\f[\begin{align*}
+    W_{pca} & = & \operatorname{arg\,max}_{W} |W^T S_T W| \\
+    W_{fld} & = & \operatorname{arg\,max}_{W} \frac{|W^T W_{pca}^T S_{B} W_{pca} W|}{|W^T W_{pca}^T S_{W} W_{pca} W|}
+\end{align*}\f]
+
+The transformation matrix \f$W\f$ that projects a sample into the \f$(c-1)\f$-dimensional space is then
+given by:
+
+\f[W = W_{fld}^{T} W_{pca}^{T}\f]
+
+### Fisherfaces in OpenCV {#face_tutorial_fisherfaces_use}
+
+The source code for this demo application is also available in the src folder coming with this
+documentation:
+
+@include src/facerec_fisherfaces.cpp
+
+For this example I am going to use the Yale Facedatabase A, just because the plots are nicer. Each
+Fisherface has the same length as an original image, thus it can be displayed as an image. The demo
+shows (or saves) the first, at most 16 Fisherfaces:
+
+![image](img/fisherfaces_opencv.png)
+
+The Fisherfaces method learns a class-specific transformation matrix, so they do not capture
+illumination as obviously as the Eigenfaces method. The Discriminant Analysis instead finds the
+facial features to discriminate between persons. It's important to mention that the performance
+of the Fisherfaces heavily depends on the input data as well. Practically speaking: if you learn the
+Fisherfaces from well-illuminated pictures only and then try to recognize faces in badly illuminated
+scenes, the method is likely to find the wrong components (just because those features may not be
+predominant in badly illuminated images). This is somewhat logical, since the method had no chance to
+learn the illumination.
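+
+As a minimal usage sketch (assuming `images` and `labels` have already been loaded, for example
+with the CSV reader from the demo listings, that `testSample` is a query image of the same size,
+and with `fisherfaces_sketch` as a purely illustrative wrapper name), training and prediction with
+a confidence value look roughly like this:
+
+~~~{cpp}
+#include <vector>
+#include <opencv2/core.hpp>
+#include <opencv2/face.hpp>
+
+using namespace cv;
+using namespace cv::face;
+
+void fisherfaces_sketch(const std::vector<Mat>& images, const std::vector<int>& labels,
+                        const Mat& testSample)
+{
+    // Default parameters: keep all (c-1) components, no distance threshold.
+    Ptr<FaceRecognizer> model = createFisherFaceRecognizer();
+    model->train(images, labels);
+
+    int predictedLabel = -1;
+    double confidence = 0.0; // distance to the closest class; smaller is better
+    model->predict(testSample, predictedLabel, confidence);
+}
+~~~
+
+The same pattern works for createEigenFaceRecognizer and createLBPHFaceRecognizer.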
+
+The Fisherfaces allow a reconstruction of the projected image, just like the Eigenfaces did. But
+since we only identified the features needed to distinguish between subjects, you can't expect a
+nice reconstruction of the original image. For the Fisherfaces method we'll instead project the
+sample image onto each of the Fisherfaces. So you'll get a nice visualization of which feature each
+of the Fisherfaces describes:
+
+~~~{cpp}
+// Display or save the image reconstruction at some predefined steps:
+for(int num_component = 0; num_component < min(16, W.cols); num_component++) {
+    // Slice the Fisherface from the model:
+    Mat ev = W.col(num_component);
+    Mat projection = subspaceProject(ev, mean, images[0].reshape(1,1));
+    Mat reconstruction = subspaceReconstruct(ev, mean, projection);
+    // Normalize the result:
+    reconstruction = norm_0_255(reconstruction.reshape(1, images[0].rows));
+    // Display or save:
+    if(argc == 2) {
+        imshow(format("fisherface_reconstruction_%d", num_component), reconstruction);
+    } else {
+        imwrite(format("%s/fisherface_reconstruction_%d.png", output_folder.c_str(), num_component), reconstruction);
+    }
+}
+~~~
+
+The differences may be subtle for the human eye, but you should be able to see some:
+
+![image](img/fisherface_reconstruction_opencv.png)
+
+Local Binary Patterns Histograms {#face_tutorial_lbph}
+--------------------------------
+
+Eigenfaces and Fisherfaces take a somewhat holistic approach to face recognition. You treat your
+data as a vector somewhere in a high-dimensional image space. We all know high dimensionality is
+bad, so a lower-dimensional subspace is identified, where (probably) useful information is
+preserved. The Eigenfaces approach maximizes the total scatter, which can lead to problems if the
+variance is generated by an external source, because components with maximum variance over all
+classes aren't necessarily useful for classification (see
+[](http://www.bytefish.de/wiki/pca_lda_with_gnu_octave)).
+So to preserve some discriminative information we applied a Linear Discriminant Analysis and
+optimized as described in the Fisherfaces method. The Fisherfaces method worked great... at least
+for the constrained scenario we've assumed in our model.
+
+Now real life isn't perfect. You simply can't guarantee perfect light settings in your images or 10
+different images of a person. So what if there's only one image for each person? Our covariance
+estimates for the subspace *may* be horribly wrong, and so will the recognition. Remember that the
+Eigenfaces method had a 96% recognition rate on the AT&T Facedatabase? How many images do we
+actually need to get such useful estimates? Here are the Rank-1 recognition rates of the Eigenfaces
+and Fisherfaces methods on the AT&T Facedatabase, which is a fairly easy image database:
+
+![image](img/at_database_small_sample_size.png)
+
+So in order to get good recognition rates you'll need at least 8 (+-1) images per person, and the
+Fisherfaces method doesn't really help here. The above experiment is a 10-fold cross-validated
+result carried out with the facerec framework at:
+[](https://github.com/bytefish/facerec). This is not a
+publication, so I won't back these figures with a deep mathematical analysis. Please have a look
+at @cite KM01 for a detailed analysis of both methods when it comes to small training datasets.
+
+So some research concentrated on extracting local features from images.
+The idea is to not look at the whole image as a high-dimensional vector, but to describe only local
+features of an object. The features you extract this way will implicitly have a low dimensionality.
+A fine idea! But you'll soon observe that the image representation we are given doesn't only suffer
+from illumination variations. Think of things like scale, translation or rotation in images - your
+local description has to be at least a bit robust against those things. Just like SIFT, the Local
+Binary Patterns methodology has its roots in 2D texture analysis. The basic idea of Local Binary
+Patterns is to summarize the local structure in an image by comparing each pixel with its
+neighborhood. Take a pixel as center and threshold its neighbors against it. If the intensity of the
+center pixel is greater than or equal to that of its neighbor, denote it with 1, and with 0 if not.
+You'll end up with a binary number for each pixel, just like 11001111. So with 8 surrounding pixels
+you'll end up with \f$2^8\f$ possible combinations, called *Local Binary Patterns* or sometimes
+referred to as *LBP codes*. The first LBP operator described in the literature actually used a fixed
+3 x 3 neighborhood just like this:
+
+![image](img/lbp/lbp.png)
+
+### Algorithmic Description of the LBPH method {#face_tutorial_lbph_algo}
+
+A more formal description of the LBP operator can be given as:
+
+\f[LBP(x_c, y_c) = \sum_{p=0}^{P-1} 2^p s(i_p - i_c)\f]
+
+with \f$(x_c, y_c)\f$ as the central pixel with intensity \f$i_c\f$, and \f$i_p\f$ being the intensity of the
+neighbor pixel. \f$s\f$ is the sign function defined as:
+
+\f[\begin{equation}
+s(x) =
+\begin{cases}
+1 & \text{if \(x \geq 0\)}\\
+0 & \text{else}
+\end{cases}
+\end{equation}\f]
+
+This description enables you to capture very fine-grained details in images. In fact the authors
+were able to compete with state-of-the-art results for texture classification. Soon after the
+operator was published it was noted that a fixed neighborhood fails to encode details differing in
+scale. So the operator was extended to use a variable neighborhood in @cite AHP04. The idea is to
+align an arbitrary number of neighbors on a circle with a variable radius, which makes it possible
+to capture the following neighborhoods:
+
+![image](img/lbp/patterns.png)
+
+For a given point \f$(x_c,y_c)\f$ the position of the neighbor \f$(x_p,y_p), p \in P\f$ can be calculated
+by:
+
+\f[\begin{align*}
+x_{p} & = & x_c + R \cos({\frac{2\pi p}{P}})\\
+y_{p} & = & y_c - R \sin({\frac{2\pi p}{P}})
+\end{align*}\f]
+
+where \f$R\f$ is the radius of the circle and \f$P\f$ is the number of sample points.
+
+The operator is an extension of the original LBP codes, so it's sometimes called *Extended LBP*
+(also referred to as *Circular LBP*). If a point's coordinates on the circle don't correspond to
+image coordinates, the point gets interpolated. Computer science has a bunch of clever interpolation
+schemes; the OpenCV implementation does a bilinear interpolation:
+
+\f[\begin{align*}
+f(x,y) \approx \begin{bmatrix}
+    1-x & x \end{bmatrix} \begin{bmatrix}
+    f(0,0) & f(0,1) \\
+    f(1,0) & f(1,1) \end{bmatrix} \begin{bmatrix}
+    1-y \\
+    y \end{bmatrix}.
+\end{align*}\f]
+
+By definition the LBP operator is robust against monotonic gray scale transformations. We can easily
+verify this by looking at the LBP image of an artificially modified image (so you see what an LBP
+image looks like!):
+
+![image](img/lbp/lbp_yale.jpg)
+
+What's left to do is to incorporate the spatial information into the face recognition model.
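+
+Before turning to the spatial model, here is a small, self-contained sketch of the basic 3 x 3 LBP
+operator described above (an illustration only, not the optimized code used inside OpenCV's LBPH
+implementation; the function name `lbp_basic` is mine):
+
+~~~{cpp}
+#include "opencv2/core.hpp"
+
+using namespace cv;
+
+// Compute the basic 3x3 LBP code image of an 8-bit grayscale image (illustration only):
+Mat lbp_basic(const Mat& src)
+{
+    Mat dst = Mat::zeros(src.rows - 2, src.cols - 2, CV_8UC1);
+    for(int i = 1; i < src.rows - 1; i++) {
+        for(int j = 1; j < src.cols - 1; j++) {
+            uchar center = src.at<uchar>(i, j);
+            uchar code = 0;
+            // Compare the 8 neighbors against the center, clockwise from the top-left:
+            code |= (src.at<uchar>(i-1, j-1) >= center) << 7;
+            code |= (src.at<uchar>(i-1, j  ) >= center) << 6;
+            code |= (src.at<uchar>(i-1, j+1) >= center) << 5;
+            code |= (src.at<uchar>(i,   j+1) >= center) << 4;
+            code |= (src.at<uchar>(i+1, j+1) >= center) << 3;
+            code |= (src.at<uchar>(i+1, j  ) >= center) << 2;
+            code |= (src.at<uchar>(i+1, j-1) >= center) << 1;
+            code |= (src.at<uchar>(i,   j-1) >= center) << 0;
+            dst.at<uchar>(i-1, j-1) = code;
+        }
+    }
+    return dst;
+}
+~~~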
+The representation proposed by Ahonen et. al @cite AHP04 is to divide the LBP image into \f$m\f$ local +regions and extract a histogram from each. The spatially enhanced feature vector is then obtained by +concatenating the local histograms (**not merging them**). These histograms are called *Local Binary +Patterns Histograms*. + +### Local Binary Patterns Histograms in OpenCV {#face_tutorial_lbph_use} + +The source code for this demo application is also available in the src folder coming with this +documentation: + +@include src/facerec_lbph.cpp + +Conclusion {#face_tutorial_conclusion} +---------- + +You've learned how to use the new FaceRecognizer in real applications. After reading the document +you also know how the algorithms work, so now it's time for you to experiment with the available +algorithms. Use them, improve them and let the OpenCV community participate! + +Credits {#face_tutorial_credits} +------- + +This document wouldn't be possible without the kind permission to use the face images of the *AT&T +Database of Faces* and the *Yale Facedatabase A/B*. + +### The Database of Faces {#face_tutorial_credits_db} + +__Important: when using these images, please give credit to "AT&T Laboratories, Cambridge."__ + +The Database of Faces, formerly *The ORL Database of Faces*, contains a set of face images taken +between April 1992 and April 1994. The database was used in the context of a face recognition +project carried out in collaboration with the Speech, Vision and Robotics Group of the Cambridge +University Engineering Department. + +There are ten different images of each of 40 distinct subjects. For some subjects, the images were +taken at different times, varying the lighting, facial expressions (open / closed eyes, smiling / +not smiling) and facial details (glasses / no glasses). All the images were taken against a dark +homogeneous background with the subjects in an upright, frontal position (with tolerance for some +side movement). + +The files are in PGM format. The size of each image is 92x112 pixels, with 256 grey levels per +pixel. The images are organised in 40 directories (one for each subject), which have names of the +form sX, where X indicates the subject number (between 1 and 40). In each of these directories, +there are ten different images of that subject, which have names of the form Y.pgm, where Y is the +image number for that subject (between 1 and 10). + +A copy of the database can be retrieved from: +[](http://www.cl.cam.ac.uk/research/dtg/attarchive/pub/data/att_faces.zip). + +### Yale Facedatabase A {#face_tutorial_credits_yalea} + +*With the permission of the authors I am allowed to show a small number of images (say subject 1 and +all the variations) and all images such as Fisherfaces and Eigenfaces from either Yale Facedatabase +A or the Yale Facedatabase B.* + +The Yale Face Database A (size 6.4MB) contains 165 grayscale images in GIF format of 15 individuals. +There are 11 images per subject, one per different facial expression or configuration: center-light, +w/glasses, happy, left-light, w/no glasses, normal, right-light, sad, sleepy, surprised, and wink. 
+(Source:
+[](http://cvc.yale.edu/projects/yalefaces/yalefaces.html))
+
+### Yale Facedatabase B {#face_tutorial_credits_yaleb}
+
+*With the permission of the authors I am allowed to show a small number of images (say subject 1 and
+all the variations) and all images such as Fisherfaces and Eigenfaces from either Yale Facedatabase
+A or the Yale Facedatabase B.*
+
+The extended Yale Face Database B contains 16128 images of 28 human subjects under 9 poses and 64
+illumination conditions. The data format of this database is the same as the Yale Face Database B.
+Please refer to the homepage of the Yale Face Database B (or a copy of that page) for more detailed
+information on the data format.
+
+You are free to use the extended Yale Face Database B for research purposes. All publications which
+use this database should acknowledge the use of "the Extended Yale Face Database B" and reference
+Athinodoros Georghiades, Peter Belhumeur, and David Kriegman's paper, "From Few to Many:
+Illumination Cone Models for Face Recognition under Variable Lighting and Pose", PAMI, 2001,
+[[bibtex]](http://vision.ucsd.edu/~leekc/ExtYaleDatabase/athosref.html).
+
+The extended database, as opposed to the original Yale Face Database B with 10 subjects, was first
+reported by Kuang-Chih Lee, Jeffrey Ho, and David Kriegman in "Acquiring Linear Subspaces for Face
+Recognition under Variable Lighting", PAMI, May 2005
+[[pdf]](http://vision.ucsd.edu/~leekc/papers/9pltsIEEE.pdf). All test image data used in the
+experiments are manually aligned, cropped, and then re-sized to 168x192 images. If you publish your
+experimental results with the cropped images, please reference the PAMI 2005 paper as well. (Source:
+[](http://vision.ucsd.edu/~leekc/ExtYaleDatabase/ExtYaleB.html))
+
+Appendix {#face_appendix}
+--------
+
+### Creating the CSV File {#face_tutorial_appendix_csv}
+
+You don't really want to create the CSV file by hand. I have prepared a little Python script
+`create_csv.py` for you (you find it at `src/create_csv.py` coming with this tutorial) that
+automatically creates a CSV file for you. If you have your images in a hierarchy like this
+(`/basepath/<subject>/<image.ext>`):
+
+~~~~~~
+philipp@mango:~/facerec/data/at$ tree
+.
+|-- s1
+|   |-- 1.pgm
+|   |-- ...
+|   |-- 10.pgm
+|-- s2
+|   |-- 1.pgm
+|   |-- ...
+|   |-- 10.pgm
+...
+|-- s40
+|   |-- 1.pgm
+|   |-- ...
+|   |-- 10.pgm
+~~~~~~
+
+Then simply call `create_csv.py` with the path to the folder, just like this, and you can save the
+output:
+
+~~~~~~
+philipp@mango:~/facerec/data$ python create_csv.py
+at/s13/2.pgm;0
+at/s13/7.pgm;0
+at/s13/6.pgm;0
+at/s13/9.pgm;0
+at/s13/5.pgm;0
+at/s13/3.pgm;0
+at/s13/4.pgm;0
+at/s13/10.pgm;0
+at/s13/8.pgm;0
+at/s13/1.pgm;0
+at/s17/2.pgm;1
+at/s17/7.pgm;1
+at/s17/6.pgm;1
+at/s17/9.pgm;1
+at/s17/5.pgm;1
+at/s17/3.pgm;1
+[...]
+~~~~~~
+
+Here is the script, if you can't find it:
+
+@verbinclude src/create_csv.py
+
+### Aligning Face Images {#face_tutorial_appendix_align}
+
+An accurate alignment of your image data is especially important in tasks like emotion detection,
+where you need as much detail as possible. Believe me... You don't want to do this by hand. So I've
+prepared a tiny Python script for you. The code is really easy to use.
To scale, rotate and crop the +face image you just need to call *CropFace(image, eye\_left, eye\_right, offset\_pct, dest\_sz)*, +where: + +- *eye\_left* is the position of the left eye +- *eye\_right* is the position of the right eye +- *offset\_pct* is the percent of the image you want to keep next to the eyes (horizontal, + vertical direction) +- *dest\_sz* is the size of the output image + +If you are using the same *offset\_pct* and *dest\_sz* for your images, they are all aligned at the +eyes. + +@verbinclude src/crop_face.py + +Imagine we are given [this photo of Arnold +Schwarzenegger](http://en.wikipedia.org/wiki/File:Arnold_Schwarzenegger_edit%28ws%29.jpg), which is +under a Public Domain license. The (x,y)-position of the eyes is approximately *(252,364)* for the +left and *(420,366)* for the right eye. Now you only need to define the horizontal offset, vertical +offset and the size your scaled, rotated & cropped face should have. + +Here are some examples: + +Configuration | Cropped, Scaled, Rotated Face +--------------------------------|------------------------------------------------------------------ +0.1 (10%), 0.1 (10%), (200,200) | ![](tutorial/gender_classification/arnie_10_10_200_200.jpg) +0.2 (20%), 0.2 (20%), (200,200) | ![](tutorial/gender_classification/arnie_20_20_200_200.jpg) +0.3 (30%), 0.3 (30%), (200,200) | ![](tutorial/gender_classification/arnie_30_30_200_200.jpg) +0.2 (20%), 0.2 (20%), (70,70) | ![](tutorial/gender_classification/arnie_20_20_70_70.jpg) + +### CSV for the AT&T Facedatabase {#face_tutorial_appendix_attcsv} + +@verbinclude etc/at.txt diff --git a/modules/face/doc/facerec/tutorial/facerec_gender_classification.rst b/modules/face/doc/tutorial/facerec_gender_classification.rst similarity index 100% rename from modules/face/doc/facerec/tutorial/facerec_gender_classification.rst rename to modules/face/doc/tutorial/facerec_gender_classification.rst diff --git a/modules/face/doc/facerec/tutorial/facerec_save_load.rst b/modules/face/doc/tutorial/facerec_save_load.rst similarity index 100% rename from modules/face/doc/facerec/tutorial/facerec_save_load.rst rename to modules/face/doc/tutorial/facerec_save_load.rst diff --git a/modules/face/doc/facerec/tutorial/facerec_video_recognition.rst b/modules/face/doc/tutorial/facerec_video_recognition.rst similarity index 100% rename from modules/face/doc/facerec/tutorial/facerec_video_recognition.rst rename to modules/face/doc/tutorial/facerec_video_recognition.rst diff --git a/modules/face/include/opencv2/face.hpp b/modules/face/include/opencv2/face.hpp index a5524f61c..be6decd5f 100644 --- a/modules/face/include/opencv2/face.hpp +++ b/modules/face/include/opencv2/face.hpp @@ -41,4 +41,12 @@ the use of this software, even if advised of the possibility of such damage. #include "opencv2/face/facerec.hpp" +/** +@defgroup face Face Recognition + +- @ref face_changelog +- @ref face_tutorial + +*/ + #endif diff --git a/modules/face/include/opencv2/face/facerec.hpp b/modules/face/include/opencv2/face/facerec.hpp index 605ebadfd..49291b46c 100644 --- a/modules/face/include/opencv2/face/facerec.hpp +++ b/modules/face/include/opencv2/face/facerec.hpp @@ -12,52 +12,403 @@ namespace cv { namespace face { +//! @addtogroup face +//! @{ + +/** @brief Abstract base class for all face recognition models + +All face recognition models in OpenCV are derived from the abstract base class FaceRecognizer, which +provides a unified access to all face recongition algorithms in OpenCV. 
+
+### Description
+
+I'll go a bit more into detail explaining FaceRecognizer, because it doesn't look like a powerful
+interface at first sight. But: every FaceRecognizer is an Algorithm, so you can easily get/set all
+model internals (if allowed by the implementation). Algorithm is a relatively new OpenCV concept,
+which is available since the 2.4 release. I suggest you take a look at its description.
+
+Algorithm provides the following features for all derived classes:
+
+- So-called "virtual constructor". That is, each Algorithm derivative is registered at program
+  start and you can get the list of registered algorithms and create an instance of a particular
+  algorithm by its name (see Algorithm::create). If you plan to add your own algorithms, it is
+  good practice to add a unique prefix to your algorithms to distinguish them from other
+  algorithms.
+- Setting/Retrieving algorithm parameters by name. If you used the video capturing functionality
+  from the OpenCV highgui module, you are probably familiar with cvSetCaptureProperty,
+  cvGetCaptureProperty, VideoCapture::set and VideoCapture::get. Algorithm provides similar
+  methods where instead of integer id's you specify the parameter names as text Strings. See
+  Algorithm::set and Algorithm::get for details.
+- Reading and writing parameters from/to XML or YAML files. Every Algorithm derivative can store
+  all its parameters and then read them back. There is no need to re-implement it each time.
+
+Moreover every FaceRecognizer supports the:
+
+- **Training** of a FaceRecognizer with FaceRecognizer::train on a given set of images (your face
+  database!).
+- **Prediction** of a given sample image, that means a face image. The image is given as a Mat.
+- **Loading/Saving** the model state from/to a given XML or YAML file.
+- **Setting/Getting labels info**, that is stored as a string. String labels info is useful for
+  keeping names of the recognized people.
+
+@note When using the FaceRecognizer interface in combination with Python, please stick to Python 2.
+Some underlying scripts like create_csv will not work in other versions, like Python 3.
+
+### Setting the Thresholds
+
+Sometimes you run into the situation where you want to apply a threshold on the prediction. A common
+scenario in face recognition is to tell whether a face belongs to the training dataset or if it is
+unknown. You might wonder why there's no public API in FaceRecognizer to set the threshold for the
+prediction, but rest assured: it's supported. It just means there's no generic way in an abstract
+class to provide an interface for setting/getting the thresholds of *every possible* FaceRecognizer
+algorithm. The appropriate place to set the thresholds is in the constructor of the specific
+FaceRecognizer and, since every FaceRecognizer is an Algorithm (see above), you can get/set the
+thresholds at runtime!
+
+Here is an example of setting a threshold for the Eigenfaces method when creating the model:
+
+@code
+// Let's say we want to keep 10 Eigenfaces and have a threshold value of 10.0
+int num_components = 10;
+double threshold = 10.0;
+// Then if you want to have a cv::FaceRecognizer with a confidence threshold,
+// create the concrete implementation with the appropriate parameters:
+Ptr<FaceRecognizer> model = createEigenFaceRecognizer(num_components, threshold);
+@endcode
+
+Sometimes it's impossible to train the model, just to experiment with threshold values. Thanks to
+Algorithm it's possible to set internal model thresholds during runtime.
+Let's see how we would set/get the prediction threshold for the Eigenfaces model we've created
+above:
+
+@code
+// The following line reads the threshold from the Eigenfaces model:
+double current_threshold = model->getDouble("threshold");
+// And this line sets the threshold to 0.0:
+model->set("threshold", 0.0);
+@endcode
+
+If you've set the threshold to 0.0 as we did above, then:
+
+@code
+//
+Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE);
+// Get a prediction from the model. Note: We've set a threshold of 0.0 above,
+// since the distance is almost always larger than 0.0, you'll get -1 as
+// label, which indicates, this face is unknown
+int predicted_label = model->predict(img);
+// ...
+@endcode
+
+is going to yield -1 as the predicted label, which states that this face is unknown.
+
+### Getting the name of a FaceRecognizer
+
+Since every FaceRecognizer is an Algorithm, you can use Algorithm::name to get the name of a
+FaceRecognizer:
+
+@code
+// Create a FaceRecognizer:
+Ptr<FaceRecognizer> model = createEigenFaceRecognizer();
+// And here's how to get its name:
+String name = model->name();
+@endcode
+
+ */
 class CV_EXPORTS_W FaceRecognizer : public Algorithm
 {
 public:
     //! virtual destructor
     virtual ~FaceRecognizer() {}
 
-    // Trains a FaceRecognizer.
+    /** @brief Trains a FaceRecognizer with given data and associated labels.
+
+    @param src The training images, that means the faces you want to learn. The data has to be
+    given as a vector\<Mat\>.
+    @param labels The labels corresponding to the images have to be given either as a
+    vector\<int\> or a Mat of type CV_32SC1.
+
+    The following source code snippet shows you how to learn a Fisherfaces model on a given set of
+    images. The images are read with imread and pushed into a std::vector\<Mat\>. The labels of each
+    image are stored within a std::vector\<int\> (you could also use a Mat of type CV_32SC1). Think of
+    the label as the subject (the person) this image belongs to, so same subjects (persons) should have
+    the same label. For the available FaceRecognizer you don't have to pay any attention to the order of
+    the labels, just make sure same persons have the same label:
+
+    @code
+    // holds images and labels
+    vector<Mat> images;
+    vector<int> labels;
+    // images for first person
+    images.push_back(imread("person0/0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0);
+    images.push_back(imread("person0/1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0);
+    images.push_back(imread("person0/2.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(0);
+    // images for second person
+    images.push_back(imread("person1/0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
+    images.push_back(imread("person1/1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
+    images.push_back(imread("person1/2.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
+    @endcode
+
+    Now that you have read some images, we can create a new FaceRecognizer. In this example I'll create
+    a Fisherfaces model and decide to keep all of the possible Fisherfaces:
+
+    @code
+    // Create a new Fisherfaces model and retain all available Fisherfaces,
+    // this is the most common usage of this specific FaceRecognizer:
+    //
+    Ptr<FaceRecognizer> model = createFisherFaceRecognizer();
+    @endcode
+
+    And finally train it on the given dataset (the face images and labels):
+
+    @code
+    // This is the common interface to train all of the available cv::FaceRecognizer
+    // implementations:
+    //
+    model->train(images, labels);
+    @endcode
+    */
     CV_WRAP virtual void train(InputArrayOfArrays src, InputArray labels) = 0;
 
-    // Updates a FaceRecognizer.
+    /** @brief Updates a FaceRecognizer with given data and associated labels.
+
+    @param src The training images, that means the faces you want to learn. The data has to be given
+    as a vector\<Mat\>.
+    @param labels The labels corresponding to the images have to be given either as a
+    vector\<int\> or a Mat of type CV_32SC1.
+
+    This method updates a (probably trained) FaceRecognizer, but only if the algorithm supports it. The
+    Local Binary Patterns Histograms (LBPH) recognizer (see createLBPHFaceRecognizer) can be updated.
+    For the Eigenfaces and Fisherfaces method, this is algorithmically not possible and you have to
+    re-estimate the model with FaceRecognizer::train. In any case, a call to train empties the existing
+    model and learns a new model, while update does not delete any model data.
+
+    @code
+    // Create a new LBPH model (it can be updated) and use the default parameters,
+    // this is the most common usage of this specific FaceRecognizer:
+    //
+    Ptr<FaceRecognizer> model = createLBPHFaceRecognizer();
+    // This is the common interface to train all of the available cv::FaceRecognizer
+    // implementations:
+    //
+    model->train(images, labels);
+    // Some containers to hold new images:
+    vector<Mat> newImages;
+    vector<int> newLabels;
+    // You should add some images to the containers:
+    //
+    // ...
+    //
+    // Now updating the model is as easy as calling:
+    model->update(newImages,newLabels);
+    // This will preserve the old model data and extend the existing model
+    // with the new features extracted from newImages!
+    @endcode
+
+    Calling update on an Eigenfaces model (see createEigenFaceRecognizer), which doesn't support
+    updating, will throw an error similar to:
+
+    @code
+    OpenCV Error: The function/feature is not implemented (This FaceRecognizer (FaceRecognizer.Eigenfaces) does not support updating, you have to use FaceRecognizer::train to update it.) in update, file /home/philipp/git/opencv/modules/contrib/src/facerec.cpp, line 305
+    terminate called after throwing an instance of 'cv::Exception'
+    @endcode
+
+    @note The FaceRecognizer does not store your training images, because this would be very
+    memory intensive and it's not the responsibility of the FaceRecognizer to do so. The caller is
+    responsible for maintaining the dataset they want to work with.
+    */
     CV_WRAP virtual void update(InputArrayOfArrays src, InputArray labels) = 0;
 
-    // Gets a prediction from a FaceRecognizer.
+    /** @overload */
     virtual int predict(InputArray src) const = 0;
 
-    // Predicts the label and confidence for a given sample.
+    /** @brief Predicts a label and associated confidence (e.g. distance) for a given input image.
+
+    @param src Sample image to get a prediction from.
+    @param label The predicted label for the given image.
+    @param confidence Associated confidence (e.g. distance) for the predicted label.
+
+    The suffix const means that prediction does not affect the internal model state, so the method can
+    be safely called from within different threads.
+
+    The following example shows how to get a prediction from a trained model:
+
+    @code
+    using namespace cv;
+    // Do your initialization here (create the cv::FaceRecognizer model) ...
+    // ...
+    // Read in a sample image:
+    Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE);
+    // And get a prediction from the cv::FaceRecognizer:
+    int predicted = model->predict(img);
+    @endcode
+
+    Or to get a prediction and the associated confidence (e.g. distance):
+
+    @code
+    using namespace cv;
+    // Do your initialization here (create the cv::FaceRecognizer model) ...
+    // ...
+ Mat img = imread("person1/3.jpg", CV_LOAD_IMAGE_GRAYSCALE); + // Some variables for the predicted label and associated confidence (e.g. distance): + int predicted_label = -1; + double predicted_confidence = 0.0; + // Get the prediction and associated confidence from the model + model->predict(img, predicted_label, predicted_confidence); + @endcode + */ CV_WRAP virtual void predict(InputArray src, CV_OUT int &label, CV_OUT double &confidence) const = 0; - // Serializes this object to a given filename. + /** @brief Saves a FaceRecognizer and its model state. + + Saves this model to a given filename, either as XML or YAML. + @param filename The filename to store this FaceRecognizer to (either XML/YAML). + + Every FaceRecognizer overwrites FaceRecognizer::save(FileStorage& fs) to save the internal model + state. FaceRecognizer::save(const String& filename) saves the state of a model to the given + filename. + + The suffix const means that prediction does not affect the internal model state, so the method can + be safely called from within different threads. + */ CV_WRAP virtual void save(const String& filename) const = 0; - // Deserializes this object from a given filename. + /** @brief Loads a FaceRecognizer and its model state. + + Loads a persisted model and state from a given XML or YAML file . Every FaceRecognizer has to + overwrite FaceRecognizer::load(FileStorage& fs) to enable loading the model state. + FaceRecognizer::load(FileStorage& fs) in turn gets called by + FaceRecognizer::load(const String& filename), to ease saving a model. + */ CV_WRAP virtual void load(const String& filename) = 0; - // Serializes this object to a given cv::FileStorage. + /** @overload + Saves this model to a given FileStorage. + @param fs The FileStorage to store this FaceRecognizer to. + */ virtual void save(FileStorage& fs) const = 0; - // Deserializes this object from a given cv::FileStorage. + /** @overload */ virtual void load(const FileStorage& fs) = 0; - // Sets additional string info for the label + /** @brief Sets string info for the specified model's label. + + The string info is replaced by the provided value if it was set before for the specified label. + */ virtual void setLabelInfo(int label, const String& strInfo) = 0; - // Gets string info by label + /** @brief Gets string information by label. + + If an unknown label id is provided or there is no label information associated with the specified + label id the method returns an empty string. + */ virtual String getLabelInfo(int label) const = 0; - // Gets labels by string + /** @brief Gets vector of labels by string. + + The function searches for the labels containing the specified sub-string in the associated string + info. + */ virtual std::vector getLabelsByString(const String& str) const = 0; }; +/** +@param num_components The number of components (read: Eigenfaces) kept for this Principal +Component Analysis. As a hint: There's no rule how many components (read: Eigenfaces) should be +kept for good reconstruction capabilities. It is based on your input data, so experiment with the +number. Keeping 80 components should almost always be sufficient. +@param threshold The threshold applied in the prediction. + +### Notes: + +- Training and prediction must be done on grayscale images, use cvtColor to convert between the + color spaces. +- **THE EIGENFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL + SIZE.** (caps-lock, because I got so many mails asking for this). 
You have to make sure your
+  input data has the correct shape, else a meaningful exception is thrown. Use resize to resize
+  the images.
+- This model does not support updating.
+
+### Model internal data:
+
+- num_components see createEigenFaceRecognizer.
+- threshold see createEigenFaceRecognizer. If the distance to the nearest neighbor is larger than
+  the threshold, the prediction returns -1.
+- eigenvalues The eigenvalues for this Principal Component Analysis (ordered descending).
+- eigenvectors The eigenvectors for this Principal Component Analysis (ordered by their
+  eigenvalue).
+- mean The sample mean calculated from the training data.
+- projections The projections of the training data.
+- labels The labels corresponding to the projections.
+ */
 CV_EXPORTS_W Ptr<FaceRecognizer> createEigenFaceRecognizer(int num_components = 0, double threshold = DBL_MAX);
+
+/**
+@param num_components The number of components (read: Fisherfaces) kept for this Linear
+Discriminant Analysis with the Fisherfaces criterion. It's useful to keep all components, which
+for c classes (read: subjects, persons you want to recognize) means (c-1) components. If you leave
+this at the default (0), or set it to a value less than or equal to 0 or greater than (c-1), it
+will be set to the correct number (c-1) automatically.
+@param threshold The threshold applied in the prediction. If the distance to the nearest neighbor
+is larger than the threshold, this method returns -1.
+
+### Notes:
+
+- Training and prediction must be done on grayscale images, use cvtColor to convert between the
+  color spaces.
+- **THE FISHERFACES METHOD MAKES THE ASSUMPTION, THAT THE TRAINING AND TEST IMAGES ARE OF EQUAL
+  SIZE.** (caps-lock, because I got so many mails asking for this). You have to make sure your
+  input data has the correct shape, else a meaningful exception is thrown. Use resize to resize
+  the images.
+- This model does not support updating.
+
+### Model internal data:
+
+- num_components see createFisherFaceRecognizer.
+- threshold see createFisherFaceRecognizer.
+- eigenvalues The eigenvalues for this Linear Discriminant Analysis (ordered descending).
+- eigenvectors The eigenvectors for this Linear Discriminant Analysis (ordered by their
+  eigenvalue).
+- mean The sample mean calculated from the training data.
+- projections The projections of the training data.
+- labels The labels corresponding to the projections.
+ */
 CV_EXPORTS_W Ptr<FaceRecognizer> createFisherFaceRecognizer(int num_components = 0, double threshold = DBL_MAX);
+
+/**
+@param radius The radius used for building the Circular Local Binary Pattern. The greater the
+radius, the smoother the image, but the more spatial information you can get.
+@param neighbors The number of sample points to build a Circular Local Binary Pattern from. An
+appropriate value is to use `8` sample points. Keep in mind: the more sample points you include,
+the higher the computational cost.
+@param grid_x The number of cells in the horizontal direction, 8 is a common value used in
+publications. The more cells, the finer the grid, the higher the dimensionality of the resulting
+feature vector.
+@param grid_y The number of cells in the vertical direction, 8 is a common value used in
+publications. The more cells, the finer the grid, the higher the dimensionality of the resulting
+feature vector.
+@param threshold The threshold applied in the prediction. If the distance to the nearest neighbor
+is larger than the threshold, this method returns -1.
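+
+A minimal usage sketch (assuming `images`, `labels` and `testSample` have been prepared as in the
+FaceRecognizer::train documentation; the arguments below are simply the default values written out):
+
+@code
+// Build an LBPH face recognizer with radius 1, 8 neighbors, an 8x8 grid and no threshold:
+Ptr<FaceRecognizer> model = createLBPHFaceRecognizer(1, 8, 8, 8, DBL_MAX);
+model->train(images, labels);
+int predictedLabel = model->predict(testSample);
+@endcode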
+ +### Notes: + +- The Circular Local Binary Patterns (used in training and prediction) expect the data given as + grayscale images, use cvtColor to convert between the color spaces. +- This model supports updating. + +### Model internal data: + +- radius see createLBPHFaceRecognizer. +- neighbors see createLBPHFaceRecognizer. +- grid_x see createLBPHFaceRecognizer. +- grid_y see createLBPHFaceRecognizer. +- threshold see createLBPHFaceRecognizer. +- histograms Local Binary Patterns Histograms calculated from the given training data (empty if + none was given). +- labels Labels corresponding to the calculated Local Binary Patterns Histograms. + */ CV_EXPORTS_W Ptr createLBPHFaceRecognizer(int radius=1, int neighbors=8, int grid_x=8, int grid_y=8, double threshold = DBL_MAX); bool initModule_facerec(); +//! @} + }} //namespace cv::face #endif //__OPENCV_FACEREC_HPP__ diff --git a/modules/latentsvm/doc/latentsvm.bib b/modules/latentsvm/doc/latentsvm.bib new file mode 100644 index 000000000..b25dbd2e1 --- /dev/null +++ b/modules/latentsvm/doc/latentsvm.bib @@ -0,0 +1,18 @@ +@article{Felzenszwalb2010a, + title={Object detection with discriminatively trained part-based models}, + author={Felzenszwalb, Pedro F and Girshick, Ross B and McAllester, David and Ramanan, Deva}, + journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume={32}, + number={9}, + pages={1627--1645}, + year={2010}, + publisher={IEEE} +} +@inproceedings{Felzenszwalb2010b, + title={Cascade object detection with deformable part models}, + author={Felzenszwalb, Pedro F and Girshick, Ross B and McAllester, David}, + booktitle={Computer vision and pattern recognition (CVPR), 2010 IEEE conference on}, + pages={2241--2248}, + year={2010}, + organization={IEEE} +} diff --git a/modules/latentsvm/include/opencv2/latentsvm.hpp b/modules/latentsvm/include/opencv2/latentsvm.hpp index 4f80ea7ce..40a153da3 100644 --- a/modules/latentsvm/include/opencv2/latentsvm.hpp +++ b/modules/latentsvm/include/opencv2/latentsvm.hpp @@ -55,12 +55,48 @@ #include #include +/** @defgroup latentsvm Latent SVM + +Discriminatively Trained Part Based Models for Object Detection +--------------------------------------------------------------- + +The object detector described below has been initially proposed by P.F. Felzenszwalb in +@cite Felzenszwalb2010a. It is based on a Dalal-Triggs detector that uses a single filter on histogram +of oriented gradients (HOG) features to represent an object category. This detector uses a sliding +window approach, where a filter is applied at all positions and scales of an image. The first +innovation is enriching the Dalal-Triggs model using a star-structured part-based model defined by a +"root" filter (analogous to the Dalal-Triggs filter) plus a set of parts filters and associated +deformation models. The score of one of star models at a particular position and scale within an +image is the score of the root filter at the given location plus the sum over parts of the maximum, +over placements of that part, of the part filter score on its location minus a deformation cost +easuring the deviation of the part from its ideal location relative to the root. Both root and part +filter scores are defined by the dot product between a filter (a set of weights) and a subwindow of +a feature pyramid computed from the input image. Another improvement is a representation of the +class of models by a mixture of star models. 
The score of a mixture model at a particular position +and scale is the maximum over components, of the score of that component model at the given +location. + +The detector was dramatically speeded-up with cascade algorithm proposed by P.F. Felzenszwalb in +@cite Felzenszwalb2010b. The algorithm prunes partial hypotheses using thresholds on their scores.The +basic idea of the algorithm is to use a hierarchy of models defined by an ordering of the original +model's parts. For a model with (n+1) parts, including the root, a sequence of (n+1) models is +obtained. The i-th model in this sequence is defined by the first i parts from the original model. +Using this hierarchy, low scoring hypotheses can be pruned after looking at the best configuration +of a subset of the parts. Hypotheses that score high under a weak model are evaluated further using +a richer model. + +In OpenCV there is an C++ implementation of Latent SVM. + +*/ + namespace cv { namespace lsvm { +/** @brief This is a C++ abstract class, it provides external user API to work with Latent SVM. + */ class CV_EXPORTS_W LSVMDetector { public: @@ -75,12 +111,32 @@ public: }; virtual bool isEmpty() const = 0; + + /** @brief Find rectangular regions in the given image that are likely to contain objects of loaded classes + (models) and corresponding confidence levels. + @param image An image. + @param objects The detections: rectangulars, scores and class IDs. + @param overlapThreshold Threshold for the non-maximum suppression algorithm. + */ virtual void detect(cv::Mat const &image, CV_OUT std::vector &objects, float overlapThreshold=0.5f ) = 0; + /** @brief Return the class (model) names that were passed in constructor or method load or extracted from + models filenames in those methods. + */ virtual std::vector const& getClassNames() const = 0; + + /** @brief Return a count of loaded models (classes). + */ virtual size_t getClassCount() const = 0; + /** @brief Load the trained models from given .xml files and return cv::Ptr\. + @param filenames A set of filenames storing the trained detectors (models). Each file contains one + model. See examples of such files here `/opencv_extra/testdata/cv/LSVMDetector/models_VOC2007/`. + @param classNames A set of trained models names. If it's empty then the name of each model will be + constructed from the name of file containing the model. E.g. the model stored in + "/home/user/cat.xml" will get the name "cat". + */ static cv::Ptr create(std::vector const &filenames, std::vector const &classNames = std::vector()); diff --git a/modules/line_descriptor/doc/line_descriptor.bib b/modules/line_descriptor/doc/line_descriptor.bib new file mode 100644 index 000000000..134ad8053 --- /dev/null +++ b/modules/line_descriptor/doc/line_descriptor.bib @@ -0,0 +1,30 @@ +@article{LBD, + title={An efficient and robust line segment matching approach based on LBD descriptor and pairwise geometric consistency}, + author={Zhang, Lilian and Koch, Reinhard}, + journal={Journal of Visual Communication and Image Representation}, + volume={24}, + number={7}, + pages={794--805}, + year={2013}, + publisher={Elsevier} +} + +@article{EDL, + title={LSD: A fast line segment detector with a false detection control}, + author={Von Gioi, R Grompone and Jakubowicz, Jeremie and Morel, Jean-Michel and Randall, Gregory}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + volume={32}, + number={4}, + pages={722--732}, + year={2010}, + publisher={Institute of Electrical and Electronics Engineers, Inc., 345 E. 
47 th St. NY NY 10017-2394 USA} +} + +@inproceedings{MIH, + title={Fast search in hamming space with multi-index hashing}, + author={Norouzi, Mohammad and Punjani, Ali and Fleet, David J}, + booktitle={Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on}, + pages={3108--3115}, + year={2012}, + organization={IEEE} +} diff --git a/modules/line_descriptor/doc/tutorial.markdown b/modules/line_descriptor/doc/tutorial.markdown new file mode 100644 index 000000000..2b01abf7f --- /dev/null +++ b/modules/line_descriptor/doc/tutorial.markdown @@ -0,0 +1,418 @@ +Line Features Tutorial {#line_descriptor_tutorial} +====================== + +In this tutorial it will be shown how to: + +- use the *BinaryDescriptor* interface to extract lines and store them in *KeyLine* objects +- use the same interface to compute descriptors for every extracted line +- use the *BynaryDescriptorMatcher* to determine matches among descriptors obtained from different + images + +Lines extraction and descriptors computation +-------------------------------------------- + +In the following snippet of code, it is shown how to detect lines from an image. The LSD extractor +is initialized with *LSD\_REFINE\_ADV* option; remaining parameters are left to their default +values. A mask of ones is used in order to accept all extracted lines, which, at the end, are +displayed using random colors for octave 0. + +~~~{cpp} +#include + +#include "opencv2/core/utility.hpp" +#include "opencv2/core/private.hpp" +#include +#include +#include + +#include + +using namespace cv; +using namespace std; + +static const char* keys = +{ "{@image_path | | Image path }" }; + +static void help() +{ + cout << "\nThis example shows the functionalities of lines extraction " << "furnished by BinaryDescriptor class\n" + << "Please, run this sample using a command in the form\n" << "./example_line_descriptor_lines_extraction " << endl; +} + +int main( int argc, char** argv ) +{ + /* get parameters from comand line */ + CommandLineParser parser( argc, argv, keys ); + String image_path = parser.get( 0 ); + + if( image_path.empty() ) + { + help(); + return -1; + } + + /* load image */ + cv::Mat imageMat = imread( image_path, 1 ); + if( imageMat.data == NULL ) + { + std::cout << "Error, image could not be loaded. 
Please, check its path" << std::endl; + } + + /* create a ramdom binary mask */ + cv::Mat mask = Mat::ones( imageMat.size(), CV_8UC1 ); + + /* create a pointer to a BinaryDescriptor object with deafult parameters */ + Ptr bd = BinaryDescriptor::createBinaryDescriptor(); + + /* create a structure to store extracted lines */ + vector lines; + + /* extract lines */ + bd->detect( imageMat, lines, mask ); + + /* draw lines extracted from octave 0 */ + cv::Mat output = imageMat.clone(); + if( output.channels() == 1 ) + cvtColor( output, output, COLOR_GRAY2BGR ); + for ( size_t i = 0; i < lines.size(); i++ ) + { + KeyLine kl = lines[i]; + if( kl.octave == 0) + { + /* get a random color */ + int R = ( rand() % (int) ( 255 + 1 ) ); + int G = ( rand() % (int) ( 255 + 1 ) ); + int B = ( rand() % (int) ( 255 + 1 ) ); + + /* get extremes of line */ + Point pt1 = Point( kl.startPointX, kl.startPointY ); + Point pt2 = Point( kl.endPointX, kl.endPointY ); + + /* draw line */ + line( output, pt1, pt2, Scalar( B, G, R ), 5 ); + } + + } + + /* show lines on image */ + imshow( "Lines", output ); + waitKey(); +} +~~~ + +This is the result obtained for famous cameraman image: + +![alternate text](pics/lines_cameraman_edl.png) + +Another way to extract lines is using *LSDDetector* class; such class uses the LSD extractor to +compute lines. To obtain this result, it is sufficient to use the snippet code seen above, just +modifying it by the rows + +~~~{cpp} +/* create a pointer to an LSDDetector object */ +Ptr lsd = LSDDetector::createLSDDetector(); + +/* compute lines */ +std::vector keylines; +lsd->detect( imageMat, keylines, mask ); +~~~ + +Here's the result returned by LSD detector again on cameraman picture: + +![alternate text](pics/cameraman_lines2.png) + +Once keylines have been detected, it is possible to compute their descriptors as shown in the +following: + +~~~{cpp} +#include + +#include "opencv2/core/utility.hpp" +#include "opencv2/core/private.hpp" +#include +#include +#include + +#include + +using namespace cv; + +static const char* keys = +{ "{@image_path | | Image path }" }; + +static void help() +{ + std::cout << "\nThis example shows the functionalities of lines extraction " << "and descriptors computation furnished by BinaryDescriptor class\n" + << "Please, run this sample using a command in the form\n" << "./example_line_descriptor_compute_descriptors " + << std::endl; +} + +int main( int argc, char** argv ) +{ + /* get parameters from command line */ + CommandLineParser parser( argc, argv, keys ); + String image_path = parser.get( 0 ); + + if( image_path.empty() ) + { + help(); + return -1; + } + + /* load image */ + cv::Mat imageMat = imread( image_path, 1 ); + if( imageMat.data == NULL ) + { + std::cout << "Error, image could not be loaded. Please, check its path" << std::endl; + } + + /* create a binary mask */ + cv::Mat mask = Mat::ones( imageMat.size(), CV_8UC1 ); + + /* create a pointer to a BinaryDescriptor object with default parameters */ + Ptr bd = BinaryDescriptor::createBinaryDescriptor(); + + /* compute lines */ + std::vector keylines; + bd->detect( imageMat, keylines, mask ); + + /* compute descriptors */ + cv::Mat descriptors; + bd->compute( imageMat, keylines, descriptors ); + +} +~~~ + +Matching among descriptors +-------------------------- + +If we have extracted descriptors from two different images, it is possible to search for matches +among them. 
One way of doing it is matching exactly a descriptor to each input query descriptor, +choosing the one at closest distance: + +~~~{cpp} +#include + +#include "opencv2/core/utility.hpp" +#include "opencv2/core/private.hpp" +#include +#include +#include + +#include + +using namespace cv; + +static const char* keys = +{ "{@image_path1 | | Image path 1 }" + "{@image_path2 | | Image path 2 }" }; + +static void help() +{ + std::cout << "\nThis example shows the functionalities of lines extraction " << "and descriptors computation furnished by BinaryDescriptor class\n" + << "Please, run this sample using a command in the form\n" << "./example_line_descriptor_compute_descriptors " + << "" << std::endl; + +} + +int main( int argc, char** argv ) +{ + /* get parameters from comand line */ + CommandLineParser parser( argc, argv, keys ); + String image_path1 = parser.get( 0 ); + String image_path2 = parser.get( 1 ); + + if( image_path1.empty() || image_path2.empty() ) + { + help(); + return -1; + } + + /* load image */ + cv::Mat imageMat1 = imread( image_path1, 1 ); + cv::Mat imageMat2 = imread( image_path2, 1 ); + + waitKey(); + if( imageMat1.data == NULL || imageMat2.data == NULL ) + { + std::cout << "Error, images could not be loaded. Please, check their path" << std::endl; + } + + /* create binary masks */ + cv::Mat mask1 = Mat::ones( imageMat1.size(), CV_8UC1 ); + cv::Mat mask2 = Mat::ones( imageMat2.size(), CV_8UC1 ); + + /* create a pointer to a BinaryDescriptor object with default parameters */ + Ptr bd = BinaryDescriptor::createBinaryDescriptor(); + + /* compute lines */ + std::vector keylines1, keylines2; + bd->detect( imageMat1, keylines1, mask1 ); + bd->detect( imageMat2, keylines2, mask2 ); + + /* compute descriptors */ + cv::Mat descr1, descr2; + bd->compute( imageMat1, keylines1, descr1 ); + bd->compute( imageMat2, keylines2, descr2 ); + + /* create a BinaryDescriptorMatcher object */ + Ptr bdm = BinaryDescriptorMatcher::createBinaryDescriptorMatcher(); + + /* require match */ + std::vector matches; + bdm->match( descr1, descr2, matches ); + + /* plot matches */ + cv::Mat outImg; + std::vector mask( matches.size(), 1 ); + drawLineMatches( imageMat1, keylines1, imageMat2, keylines2, matches, outImg, Scalar::all( -1 ), Scalar::all( -1 ), mask, + DrawLinesMatchesFlags::DEFAULT ); + + imshow( "Matches", outImg ); + waitKey(); +} +~~~ + +Sometimes, we could be interested in searching for the closest *k* descriptors, given an input one. +This requires to modify slightly previous code: + +~~~{cpp} +/* prepare a structure to host matches */ +std::vector > matches; + +/* require knn match */ +bdm->knnMatch( descr1, descr2, matches, 6 ); +~~~ + +In the above example, the closest 6 descriptors are returned for every query. In some cases, we +could have a search radius and look for all descriptors distant at the most *r* from input query. +Previous code must me modified: + +~~~{cpp} +/* prepare a structure to host matches */ +std::vector > matches; + +/* compute matches */ +bdm->radiusMatch( queries, matches, 30 ); +~~~ + +Here's an example om matching among descriptors extratced from original cameraman image and its +downsampled (and blurred) version: + +![alternate text](pics/matching2.png) + +Querying internal database +-------------------------- + +The *BynaryDescriptorMatcher* class, owns an internal database that can be populated with +descriptors extracted from different images and queried using one of the modalities described in +previous section. 
Population of internal dataset can be done using the *add* function; such function +doesn't directly add new data to database, but it just stores it them locally. The real update +happens when function *train* is invoked or when any querying function is executed, since each of +them invokes *train* before querying. When queried, internal database not only returns required +descriptors, but, for every returned match, it is able to tell which image matched descriptor was +extracted from. An example of internal dataset usage is described in the following code; after +adding locally new descriptors, a radius search is invoked. This provokes local data to be +transferred to dataset, which, in turn, is then queried. + +~~~{cpp} +#include + +#include "opencv2/core/utility.hpp" +#include "opencv2/core/private.hpp" +#include +#include +#include + +#include +#include + +using namespace cv; + +static const std::string images[] = +{ "cameraman.jpg", "church.jpg", "church2.png", "einstein.jpg", "stuff.jpg" }; + +static const char* keys = +{ "{@image_path | | Image path }" }; + +static void help() +{ + std::cout << "\nThis example shows the functionalities of radius matching " << "Please, run this sample using a command in the form\n" + << "./example_line_descriptor_radius_matching /" << std::endl; +} + +int main( int argc, char** argv ) +{ + /* get parameters from comand line */ + CommandLineParser parser( argc, argv, keys ); + String pathToImages = parser.get( 0 ); + + /* create structures for hosting KeyLines and descriptors */ + int num_elements = sizeof ( images ) / sizeof ( images[0] ); + std::vector descriptorsMat; + std::vector > linesMat; + + /*create a pointer to a BinaryDescriptor object */ + Ptr bd = BinaryDescriptor::createBinaryDescriptor(); + + /* compute lines and descriptors */ + for ( int i = 0; i < num_elements; i++ ) + { + /* get path to image */ + std::stringstream image_path; + image_path << pathToImages << images[i]; + + /* load image */ + Mat loadedImage = imread( image_path.str().c_str(), 1 ); + if( loadedImage.data == NULL ) + { + std::cout << "Could not load images." 
<< std::endl; + help(); + exit( -1 ); + } + + /* compute lines and descriptors */ + std::vector lines; + Mat computedDescr; + bd->detect( loadedImage, lines ); + bd->compute( loadedImage, lines, computedDescr ); + + descriptorsMat.push_back( computedDescr ); + linesMat.push_back( lines ); + + } + + /* compose a queries matrix */ + Mat queries; + for ( size_t j = 0; j < descriptorsMat.size(); j++ ) + { + if( descriptorsMat[j].rows >= 5 ) + queries.push_back( descriptorsMat[j].rowRange( 0, 5 ) ); + + else if( descriptorsMat[j].rows > 0 && descriptorsMat[j].rows < 5 ) + queries.push_back( descriptorsMat[j] ); + } + + std::cout << "It has been generated a matrix of " << queries.rows << " descriptors" << std::endl; + + /* create a BinaryDescriptorMatcher object */ + Ptr bdm = BinaryDescriptorMatcher::createBinaryDescriptorMatcher(); + + /* populate matcher */ + bdm->add( descriptorsMat ); + + /* compute matches */ + std::vector > matches; + bdm->radiusMatch( queries, matches, 30 ); + + /* print matches */ + for ( size_t q = 0; q < matches.size(); q++ ) + { + for ( size_t m = 0; m < matches[q].size(); m++ ) + { + DMatch dm = matches[q][m]; + std::cout << "Descriptor: " << q << " Image: " << dm.imgIdx << " Distance: " << dm.distance << std::endl; + } + } +} +~~~ diff --git a/modules/line_descriptor/include/opencv2/line_descriptor.hpp b/modules/line_descriptor/include/opencv2/line_descriptor.hpp index 42c990ef6..d01d1d577 100644 --- a/modules/line_descriptor/include/opencv2/line_descriptor.hpp +++ b/modules/line_descriptor/include/opencv2/line_descriptor.hpp @@ -44,6 +44,78 @@ #include "opencv2/line_descriptor/descriptor.hpp" +/** @defgroup line_descriptor Binary descriptors for lines extracted from an image + +Introduction +------------ + +One of the most challenging activities in computer vision is the extraction of useful information +from a given image. Such information, usually comes in the form of points that preserve some kind of +property (for instance, they are scale-invariant) and are actually representative of input image. + +The goal of this module is seeking a new kind of representative information inside an image and +providing the functionalities for its extraction and representation. In particular, differently from +previous methods for detection of relevant elements inside an image, lines are extracted in place of +points; a new class is defined ad hoc to summarize a line's properties, for reuse and plotting +purposes. + +Computation of binary descriptors +--------------------------------- + +To obtatin a binary descriptor representing a certain line detected from a certain octave of an +image, we first compute a non-binary descriptor as described in @cite LBD. Such algorithm works on +lines extracted using EDLine detector, as explained in @cite EDL. Given a line, we consider a +rectangular region centered at it and called *line support region (LSR)*. Such region is divided +into a set of bands \f$\{B_1, B_2, ..., B_m\}\f$, whose length equals the one of line. + +If we indicate with \f$\bf{d}_L\f$ the direction of line, the orthogonal and clockwise direction to line +\f$\bf{d}_{\perp}\f$ can be determined; these two directions, are used to construct a reference frame +centered in the middle point of line. The gradients of pixels \f$\bf{g'}\f$ inside LSR can be projected +to the newly determined frame, obtaining their local equivalent +\f$\bf{g'} = (\bf{g}^T \cdot \bf{d}_{\perp}, \bf{g}^T \cdot \bf{d}_L)^T \triangleq (\bf{g'}_{d_{\perp}}, \bf{g'}_{d_L})^T\f$. 
+ +Later on, a Gaussian function is applied to all LSR's pixels along \f$\bf{d}_\perp\f$ direction; first, +we assign a global weighting coefficient \f$f_g(i) = (1/\sqrt{2\pi}\sigma_g)e^{-d^2_i/2\sigma^2_g}\f$ to +*i*-th row in LSR, where \f$d_i\f$ is the distance of *i*-th row from the center row in LSR, +\f$\sigma_g = 0.5(m \cdot w - 1)\f$ and \f$w\f$ is the width of bands (the same for every band). Secondly, +considering a band \f$B_j\f$ and its neighbor bands \f$B_{j-1}, B_{j+1}\f$, we assign a local weighting +\f$F_l(k) = (1/\sqrt{2\pi}\sigma_l)e^{-d'^2_k/2\sigma_l^2}\f$, where \f$d'_k\f$ is the distance of *k*-th +row from the center row in \f$B_j\f$ and \f$\sigma_l = w\f$. Using the global and local weights, we obtain, +at the same time, the reduction of role played by gradients far from line and of boundary effect, +respectively. + +Each band \f$B_j\f$ in LSR has an associated *band descriptor(BD)* which is computed considering +previous and next band (top and bottom bands are ignored when computing descriptor for first and +last band). Once each band has been assignen its BD, the LBD descriptor of line is simply given by + +\f[LBD = (BD_1^T, BD_2^T, ... , BD^T_m)^T.\f] + +To compute a band descriptor \f$B_j\f$, each *k*-th row in it is considered and the gradients in such +row are accumulated: + +\f[\begin{matrix} \bf{V1}^k_j = \lambda \sum\limits_{\bf{g}'_{d_\perp}>0}\bf{g}'_{d_\perp}, & \bf{V2}^k_j = \lambda \sum\limits_{\bf{g}'_{d_\perp}<0} -\bf{g}'_{d_\perp}, \\ \bf{V3}^k_j = \lambda \sum\limits_{\bf{g}'_{d_L}>0}\bf{g}'_{d_L}, & \bf{V4}^k_j = \lambda \sum\limits_{\bf{g}'_{d_L}<0} -\bf{g}'_{d_L}\end{matrix}.\f] + +with \f$\lambda = f_g(k)f_l(k)\f$. + +By stacking previous results, we obtain the *band description matrix (BDM)* + +\f[BDM_j = \left(\begin{matrix} \bf{V1}_j^1 & \bf{V1}_j^2 & \ldots & \bf{V1}_j^n \\ \bf{V2}_j^1 & \bf{V2}_j^2 & \ldots & \bf{V2}_j^n \\ \bf{V3}_j^1 & \bf{V3}_j^2 & \ldots & \bf{V3}_j^n \\ \bf{V4}_j^1 & \bf{V4}_j^2 & \ldots & \bf{V4}_j^n \end{matrix} \right) \in \mathbb{R}^{4\times n},\f] + +with \f$n\f$ the number of rows in band \f$B_j\f$: + +\f[n = \begin{cases} 2w, & j = 1||m; \\ 3w, & \mbox{else}. \end{cases}\f] + +Each \f$BD_j\f$ can be obtained using the standard deviation vector \f$S_j\f$ and mean vector \f$M_j\f$ of +\f$BDM_J\f$. Thus, finally: + +\f[LBD = (M_1^T, S_1^T, M_2^T, S_2^T, \ldots, M_m^T, S_m^T)^T \in \mathbb{R}^{8m}\f] + +Once the LBD has been obtained, it must be converted into a binary form. For such purpose, we +consider 32 possible pairs of BD inside it; each couple of BD is compared bit by bit and comparison +generates an 8 bit string. Concatenating 32 comparison strings, we get the 256-bit final binary +representation of a single LBD. +*/ + namespace cv { CV_EXPORTS bool initModule_line_descriptor( void ); diff --git a/modules/line_descriptor/include/opencv2/line_descriptor/descriptor.hpp b/modules/line_descriptor/include/opencv2/line_descriptor/descriptor.hpp index 28a900c88..263af06ea 100644 --- a/modules/line_descriptor/include/opencv2/line_descriptor/descriptor.hpp +++ b/modules/line_descriptor/include/opencv2/line_descriptor/descriptor.hpp @@ -71,177 +71,247 @@ namespace cv namespace line_descriptor { +//! @addtogroup line_descriptor +//! @{ + CV_EXPORTS bool initModule_line_descriptor(); +/** @brief A class to represent a line + +As aformentioned, it is been necessary to design a class that fully stores the information needed to +characterize completely a line and plot it on image it was extracted from, when required. 
+ +*KeyLine* class has been created for such goal; it is mainly inspired to Feature2d's KeyPoint class, +since KeyLine shares some of *KeyPoint*'s fields, even if a part of them assumes a different +meaning, when speaking about lines. In particular: + +- the *class_id* field is used to gather lines extracted from different octaves which refer to + same line inside original image (such lines and the one they represent in original image share + the same *class_id* value) +- the *angle* field represents line's slope with respect to (positive) X axis +- the *pt* field represents line's midpoint +- the *response* field is computed as the ratio between the line's length and maximum between + image's width and height +- the *size* field is the area of the smallest rectangle containing line + +Apart from fields inspired to KeyPoint class, KeyLines stores information about extremes of line in +original image and in octave it was extracted from, about line's length and number of pixels it +covers. + */ struct CV_EXPORTS KeyLine { public: - /* orientation of the line */ - /*CV_PROP_RW*/ + /** orientation of the line */ float angle; - /* object ID, that can be used to cluster keylines by the line they represent */ - /*CV_PROP_RW*/ + /** object ID, that can be used to cluster keylines by the line they represent */ int class_id; - /* octave (pyramid layer), from which the keyline has been extracted */ - /*CV_PROP_RW*/ + /** octave (pyramid layer), from which the keyline has been extracted */ int octave; - /* coordinates of the middlepoint */ - /*CV_PROP_RW*/ + /** coordinates of the middlepoint */ Point2f pt; - /* the response, by which the strongest keylines have been selected. + /** the response, by which the strongest keylines have been selected. It's represented by the ratio between line's length and maximum between image's width and height */ - /*CV_PROP_RW*/ float response; - /* minimum area containing line */ - /*CV_PROP_RW*/ + /** minimum area containing line */ float size; - /* lines's extremes in original image */ - /*CV_PROP_RW*/ - float startPointX;/*CV_PROP_RW*/ - float startPointY;/*CV_PROP_RW*/ - float endPointX;/*CV_PROP_RW*/ + /** lines's extremes in original image */ + float startPointX; + float startPointY; + float endPointX; float endPointY; - /* line's extremes in image it was extracted from */ - /*CV_PROP_RW*/ - float sPointInOctaveX;/*CV_PROP_RW*/ - float sPointInOctaveY;/*CV_PROP_RW*/ - float ePointInOctaveX;/*CV_PROP_RW*/ + /** line's extremes in image it was extracted from */ + float sPointInOctaveX; + float sPointInOctaveY; + float ePointInOctaveX; float ePointInOctaveY; - /* the length of line */ - /*CV_PROP_RW*/ + /** the length of line */ float lineLength; - /* number of pixels covered by the line */ - /*CV_PROP_RW*/ + /** number of pixels covered by the line */ int numOfPixels; - /* constructor */ - /*CV_WRAP*/ + /** constructor */ KeyLine() { } }; +/** @brief Class implements both functionalities for detection of lines and computation of their +binary descriptor. + +Class' interface is mainly based on the ones of classical detectors and extractors, such as +Feature2d's @ref features2d_main and @ref features2d_match. Retrieved information about lines is +stored in line_descriptor::KeyLine objects. 
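+
+A minimal usage sketch (not taken from the original sample code; the image path is a placeholder
+and error checking is omitted):
+@code{.cpp}
+#include <opencv2/line_descriptor.hpp>
+#include <opencv2/imgcodecs.hpp>
+using namespace cv;
+using namespace cv::line_descriptor;
+
+Mat image = imread( "lines.jpg", IMREAD_COLOR );
+Ptr<BinaryDescriptor> bd = BinaryDescriptor::createBinaryDescriptor();
+std::vector<KeyLine> keylines;
+Mat descriptors;
+bd->detect( image, keylines );                 // extract lines
+bd->compute( image, keylines, descriptors );   // compute their binary descriptors
+@endcode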
+ */ class CV_EXPORTS BinaryDescriptor : public Algorithm { public: + /** @brief List of BinaryDescriptor parameters: + */ struct CV_EXPORTS Params { /*CV_WRAP*/ Params(); - /* the number of image octaves (default = 1) */ - /*CV_PROP_RW*/ + /** the number of image octaves (default = 1) */ + int numOfOctave_; - /* the width of band; (default: 7) */ - /*CV_PROP_RW*/ + /** the width of band; (default: 7) */ + int widthOfBand_; - /* image's reduction ratio in construction of Gaussian pyramids */ - /*CV_PROP_RW*/ + /** image's reduction ratio in construction of Gaussian pyramids */ int reductionRatio; - /*CV_PROP_RW*/ int ksize_; - /* read parameters from a FileNode object and store them (struct function) */ - /*CV_WRAP*/ + /** read parameters from a FileNode object and store them (struct function) */ void read( const FileNode& fn ); - /* store parameters to a FileStorage object (struct function) */ - /*CV_WRAP*/ + /** store parameters to a FileStorage object (struct function) */ void write( FileStorage& fs ) const; }; - /* constructor */ - /*CV_WRAP*/ + /** @brief Constructor + + @param parameters configuration parameters BinaryDescriptor::Params + + If no argument is provided, constructor sets default values (see comments in the code snippet in + previous section). Default values are strongly reccomended. + */ BinaryDescriptor( const BinaryDescriptor::Params ¶meters = BinaryDescriptor::Params() ); - /* constructors with smart pointers */ - /*CV_WRAP*/ - static Ptr createBinaryDescriptor();/*CV_WRAP*/ + /** @brief Create a BinaryDescriptor object with default parameters (or with the ones provided) + and return a smart pointer to it + */ + static Ptr createBinaryDescriptor(); static Ptr createBinaryDescriptor( Params parameters ); - /* destructor */ + /** destructor */ ~BinaryDescriptor(); - /* setters and getters */ - /*CV_WRAP*/ + /** @brief Get current number of octaves + */ int getNumOfOctaves();/*CV_WRAP*/ + /** @brief Set number of octaves + @param octaves number of octaves + */ void setNumOfOctaves( int octaves );/*CV_WRAP*/ + /** @brief Get current width of bands + */ int getWidthOfBand();/*CV_WRAP*/ + /** @brief Set width of bands + @param width width of bands + */ void setWidthOfBand( int width );/*CV_WRAP*/ + /** @brief Get current reduction ratio (used in Gaussian pyramids) + */ int getReductionRatio();/*CV_WRAP*/ + /** @brief Set reduction ratio (used in Gaussian pyramids) + @param rRatio reduction ratio + */ void setReductionRatio( int rRatio ); - /* reads parameters from a FileNode object and store them (class function ) */ - /*CV_WRAP*/ + /** @brief Read parameters from a FileNode object and store them + + @param fn source FileNode file + */ virtual void read( const cv::FileNode& fn ); - /* stores parameters to a FileStorage object (class function) */ - /*CV_WRAP*/ + /** @brief Store parameters to a FileStorage object + + @param fs output FileStorage file + */ virtual void write( cv::FileStorage& fs ) const; - /* requires line detection (only one image) */ - /*CV_WRAP*/ + /** @brief Requires line detection + + @param image input image + @param keypoints vector that will store extracted lines for one or more images + @param mask mask matrix to detect only KeyLines of interest + */ void detect( const Mat& image, CV_OUT std::vector& keypoints, const Mat& mask = Mat() ); - /* requires line detection (more than one image) */ - /*CV_WRAP*/ + /** @overload + + @param images input images + @param keylines set of vectors that will store extracted lines for one or more images + @param 
masks vector of mask matrices to detect only KeyLines of interest from each input image + */ void detect( const std::vector& images, std::vector >& keylines, const std::vector& masks = std::vector() ) const; - /* requires descriptors computation (only one image) */ - /*CV_WRAP*/ + /** @brief Requires descriptors computation + + @param image input image + @param keylines vector containing lines for which descriptors must be computed + @param descriptors + @param returnFloatDescr flag (when set to true, original non-binary descriptors are returned) + */ void compute( const Mat& image, CV_OUT CV_IN_OUT std::vector& keylines, CV_OUT Mat& descriptors, bool returnFloatDescr = false ) const; - /* requires descriptors computation (more than one image) */ - /*CV_WRAP*/ + /** @overload + + @param images input images + @param keylines set of vectors containing lines for which descriptors must be computed + @param descriptors + @param returnFloatDescr flag (when set to true, original non-binary descriptors are returned) + */ void compute( const std::vector& images, std::vector >& keylines, std::vector& descriptors, bool returnFloatDescr = false ) const; - /* returns descriptor size */ - /*CV_WRAP*/ + /** @brief Return descriptor size + */ int descriptorSize() const; - /* returns data type */ - /*CV_WRAP*/ + /** @brief Return data type + */ int descriptorType() const; - /* returns norm mode */ + /** returns norm mode */ /*CV_WRAP*/ int defaultNorm() const; - /* definition of operator () */ - //CV_WRAP_AS(detectAndCompute) + /** @brief Define operator '()' to perform detection of KeyLines and computation of descriptors in a row. + + @param image input image + @param mask mask matrix to select which lines in KeyLines must be accepted among the ones + extracted (used when *keylines* is not empty) + @param keylines vector that contains input lines (when filled, the detection part will be skipped + and input lines will be passed as input to the algorithm computing descriptors) + @param descriptors matrix that will store final descriptors + @param useProvidedKeyLines flag (when set to true, detection phase will be skipped and only + computation of descriptors will be executed, using lines provided in *keylines*) + @param returnFloatDescr flag (when set to true, original non-binary descriptors are returned) + */ virtual void operator()( InputArray image, InputArray mask, CV_OUT std::vector& keylines, OutputArray descriptors, bool useProvidedKeyLines = false, bool returnFloatDescr = false ) const; protected: - /* implementation of line detection */ + /** implementation of line detection */ virtual void detectImpl( const Mat& imageSrc, std::vector& keylines, const Mat& mask = Mat() ) const; - /* implementation of descriptors' computation */ + /** implementation of descriptors' computation */ virtual void computeImpl( const Mat& imageSrc, std::vector& keylines, Mat& descriptors, bool returnFloatDescr, bool useDetectionData ) const; - /* function inherited from Algorithm */ + /** function inherited from Algorithm */ AlgorithmInfo* info() const; private: - /* struct to represent lines extracted from an octave */ + /** struct to represent lines extracted from an octave */ struct OctaveLine { unsigned int octaveCount; //the octave which this line is detected @@ -349,7 +419,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm #define MLN10 2.30258509299404568402 #define log_gamma(x) ((x)>15.0?log_gamma_windschitl(x):log_gamma_lanczos(x)) - /* This class is used to detect lines from input image. 
+ /** This class is used to detect lines from input image. * First, edges are extracted from input image following the method presented in Cihan Topal and * Cuneyt Akinlar's paper:"Edge Drawing: A Heuristic Approach to Robust Real-Time Edge Detection", 2010. * Then, lines are extracted from the edge image following the method presented in Cuneyt Akinlar and @@ -378,7 +448,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm */ int EDline( cv::Mat &image, LineChains &lines ); - /* extract line from image, and store them */ + /** extract line from image, and store them */ int EDline( cv::Mat &image ); cv::Mat dxImg_; //store the dxImg; @@ -454,7 +524,7 @@ class CV_EXPORTS BinaryDescriptor : public Algorithm double LeastSquaresLineFit_( unsigned int *xCors, unsigned int *yCors, unsigned int offsetS, unsigned int newOffsetS, unsigned int offsetE, std::vector &lineEquation ); - /* Validate line based on the Helmholtz principle, which basically states that + /** Validate line based on the Helmholtz principle, which basically states that * for a structure to be perceptually meaningful, the expectation of this structure * by chance must be very low. */ @@ -780,6 +850,23 @@ std::vector octaveImages; }; +/** +Lines extraction methodology +---------------------------- + +The lines extraction methodology described in the following is mainly based on @cite EDL. The +extraction starts with a Gaussian pyramid generated from an original image, downsampled N-1 times, +blurred N times, to obtain N layers (one for each octave), with layer 0 corresponding to input +image. Then, from each layer (octave) in the pyramid, lines are extracted using LSD algorithm. + +Differently from EDLine lines extractor used in original article, LSD furnishes information only +about lines extremes; thus, additional information regarding slope and equation of line are computed +via analytic methods. The number of pixels is obtained using *LineIterator*. Extracted lines are +returned in the form of KeyLine objects, but since extraction is based on a method different from +the one used in *BinaryDescriptor* class, data associated to a line's extremes in original image and +in octave it was extracted from, coincide. KeyLine's field *class_id* is used as an index to +indicate the order of extraction of a line inside a single octave. +*/ class CV_EXPORTS LSDDetector : public Algorithm { public: @@ -791,16 +878,27 @@ LSDDetector() } ; -/* constructor with smart pointer */ -/*CV_WRAP*/ +/** @brief Creates ad LSDDetector object, using smart pointers. + */ static Ptr createLSDDetector(); -/* requires line detection (only one image) */ -/*CV_WRAP*/ +/** @brief Detect lines inside an image. 
+ +@param image input image +@param keypoints vector that will store extracted lines for one or more images +@param scale scale factor used in pyramids generation +@param numOctaves number of octaves inside pyramid +@param mask mask matrix to detect only KeyLines of interest + */ void detect( const Mat& image, CV_OUT std::vector& keypoints, int scale, int numOctaves, const Mat& mask = Mat() ); -/* requires line detection (more than one image) */ -/*CV_WRAP*/ +/** @overload +@param images input images +@param keylines set of vectors that will store extracted lines for one or more images +@param scale scale factor used in pyramids generation +@param numOctaves number of octaves inside pyramid +@param masks vector of mask matrices to detect only KeyLines of interest from each input image +*/ void detect( const std::vector& images, std::vector >& keylines, int scale, int numOctaves, const std::vector& masks = std::vector() ) const; @@ -819,72 +917,159 @@ protected: AlgorithmInfo* info() const; }; +/** @brief furnishes all functionalities for querying a dataset provided by user or internal to +class (that user must, anyway, populate) on the model of @ref features2d_match + + +Once descriptors have been extracted from an image (both they represent lines and points), it +becomes interesting to be able to match a descriptor with another one extracted from a different +image and representing the same line or point, seen from a differente perspective or on a different +scale. In reaching such goal, the main headache is designing an efficient search algorithm to +associate a query descriptor to one extracted from a dataset. In the following, a matching modality +based on *Multi-Index Hashing (MiHashing)* will be described. + +Multi-Index Hashing +------------------- + +The theory described in this section is based on @cite MIH. Given a dataset populated with binary +codes, each code is indexed *m* times into *m* different hash tables, according to *m* substrings it +has been divided into. Thus, given a query code, all the entries close to it at least in one +substring are returned by search as *neighbor candidates*. Returned entries are then checked for +validity by verifying that their full codes are not distant (in Hamming space) more than *r* bits +from query code. In details, each binary code **h** composed of *b* bits is divided into *m* +disjoint substrings \f$\mathbf{h}^{(1)}, ..., \mathbf{h}^{(m)}\f$, each with length +\f$\lfloor b/m \rfloor\f$ or \f$\lceil b/m \rceil\f$ bits. Formally, when two codes **h** and **g** differ +by at the most *r* bits, in at the least one of their *m* substrings they differ by at the most +\f$\lfloor r/m \rfloor\f$ bits. In particular, when \f$||\mathbf{h}-\mathbf{g}||_H \le r\f$ (where \f$||.||_H\f$ +is the Hamming norm), there must exist a substring *k* (with \f$1 \le k \le m\f$) such that + +\f[||\mathbf{h}^{(k)} - \mathbf{g}^{(k)}||_H \le \left\lfloor \frac{r}{m} \right\rfloor .\f] + +That means that if Hamming distance between each of the *m* substring is strictly greater than +\f$\lfloor r/m \rfloor\f$, then \f$||\mathbf{h}-\mathbf{g}||_H\f$ must be larger that *r* and that is a +contradiction. If the codes in dataset are divided into *m* substrings, then *m* tables will be +built. Given a query **q** with substrings \f$\{\mathbf{q}^{(i)}\}^m_{i=1}\f$, *i*-th hash table is +searched for entries distant at the most \f$\lfloor r/m \rfloor\f$ from \f$\mathbf{q}^{(i)}\f$ and a set of +candidates \f$\mathcal{N}_i(\mathbf{q})\f$ is obtained. 
The union of sets +\f$\mathcal{N}(\mathbf{q}) = \bigcup_i \mathcal{N}_i(\mathbf{q})\f$ is a superset of the *r*-neighbors +of **q**. Then, last step of algorithm is computing the Hamming distance between **q** and each +element in \f$\mathcal{N}(\mathbf{q})\f$, deleting the codes that are distant more that *r* from **q**. +*/ class CV_EXPORTS BinaryDescriptorMatcher : public Algorithm { public: -/* for every input descriptor, - find the best matching one (for a pair of images) */ -/*CV_WRAP*/ +/** @brief For every input query descriptor, retrieve the best matching one from a dataset provided from user +or from the one internal to class + +@param queryDescriptors query descriptors +@param trainDescriptors dataset of descriptors furnished by user +@param matches vector to host retrieved matches +@param mask mask to select which input descriptors must be matched to one in dataset + */ void match( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector& matches, const Mat& mask = Mat() ) const; -/* for every input descriptor, - find the best matching one (from one image to a set) */ -/*CV_WRAP*/ +/** @overload +@param queryDescriptors query descriptors +@param matches vector to host retrieved matches +@param masks vector of masks to select which input descriptors must be matched to one in dataset +(the *i*-th mask in vector indicates whether each input query can be matched with descriptors in +dataset relative to *i*-th image) +*/ void match( const Mat& queryDescriptors, std::vector& matches, const std::vector& masks = std::vector() ); -/* for every input descriptor, - find the best k matching descriptors (for a pair of images) */ -/*CV_WRAP*/ +/** @brief For every input query descriptor, retrieve the best *k* matching ones from a dataset provided from +user or from the one internal to class + +@param queryDescriptors query descriptors +@param trainDescriptors dataset of descriptors furnished by user +@param matches vector to host retrieved matches +@param k number of the closest descriptors to be returned for every input query +@param mask mask to select which input descriptors must be matched to ones in dataset +@param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any +matches for a given query is not inserted in final result) + */ void knnMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector >& matches, int k, const Mat& mask = Mat(), bool compactResult = false ) const; -/* for every input descriptor, - find the best k matching descriptors (from one image to a set) */ -/*CV_WRAP*/ +/** @overload +@param queryDescriptors query descriptors +@param matches vector to host retrieved matches +@param k number of the closest descriptors to be returned for every input query +@param masks vector of masks to select which input descriptors must be matched to ones in dataset +(the *i*-th mask in vector indicates whether each input query can be matched with descriptors in +dataset relative to *i*-th image) +@param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any +matches for a given query is not inserted in final result) +*/ void knnMatch( const Mat& queryDescriptors, std::vector >& matches, int k, const std::vector& masks = std::vector(), bool compactResult = false ); -/* for every input descriptor, find all the ones falling in a - certain matching radius (for a pair of images) */ -/*CV_WRAP*/ +/** @brief For every input query descriptor, retrieve, from a dataset provided from user or 
from the one +internal to class, all the descriptors that are not further than *maxDist* from input query + +@param queryDescriptors query descriptors +@param trainDescriptors dataset of descriptors furnished by user +@param matches vector to host retrieved matches +@param maxDistance search radius +@param mask mask to select which input descriptors must be matched to ones in dataset +@param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any +matches for a given query is not inserted in final result) + */ void radiusMatch( const Mat& queryDescriptors, const Mat& trainDescriptors, std::vector >& matches, float maxDistance, const Mat& mask = Mat(), bool compactResult = false ) const; -/* for every input descriptor, find all the ones falling in a - certain matching radius (from one image to a set) */ -/*CV_WRAP*/ +/** @overload +@param queryDescriptors query descriptors +@param matches vector to host retrieved matches +@param maxDistance search radius +@param masks vector of masks to select which input descriptors must be matched to ones in dataset +(the *i*-th mask in vector indicates whether each input query can be matched with descriptors in +dataset relative to *i*-th image) +@param compactResult flag to obtain a compact result (if true, a vector that doesn't contain any +matches for a given query is not inserted in final result) +*/ void radiusMatch( const Mat& queryDescriptors, std::vector >& matches, float maxDistance, const std::vector& masks = std::vector(), bool compactResult = false ); -/* store new descriptors to be inserted in dataset */ -/*CV_WRAP*/ +/** @brief Store locally new descriptors to be inserted in dataset, without updating dataset. + +@param descriptors matrices containing descriptors to be inserted into dataset + +@note Each matrix *i* in **descriptors** should contain descriptors relative to lines extracted from +*i*-th image. + */ void add( const std::vector& descriptors ); -/* store new descriptors into dataset */ -/*CV_WRAP*/ +/** @brief Update dataset by inserting into it all descriptors that were stored locally by *add* function. + +@note Every time this function is invoked, current dataset is deleted and locally stored descriptors +are inserted into dataset. The locally stored copy of just inserted descriptors is then removed. + */ void train(); -/* constructor with smart pointer */ -/*CV_WRAP*/ +/** @brief Create a BinaryDescriptorMatcher object and return a smart pointer to it. + */ static Ptr createBinaryDescriptorMatcher(); -/* clear dataset and internal data */ -/*CV_WRAP*/ +/** @brief Clear dataset and internal data + */ void clear(); -/* constructor */ -/*CV_WRAP*/ +/** @brief Constructor. + +The BinaryDescriptorMatcher constructed is able to store and manage 256-bits long entries. 
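+
+A minimal usage sketch (descriptorsPerImage and queryDescriptors are placeholder variables,
+assumed to be produced by BinaryDescriptor::compute as in the tutorial):
+@code{.cpp}
+Ptr<BinaryDescriptorMatcher> bdm = BinaryDescriptorMatcher::createBinaryDescriptorMatcher();
+bdm->add( descriptorsPerImage );   // std::vector<Mat>, one descriptor matrix per image
+bdm->train();                      // move locally stored descriptors into the dataset
+std::vector<DMatch> matches;
+bdm->match( queryDescriptors, matches );   // best match in the dataset for every query
+@endcode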
+ */ BinaryDescriptorMatcher(); -/* destructor */ +/** destructor */ ~BinaryDescriptorMatcher() { } protected: -/* function inherited from Algorithm */ +/** function inherited from Algorithm */ AlgorithmInfo* info() const; private: @@ -892,23 +1077,23 @@ class BucketGroup { public: -/* constructor */ +/** constructor */ BucketGroup(); -/* destructor */ +/** destructor */ ~BucketGroup(); -/* insert data into the bucket */ +/** insert data into the bucket */ void insert( int subindex, UINT32 data ); -/* perform a query to the bucket */ +/** perform a query to the bucket */ UINT32* query( int subindex, int *size ); -/* utility functions */ +/** utility functions */ void insert_value( std::vector& vec, int index, UINT32 data ); void push_value( std::vector& vec, UINT32 Data ); -/* data fields */ +/** data fields */ UINT32 empty; std::vector group; @@ -920,60 +1105,60 @@ class SparseHashtable private: -/* Maximum bits per key before folding the table */ +/** Maximum bits per key before folding the table */ static const int MAX_B; -/* Bins (each bin is an Array object for duplicates of the same key) */ +/** Bins (each bin is an Array object for duplicates of the same key) */ BucketGroup *table; public: -/* constructor */ +/** constructor */ SparseHashtable(); -/* destructor */ +/** destructor */ ~SparseHashtable(); -/* initializer */ +/** initializer */ int init( int _b ); -/* insert data */ +/** insert data */ void insert( UINT64 index, UINT32 data ); -/* query data */ +/** query data */ UINT32* query( UINT64 index, int* size ); -/* Bits per index */ +/** Bits per index */ int b; -/* Number of bins */ +/** Number of bins */ UINT64 size; }; -/* class defining a sequence of bits */ +/** class defining a sequence of bits */ class bitarray { public: -/* pointer to bits sequence and sequence's length */ +/** pointer to bits sequence and sequence's length */ UINT32 *arr; UINT32 length; -/* constructor setting default values */ +/** constructor setting default values */ bitarray() { arr = NULL; length = 0; } -/* constructor setting sequence's length */ +/** constructor setting sequence's length */ bitarray( UINT64 _bits ) { init( _bits ); } -/* initializer of private fields */ +/** initializer of private fields */ void init( UINT64 _bits ) { length = (UINT32) ceil( _bits / 32.00 ); @@ -981,7 +1166,7 @@ arr = new UINT32[length]; erase(); } -/* destructor */ +/** destructor */ ~bitarray() { if( arr ) @@ -1003,7 +1188,7 @@ inline UINT8 get( UINT64 index ) return ( arr[index >> 5] & ( ( (UINT32) 0x01 ) << ( index % 32 ) ) ) != 0; } -/* reserve menory for an UINT32 */ +/** reserve menory for an UINT32 */ inline void erase() { memset( arr, 0, sizeof(UINT32) * length ); @@ -1015,91 +1200,91 @@ class Mihasher { public: -/* Bits per code */ +/** Bits per code */ int B; -/* B/8 */ +/** B/8 */ int B_over_8; -/* Bits per chunk (must be less than 64) */ +/** Bits per chunk (must be less than 64) */ int b; -/* Number of chunks */ +/** Number of chunks */ int m; -/* Number of chunks with b bits (have 1 bit more than others) */ +/** Number of chunks with b bits (have 1 bit more than others) */ int mplus; -/* Maximum hamming search radius (we use B/2 by default) */ +/** Maximum hamming search radius (we use B/2 by default) */ int D; -/* Maximum hamming search radius per substring */ +/** Maximum hamming search radius per substring */ int d; -/* Maximum results to return */ +/** Maximum results to return */ int K; -/* Number of codes */ +/** Number of codes */ UINT64 N; -/* Table of original full-length codes */ +/** 
Table of original full-length codes */ cv::Mat codes; -/* Counter for eliminating duplicate results (it is not thread safe) */ +/** Counter for eliminating duplicate results (it is not thread safe) */ bitarray *counter; -/* Array of m hashtables */ +/** Array of m hashtables */ SparseHashtable *H; -/* Volume of a b-bit Hamming ball with radius s (for s = 0 to d) */ +/** Volume of a b-bit Hamming ball with radius s (for s = 0 to d) */ UINT32 *xornum; -/* Used within generation of binary codes at a certain Hamming distance */ +/** Used within generation of binary codes at a certain Hamming distance */ int power[100]; -/* constructor */ +/** constructor */ Mihasher(); -/* desctructor */ +/** destructor */ ~Mihasher(); -/* constructor 2 */ +/** constructor 2 */ Mihasher( int B, int m ); -/* K setter */ +/** K setter */ void setK( int K ); -/* populate tables */ +/** populate tables */ void populate( cv::Mat & codes, UINT32 N, int dim1codes ); -/* execute a batch query */ +/** execute a batch query */ void batchquery( UINT32 * results, UINT32 *numres/*, qstat *stats*/, const cv::Mat & q, UINT32 numq, int dim1queries ); private: -/* execute a single query */ +/** execute a single query */ void query( UINT32 * results, UINT32* numres/*, qstat *stats*/, UINT8 *q, UINT64 * chunks, UINT32 * res ); }; -/* retrieve Hamming distances */ +/** retrieve Hamming distances */ void checkKDistances( UINT32 * numres, int k, std::vector& k_distances, int row, int string_length ) const; -/* matrix to store new descriptors */ +/** matrix to store new descriptors */ Mat descriptorsMat; -/* map storing where each bunch of descriptors benins in DS */ +/** map storing where each bunch of descriptors begins in DS */ std::map indexesMap; -/* internal MiHaser representing dataset */ +/** internal Mihasher representing dataset */ Mihasher* dataset; -/* index from which next added descriptors' bunch must begin */ +/** index from which next added descriptors' bunch must begin */ int nextAddedIndex; -/* number of images whose descriptors are stored in DS */ +/** number of images whose descriptors are stored in DS */ int numImages; -/* number of descriptors in dataset */ +/** number of descriptors in dataset */ int descrInDS; }; @@ -1108,32 +1293,56 @@ int descrInDS; UTILITY FUNCTIONS -------------------------------------------------------------------------------------------- */ -/* struct for drawing options */ +/** struct for drawing options */ struct CV_EXPORTS DrawLinesMatchesFlags { enum { -DEFAULT = 0, // Output image matrix will be created (Mat::create), - // i.e. existing memory of output image may be reused. - // Two source images, matches, and single keylines - // will be drawn. -DRAW_OVER_OUTIMG = 1,// Output image matrix will not be -// created (using Mat::create). Matches will be drawn -// on existing content of output image. -NOT_DRAW_SINGLE_LINES = 2// Single keylines will not be drawn. +DEFAULT = 0, //!< Output image matrix will be created (Mat::create), + //!< i.e. existing memory of output image may be reused. + //!< Two source images, matches, and single keylines + //!< will be drawn. +DRAW_OVER_OUTIMG = 1,//!< Output image matrix will not be +//!< created (using Mat::create). Matches will be drawn +//!< on existing content of output image. +NOT_DRAW_SINGLE_LINES = 2//!< Single keylines will not be drawn. }; }; -/* draw matches between two images */ +/** @brief Draws the found matches of keylines from two images.
+ +@param img1 first image +@param keylines1 keylines extracted from first image +@param img2 second image +@param keylines2 keylines extracted from second image +@param matches1to2 vector of matches +@param outImg output matrix to draw on +@param matchColor drawing color for matches (chosen randomly in case of default value) +@param singleLineColor drawing color for keylines (chosen randomly in case of default value) +@param matchesMask mask to indicate which matches must be drawn +@param flags drawing flags, see DrawLinesMatchesFlags + +@note If both *matchColor* and *singleLineColor* are set to their default values, function draws +matched lines and line connecting them with same color + */ CV_EXPORTS void drawLineMatches( const Mat& img1, const std::vector& keylines1, const Mat& img2, const std::vector& keylines2, const std::vector& matches1to2, Mat& outImg, const Scalar& matchColor = Scalar::all( -1 ), const Scalar& singleLineColor = Scalar::all( -1 ), const std::vector& matchesMask = std::vector(), int flags = DrawLinesMatchesFlags::DEFAULT ); -/* draw extracted lines on original image */ +/** @brief Draws keylines. + +@param image input image +@param keylines keylines to be drawn +@param outImage output image to draw on +@param color color of lines to be drawn (if set to defaul value, color is chosen randomly) +@param flags drawing flags + */ CV_EXPORTS void drawKeylines( const Mat& image, const std::vector& keylines, Mat& outImage, const Scalar& color = Scalar::all( -1 ), int flags = DrawLinesMatchesFlags::DEFAULT ); +//! @} + } } diff --git a/modules/matlab/include/opencv2/matlab/bridge.hpp b/modules/matlab/include/opencv2/matlab/bridge.hpp index a98c06a82..4c75ac6cb 100644 --- a/modules/matlab/include/opencv2/matlab/bridge.hpp +++ b/modules/matlab/include/opencv2/matlab/bridge.hpp @@ -43,6 +43,9 @@ #ifndef OPENCV_BRIDGE_HPP_ #define OPENCV_BRIDGE_HPP_ +/** @defgroup matlab MATLAB Bridge +*/ + #include "mxarray.hpp" #include #include @@ -55,6 +58,9 @@ namespace cv { namespace bridge { +//! @addtogroup matlab +//! @{ + /* * Custom typedefs * Parsed names from the hdr_parser @@ -608,7 +614,7 @@ void deepCopyAndTranspose(const matlab::MxArray& in, cv::Mat& out) { //gemt('C', in.rows(), in.cols(), inp, in.rows(), outp, out.step1()); } - +//! @} } // namespace bridge } // namespace cv diff --git a/modules/matlab/include/opencv2/matlab/map.hpp b/modules/matlab/include/opencv2/matlab/map.hpp index 4d4fc649c..446652fb5 100644 --- a/modules/matlab/include/opencv2/matlab/map.hpp +++ b/modules/matlab/include/opencv2/matlab/map.hpp @@ -53,10 +53,16 @@ using Map = std::unordered_map; #else +//! @addtogroup matlab +//! @{ + // If we don't have C++11 support, we wrap another map implementation // in the same public API as unordered_map + +//! @cond IGNORED #include #include +//! @endcond template class Map { @@ -85,6 +91,8 @@ public: } }; +//! @} + } // namespace matlab #endif diff --git a/modules/matlab/include/opencv2/matlab/mxarray.hpp b/modules/matlab/include/opencv2/matlab/mxarray.hpp index ba8c64c49..820be93bd 100644 --- a/modules/matlab/include/opencv2/matlab/mxarray.hpp +++ b/modules/matlab/include/opencv2/matlab/mxarray.hpp @@ -76,6 +76,10 @@ extern "C" { #endif namespace matlab { + +//! @addtogroup matlab +//! @{ + // ---------------------------------------------------------------------------- // PREDECLARATIONS // ---------------------------------------------------------------------------- @@ -679,6 +683,8 @@ public: } }; +//! 
@} + } // namespace matlab #endif diff --git a/modules/matlab/include/opencv2/matlab/transpose.hpp b/modules/matlab/include/opencv2/matlab/transpose.hpp index 7331cd0ec..2f73297c4 100644 --- a/modules/matlab/include/opencv2/matlab/transpose.hpp +++ b/modules/matlab/include/opencv2/matlab/transpose.hpp @@ -43,6 +43,9 @@ #ifndef OPENCV_TRANSPOSE_HPP_ #define OPENCV_TRANSPOSE_HPP_ +//! @addtogroup matlab +//! @{ + template void transposeBlock(const size_t M, const size_t N, const InputScalar* src, size_t lda, OutputScalar* dst, size_t ldb) { InputScalar cache[16]; @@ -138,4 +141,7 @@ void transpose4x4(const float* src, size_t lda, float* dst, size_t } #endif + +//! @} + #endif diff --git a/modules/optflow/doc/optflow.bib b/modules/optflow/doc/optflow.bib new file mode 100644 index 000000000..e9ef09887 --- /dev/null +++ b/modules/optflow/doc/optflow.bib @@ -0,0 +1,39 @@ +@article{Bradski00, + title={Motion segmentation and pose recognition with motion history gradients}, + author={Bradski, Gary R and Davis, James W}, + journal={Machine Vision and Applications}, + volume={13}, + number={3}, + pages={174--184}, + year={2002}, + publisher={Springer} +} + +@inproceedings{Davis97, + title={The representation and recognition of human movement using temporal templates}, + author={Davis, James W and Bobick, Aaron F}, + booktitle={Computer Vision and Pattern Recognition, 1997. Proceedings., 1997 IEEE Computer Society Conference on}, + pages={928--934}, + year={1997}, + organization={IEEE} +} + +@inproceedings{Tao2012, + title={SimpleFlow: A Non-iterative, Sublinear Optical Flow Algorithm}, + author={Tao, Michael and Bai, Jiamin and Kohli, Pushmeet and Paris, Sylvain}, + booktitle={Computer Graphics Forum}, + volume={31}, + number={2pt1}, + pages={345--353}, + year={2012}, + organization={Wiley Online Library} +} + +@inproceedings{Weinzaepfel2013, + title={DeepFlow: Large displacement optical flow with deep matching}, + author={Weinzaepfel, Philippe and Revaud, Jerome and Harchaoui, Zaid and Schmid, Cordelia}, + booktitle={Computer Vision (ICCV), 2013 IEEE International Conference on}, + pages={1385--1392}, + year={2013}, + organization={IEEE} +} diff --git a/modules/optflow/include/opencv2/optflow.hpp b/modules/optflow/include/opencv2/optflow.hpp index e7427d95a..ecca2d82b 100644 --- a/modules/optflow/include/opencv2/optflow.hpp +++ b/modules/optflow/include/opencv2/optflow.hpp @@ -43,15 +43,67 @@ the use of this software, even if advised of the possibility of such damage. #include "opencv2/core.hpp" #include "opencv2/video.hpp" +/** +@defgroup optflow Optical Flow Algorithms + +Dense optical flow algorithms compute motion for each point: + +- cv::optflow::calcOpticalFlowSF +- cv::optflow::createOptFlow_DeepFlow + +Motion templates is alternative technique for detecting motion and computing its direction. +See samples/motempl.py. + +- cv::motempl::updateMotionHistory +- cv::motempl::calcMotionGradient +- cv::motempl::calcGlobalOrientation +- cv::motempl::segmentMotion + +Functions reading and writing .flo files in "Middlebury" format, see: + +- cv::optflow::readOpticalFlow +- cv::optflow::writeOpticalFlow + + */ + namespace cv { namespace optflow { -//! computes dense optical flow using Simple Flow algorithm +//! @addtogroup optflow +//! @{ + +/** @overload */ CV_EXPORTS_W void calcOpticalFlowSF( InputArray from, InputArray to, OutputArray flow, int layers, int averaging_block_size, int max_flow); +/** @brief Calculate an optical flow using "SimpleFlow" algorithm. 
+ +@param from First 8-bit 3-channel image. +@param to Second 8-bit 3-channel image of the same size as prev +@param flow computed flow image that has the same size as prev and type CV_32FC2 +@param layers Number of layers +@param averaging_block_size Size of block through which we sum up when calculate cost function +for pixel +@param max_flow maximal flow that we search at each level +@param sigma_dist vector smooth spatial sigma parameter +@param sigma_color vector smooth color sigma parameter +@param postprocess_window window size for postprocess cross bilateral filter +@param sigma_dist_fix spatial sigma for postprocess cross bilateralf filter +@param sigma_color_fix color sigma for postprocess cross bilateral filter +@param occ_thr threshold for detecting occlusions +@param upscale_averaging_radius window size for bilateral upscale operation +@param upscale_sigma_dist spatial sigma for bilateral upscale operation +@param upscale_sigma_color color sigma for bilateral upscale operation +@param speed_up_thr threshold to detect point with irregular flow - where flow should be +recalculated after upscale + +See @cite Tao2012. And site of project - . + +@note + - An example using the simpleFlow algorithm can be found at samples/simpleflow_demo.cpp + */ CV_EXPORTS_W void calcOpticalFlowSF( InputArray from, InputArray to, OutputArray flow, int layers, int averaging_block_size, int max_flow, double sigma_dist, double sigma_color, int postprocess_window, @@ -59,24 +111,62 @@ CV_EXPORTS_W void calcOpticalFlowSF( InputArray from, InputArray to, OutputArray int upscale_averaging_radius, double upscale_sigma_dist, double upscale_sigma_color, double speed_up_thr ); -//! reads optical flow from a file, Middlebury format: -// http://vision.middlebury.edu/flow/code/flow-code/README.txt +/** @brief Read a .flo file + +@param path Path to the file to be loaded + +The function readOpticalFlow loads a flow field from a file and returns it as a single matrix. +Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the +flow in the horizontal direction (u), second - vertical (v). + */ CV_EXPORTS_W Mat readOpticalFlow( const String& path ); -//! writes optical flow to a file, Middlebury format -CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); +/** @brief Write a .flo to disk +@param path Path to the file to be written +@param flow Flow field to be stored +The function stores a flow field in a file, returns true on success, false otherwise. +The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds +to the flow in the horizontal direction (u), second - vertical (v). + */ +CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); -// DeepFlow implementation, based on: -// P. Weinzaepfel, J. Revaud, Z. Harchaoui, and C. Schmid, “DeepFlow: Large Displacement Optical Flow with Deep Matching,” +/** @brief DeepFlow optical flow algorithm implementation. + +The class implements the DeepFlow optical flow algorithm described in @cite Weinzaepfel2013 . See +also . 
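+
+A minimal usage sketch (frame0 and frame1 are placeholder names for two 8-bit grayscale frames of
+equal size; see also readOpticalFlow / writeOpticalFlow above):
+@code{.cpp}
+Ptr<DenseOpticalFlow> df = optflow::createOptFlow_DeepFlow();
+Mat flow;                          // filled with CV_32FC2 (u, v) vectors
+df->calc( frame0, frame1, flow );  // dense flow from frame0 to frame1
+optflow::writeOpticalFlow( "deepflow_result.flo", flow );
+@endcode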
+Parameters - class fields - that may be modified after creating a class instance: +- member float alpha +Smoothness assumption weight +- member float delta +Color constancy assumption weight +- member float gamma +Gradient constancy weight +- member float sigma +Gaussian smoothing parameter +- member int minSize +Minimal dimension of an image in the pyramid (next, smaller images in the pyramid are generated +until one of the dimensions reaches this size) +- member float downscaleFactor +Scaling factor in the image pyramid (must be \< 1) +- member int fixedPointIterations +How many iterations on each level of the pyramid +- member int sorIterations +Iterations of Succesive Over-Relaxation (solver) +- member float omega +Relaxation factor in SOR + */ CV_EXPORTS_W Ptr createOptFlow_DeepFlow(); -// Additional interface to the SimpleFlow algorithm - calcOpticalFlowSF() +//! Additional interface to the SimpleFlow algorithm - calcOpticalFlowSF() CV_EXPORTS_W Ptr createOptFlow_SimpleFlow(); -// Additional interface to the Farneback's algorithm - calcOpticalFlowFarneback() +//! Additional interface to the Farneback's algorithm - calcOpticalFlowFarneback() CV_EXPORTS_W Ptr createOptFlow_Farneback(); + +//! @} + } //optflow } diff --git a/modules/optflow/include/opencv2/optflow/motempl.hpp b/modules/optflow/include/opencv2/optflow/motempl.hpp index 995b3d5be..b0994fad6 100644 --- a/modules/optflow/include/opencv2/optflow/motempl.hpp +++ b/modules/optflow/include/opencv2/optflow/motempl.hpp @@ -47,22 +47,100 @@ namespace cv namespace motempl { -//! updates motion history image using the current silhouette +//! @addtogroup optflow +//! @{ + +/** @brief Updates the motion history image by a moving silhouette. + +@param silhouette Silhouette mask that has non-zero pixels where the motion occurs. +@param mhi Motion history image that is updated by the function (single-channel, 32-bit +floating-point). +@param timestamp Current time in milliseconds or other units. +@param duration Maximal duration of the motion track in the same units as timestamp . + +The function updates the motion history image as follows: + +\f[\texttt{mhi} (x,y)= \forkthree{\texttt{timestamp}}{if \(\texttt{silhouette}(x,y) \ne 0\)}{0}{if \(\texttt{silhouette}(x,y) = 0\) and \(\texttt{mhi} < (\texttt{timestamp} - \texttt{duration})\)}{\texttt{mhi}(x,y)}{otherwise}\f] + +That is, MHI pixels where the motion occurs are set to the current timestamp , while the pixels +where the motion happened last time a long time ago are cleared. + +The function, together with calcMotionGradient and calcGlobalOrientation , implements a motion +templates technique described in @cite Davis97 and @cite Bradski00. + */ CV_EXPORTS_W void updateMotionHistory( InputArray silhouette, InputOutputArray mhi, double timestamp, double duration ); -//! computes the motion gradient orientation image from the motion history image +/** @brief Calculates a gradient orientation of a motion history image. + +@param mhi Motion history single-channel floating-point image. +@param mask Output mask image that has the type CV_8UC1 and the same size as mhi . Its non-zero +elements mark pixels where the motion gradient data is correct. +@param orientation Output motion gradient orientation image that has the same type and the same +size as mhi . Each pixel of the image is a motion orientation, from 0 to 360 degrees. +@param delta1 Minimal (or maximal) allowed difference between mhi values within a pixel +neighborhood. 
+@param delta2 Maximal (or minimal) allowed difference between mhi values within a pixel +neighborhood. That is, the function finds the minimum ( \f$m(x,y)\f$ ) and maximum ( \f$M(x,y)\f$ ) mhi +values over \f$3 \times 3\f$ neighborhood of each pixel and marks the motion orientation at \f$(x, y)\f$ +as valid only if +\f[\min ( \texttt{delta1} , \texttt{delta2} ) \le M(x,y)-m(x,y) \le \max ( \texttt{delta1} , \texttt{delta2} ).\f] +@param apertureSize Aperture size of the Sobel operator. + +The function calculates a gradient orientation at each pixel \f$(x, y)\f$ as: + +\f[\texttt{orientation} (x,y)= \arctan{\frac{d\texttt{mhi}/dy}{d\texttt{mhi}/dx}}\f] + +In fact, fastAtan2 and phase are used so that the computed angle is measured in degrees and covers +the full range 0..360. Also, the mask is filled to indicate pixels where the computed angle is +valid. + +@note + - (Python) An example on how to perform a motion template technique can be found at + opencv_source_code/samples/python2/motempl.py + */ CV_EXPORTS_W void calcMotionGradient( InputArray mhi, OutputArray mask, OutputArray orientation, double delta1, double delta2, int apertureSize = 3 ); -//! computes the global orientation of the selected motion history image part +/** @brief Calculates a global motion orientation in a selected region. + +@param orientation Motion gradient orientation image calculated by the function calcMotionGradient +@param mask Mask image. It may be a conjunction of a valid gradient mask, also calculated by +calcMotionGradient , and the mask of a region whose direction needs to be calculated. +@param mhi Motion history image calculated by updateMotionHistory . +@param timestamp Timestamp passed to updateMotionHistory . +@param duration Maximum duration of a motion track in milliseconds, passed to updateMotionHistory + +The function calculates an average motion direction in the selected region and returns the angle +between 0 degrees and 360 degrees. The average direction is computed from the weighted orientation +histogram, where a recent motion has a larger weight and the motion occurred in the past has a +smaller weight, as recorded in mhi . + */ CV_EXPORTS_W double calcGlobalOrientation( InputArray orientation, InputArray mask, InputArray mhi, double timestamp, double duration ); +/** @brief Splits a motion history image into a few parts corresponding to separate independent motions (for +example, left hand, right hand). + +@param mhi Motion history image. +@param segmask Image where the found mask should be stored, single-channel, 32-bit floating-point. +@param boundingRects Vector containing ROIs of motion connected components. +@param timestamp Current time in milliseconds or other units. +@param segThresh Segmentation threshold that is recommended to be equal to the interval between +motion history "steps" or greater. + +The function finds all of the motion segments and marks them in segmask with individual values +(1,2,...). It also computes a vector with ROIs of motion connected components. After that the motion +direction for every component can be calculated with calcGlobalOrientation using the extracted mask +of the particular component. + */ CV_EXPORTS_W void segmentMotion( InputArray mhi, OutputArray segmask, CV_OUT std::vector& boundingRects, double timestamp, double segThresh ); + +//! 
@} + } } diff --git a/modules/reg/doc/reg.bib b/modules/reg/doc/reg.bib new file mode 100644 index 000000000..10b142928 --- /dev/null +++ b/modules/reg/doc/reg.bib @@ -0,0 +1,10 @@ +@article{Szeliski06, + title={Image alignment and stitching: A tutorial}, + author={Szeliski, Richard}, + journal={Foundations and Trends{\textregistered} in Computer Graphics and Vision}, + volume={2}, + number={1}, + pages={1--104}, + year={2006}, + publisher={Now Publishers Inc.} +} diff --git a/modules/reg/include/opencv2/reg/map.hpp b/modules/reg/include/opencv2/reg/map.hpp index 03309c367..2ae1804bf 100644 --- a/modules/reg/include/opencv2/reg/map.hpp +++ b/modules/reg/include/opencv2/reg/map.hpp @@ -40,12 +40,86 @@ #include // Basic OpenCV structures (cv::Mat, Scalar) +/** @defgroup reg Image Registration + +The Registration module implements parametric image registration. The implemented method is direct +alignment, that is, it directly uses the pixel values to calculate the registration between a +pair of images, as opposed to feature-based registration. The implementation essentially follows the +corresponding part of @cite Szeliski06. + +Feature based methods have some advantages over pixel based methods when we are trying to register +pictures that have been shot under different lighting conditions or exposure times, or when the +images overlap only partially. On the other hand, the main advantage of pixel-based methods when +compared to feature based methods is their better precision for some pictures (those shot under +similar lighting conditions and that have a significant overlap), due to the fact that we are +using all the information available in the image, which allows us to achieve subpixel accuracy. This +is particularly important for certain applications like multi-frame denoising or super-resolution. + +In fact, pixel and feature registration methods can complement each other: an application could +first obtain a coarse registration using features and then refine the registration using a pixel +based method on the overlapping area of the images. The code developed allows this use case. + +The module implements classes derived from the abstract classes cv::reg::Map or cv::reg::Mapper. The +former models a coordinate transformation between two reference frames, while the latter encapsulates +a way of invoking a method that calculates a Map between two images. Although the objective has been +to implement pixel based methods, the module can be extended to support other methods that can +calculate transformations between images (feature methods, optical flow, etc.). + +Each class derived from Map implements a motion model, as follows: + +- MapShift: Models a simple translation +- MapAffine: Models an affine transformation +- MapProjec: Models a projective transformation + +MapProjec can also be used to model affine motion or translations, but some operations on it are +more costly, and that is the reason for defining the other two classes. + +The classes derived from Mapper are + +- MapperGradShift: Gradient based alignment for calculating translations. It produces a MapShift + (two parameters that correspond to the shift vector). +- MapperGradEuclid: Gradient based alignment for euclidean motions, that is, rotations and + translations. It calculates three parameters (angle and shift vector), although the result is + stored in a MapAffine object for convenience. +- MapperGradSimilar: Gradient based alignment for calculating similarities, which adds scaling to + the euclidean motion.
It calculates four parameters (two for the anti-symmetric matrix and two + for the shift vector), although the result is stored in a MapAffine object for better + convenience. +- MapperGradAffine: Gradient based alignment for an affine motion model. The number of parameters + is six and the result is stored in a MapAffine object. +- MapperGradProj: Gradient based alignment for calculating projective transformations. The number + of parameters is eight and the result is stored in a MapProject object. +- MapperPyramid: It implements hyerarchical motion estimation using a Gaussian pyramid. Its + constructor accepts as argument any other object that implements the Mapper interface, and it is + that mapper the one called by MapperPyramid for each scale of the pyramid. + +If the motion between the images is not very small, the normal way of using these classes is to +create a MapperGrad\* object and use it as input to create a MapperPyramid, which in turn is called +to perform the calculation. However, if the motion between the images is small enough, we can use +directly the MapperGrad\* classes. Another possibility is to use first a feature based method to +perform a coarse registration and then do a refinement through MapperPyramid or directly a +MapperGrad\* object. The "calculate" method of the mappers accepts an initial estimation of the +motion as input. + +When deciding which MapperGrad to use we must take into account that mappers with more parameters +can handle more complex motions, but involve more calculations and are therefore slower. Also, if we +are confident on the motion model that is followed by the sequence, increasing the number of +parameters beyond what we need will decrease the accuracy: it is better to use the least number of +degrees of freedom that we can. + +In the module tests there are examples that show how to register a pair of images using any of the +implemented mappers. +*/ + namespace cv { namespace reg { +//! @addtogroup reg +//! @{ + +/** @brief Base class for modelling a Map between two images. -/*! - * Defines a map T from one coordinate system to another +The class is only used to define the common interface for any possible map. */ class CV_EXPORTS Map { @@ -94,6 +168,7 @@ public: virtual void scale(double factor) = 0; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mapaffine.hpp b/modules/reg/include/opencv2/reg/mapaffine.hpp index a640263bf..1c9132633 100644 --- a/modules/reg/include/opencv2/reg/mapaffine.hpp +++ b/modules/reg/include/opencv2/reg/mapaffine.hpp @@ -43,6 +43,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Defines an affine transformation @@ -96,6 +98,7 @@ private: cv::Vec shift_; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mapper.hpp b/modules/reg/include/opencv2/reg/mapper.hpp index 6da086ee0..8abadd14e 100644 --- a/modules/reg/include/opencv2/reg/mapper.hpp +++ b/modules/reg/include/opencv2/reg/mapper.hpp @@ -44,8 +44,12 @@ namespace cv { namespace reg { -/* - * Encapsulates ways of calculating mappings between two images +//! @addtogroup reg +//! @{ + +/** @brief Base class for modelling an algorithm for calculating a + +The class is only used to define the common interface for any possible mapping algorithm. */ class CV_EXPORTS Mapper { @@ -101,6 +105,7 @@ protected: } }; +//! 
@} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mappergradaffine.hpp b/modules/reg/include/opencv2/reg/mappergradaffine.hpp index 9194008e1..08d539711 100644 --- a/modules/reg/include/opencv2/reg/mappergradaffine.hpp +++ b/modules/reg/include/opencv2/reg/mappergradaffine.hpp @@ -43,6 +43,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Mapper for affine motion @@ -58,6 +60,7 @@ public: cv::Ptr getMap(void) const; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mappergradeuclid.hpp b/modules/reg/include/opencv2/reg/mappergradeuclid.hpp index a1fb12135..29c49cb65 100644 --- a/modules/reg/include/opencv2/reg/mappergradeuclid.hpp +++ b/modules/reg/include/opencv2/reg/mappergradeuclid.hpp @@ -43,6 +43,9 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ + /*! * Mapper for euclidean motion: rotation plus shift */ @@ -57,6 +60,7 @@ public: cv::Ptr getMap(void) const; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mappergradproj.hpp b/modules/reg/include/opencv2/reg/mappergradproj.hpp index 053875caf..f1721e820 100644 --- a/modules/reg/include/opencv2/reg/mappergradproj.hpp +++ b/modules/reg/include/opencv2/reg/mappergradproj.hpp @@ -43,6 +43,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Gradient mapper for a projective transformation @@ -58,6 +60,7 @@ public: cv::Ptr getMap(void) const; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mappergradshift.hpp b/modules/reg/include/opencv2/reg/mappergradshift.hpp index 08e6b41dc..a9f75b3a6 100644 --- a/modules/reg/include/opencv2/reg/mappergradshift.hpp +++ b/modules/reg/include/opencv2/reg/mappergradshift.hpp @@ -43,6 +43,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Gradient mapper for a translation @@ -58,6 +60,7 @@ public: cv::Ptr getMap(void) const; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mappergradsimilar.hpp b/modules/reg/include/opencv2/reg/mappergradsimilar.hpp index 596554fe7..ea45ab912 100644 --- a/modules/reg/include/opencv2/reg/mappergradsimilar.hpp +++ b/modules/reg/include/opencv2/reg/mappergradsimilar.hpp @@ -43,6 +43,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Calculates a similarity transformation between to images (scale, rotation, and shift) @@ -58,6 +60,7 @@ public: cv::Ptr getMap(void) const; }; +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mapperpyramid.hpp b/modules/reg/include/opencv2/reg/mapperpyramid.hpp index a5eb6ed85..33440bdae 100644 --- a/modules/reg/include/opencv2/reg/mapperpyramid.hpp +++ b/modules/reg/include/opencv2/reg/mapperpyramid.hpp @@ -44,6 +44,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Calculates a map using a gaussian pyramid @@ -69,7 +71,7 @@ private: const Mapper& baseMapper_; /*!< Mapper used in inner level */ }; - +//! @} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mapprojec.hpp b/modules/reg/include/opencv2/reg/mapprojec.hpp index cceb25e72..57ef14600 100644 --- a/modules/reg/include/opencv2/reg/mapprojec.hpp +++ b/modules/reg/include/opencv2/reg/mapprojec.hpp @@ -44,6 +44,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Defines an transformation that consists on a projective transformation @@ -96,6 +98,7 @@ private: cv::Matx projTr_; /*< Projection matrix */ }; +//! 
@} }} // namespace cv::reg diff --git a/modules/reg/include/opencv2/reg/mapshift.hpp b/modules/reg/include/opencv2/reg/mapshift.hpp index 77906e211..e5f54a4ce 100644 --- a/modules/reg/include/opencv2/reg/mapshift.hpp +++ b/modules/reg/include/opencv2/reg/mapshift.hpp @@ -44,6 +44,8 @@ namespace cv { namespace reg { +//! @addtogroup reg +//! @{ /*! * Defines an transformation that consists on a simple displacement @@ -87,6 +89,7 @@ private: cv::Vec shift_; /*< Displacement */ }; +//! @} }} // namespace cv::reg diff --git a/modules/rgbd/include/opencv2/rgbd.hpp b/modules/rgbd/include/opencv2/rgbd.hpp index 62947ae21..9edd23953 100644 --- a/modules/rgbd/include/opencv2/rgbd.hpp +++ b/modules/rgbd/include/opencv2/rgbd.hpp @@ -41,10 +41,17 @@ #include #include +/** @defgroup rgbd RGB-Depth Processing +*/ + namespace cv { namespace rgbd { + +//! @addtogroup rgbd +//! @{ + /** Checks if the value is a valid depth. For CV_16U or CV_16S, the convention is to be invalid if it is * a limit. For a float/double, we just check if it is a NaN * @param depth the depth to check for validity @@ -650,6 +657,9 @@ namespace rgbd // TODO Depth interpolation // Curvature // Get rescaleDepth return dubles if asked for + +//! @} + } /* namespace rgbd */ } /* namespace cv */ diff --git a/modules/rgbd/include/opencv2/rgbd/linemod.hpp b/modules/rgbd/include/opencv2/rgbd/linemod.hpp index 46d869926..ac5629169 100644 --- a/modules/rgbd/include/opencv2/rgbd/linemod.hpp +++ b/modules/rgbd/include/opencv2/rgbd/linemod.hpp @@ -54,7 +54,8 @@ namespace cv { namespace linemod { -/// @todo Convert doxy comments to rst +//! @addtogroup rgbd +//! @{ /** * \brief Discriminant feature described by its location and label. @@ -449,6 +450,8 @@ CV_EXPORTS Ptr getDefaultLINE(); */ CV_EXPORTS Ptr getDefaultLINEMOD(); +//! @} + } // namespace linemod } // namespace cv diff --git a/modules/saliency/doc/saliency.bib b/modules/saliency/doc/saliency.bib new file mode 100644 index 000000000..6fd82ea1b --- /dev/null +++ b/modules/saliency/doc/saliency.bib @@ -0,0 +1,24 @@ +@inproceedings{BING, + title={BING: Binarized normed gradients for objectness estimation at 300fps}, + author={Cheng, Ming-Ming and Zhang, Ziming and Lin, Wen-Yan and Torr, Philip}, + booktitle={IEEE CVPR}, + year={2014} +} + +@inproceedings{BinWangApr2014, + title={A fast self-tuning background subtraction algorithm}, + author={Wang, Bin and Dudek, Piotr}, + booktitle={Computer Vision and Pattern Recognition Workshops (CVPRW), 2014 IEEE Conference on}, + pages={401--404}, + year={2014}, + organization={IEEE} +} + +@inproceedings{SR, + title={Saliency detection: A spectral residual approach}, + author={Hou, Xiaodi and Zhang, Liqing}, + booktitle={Computer Vision and Pattern Recognition, 2007. CVPR'07. IEEE Conference on}, + pages={1--8}, + year={2007}, + organization={IEEE} +} diff --git a/modules/saliency/include/opencv2/saliency.hpp b/modules/saliency/include/opencv2/saliency.hpp index f051c66a9..c86e11dd3 100644 --- a/modules/saliency/include/opencv2/saliency.hpp +++ b/modules/saliency/include/opencv2/saliency.hpp @@ -45,6 +45,41 @@ #include "opencv2/saliency/saliencyBaseClasses.hpp" #include "opencv2/saliency/saliencySpecializedClasses.hpp" +/** @defgroup saliency Saliency API + +Many computer vision applications may benefit from understanding where humans focus given a scene. 
+Other than cognitively understanding the way humans perceive images and scenes, finding salient
+regions and objects in images helps various tasks such as speeding up object detection, object
+recognition, object tracking and content-aware image editing.
+
+The saliency literature is rich, but its development is very fragmented. The principal purpose of
+this API is to offer a single interface and framework into which several saliency algorithms, of
+possibly very different nature and methodology but sharing the same purpose, can be plugged. The
+algorithms are organized into three main categories:
+
+**Static Saliency**: algorithms belonging to this category exploit different image features that
+allow detecting salient objects in non-dynamic scenarios.
+
+**Motion Saliency**: algorithms belonging to this category are particularly focused on detecting
+salient objects over time (hence also over frames); a temporal component is taken into account, so
+that "moving" objects are detected as salient, which also covers the more general sense of
+detecting changes in the scene.
+
+**Objectness**: Objectness is usually represented as a value which reflects how likely an image
+window covers an object of any category. Algorithms belonging to this category avoid making
+decisions early on by proposing a small number of category-independent proposals that are expected
+to cover all objects in an image. Being able to perceive objects before identifying them is closely
+related to bottom-up visual attention (saliency).
+
+![Saliency diagram](pics/saliency.png)
+
+To see how the API works, try the tracker demo:
+
+
+@note This API has been designed with PlantUML. If you modify this API please change the UML as well.
+
+*/
+
 namespace cv
 {
 namespace saliency
 
diff --git a/modules/saliency/include/opencv2/saliency/saliencyBaseClasses.hpp b/modules/saliency/include/opencv2/saliency/saliencyBaseClasses.hpp
index 0e5c2127f..21aaab9b1 100644
--- a/modules/saliency/include/opencv2/saliency/saliencyBaseClasses.hpp
+++ b/modules/saliency/include/opencv2/saliency/saliencyBaseClasses.hpp
@@ -54,6 +54,9 @@ namespace cv
 namespace saliency
 {
 
+//! @addtogroup saliency
+//! @{
+
 /************************************ Saliency Base Class ************************************/
 
 class CV_EXPORTS Saliency : public virtual Algorithm
@@ -94,6 +97,21 @@ class CV_EXPORTS StaticSaliency : public virtual Saliency
 {
 public:
 
+  /** @brief This function computes a binary map of the given saliency map, obtained in the
+  following way:
+
+  In a first step, to improve the definition of interest areas and facilitate identification of
+  targets, a segmentation by clustering is performed, using the *K-means algorithm*. Then, to obtain
+  a binary representation of the clustered saliency map, a fixed threshold is not convenient, since
+  the values of the map can vary according to the characteristics of the frame under analysis.
+  Therefore, *Otsu's algorithm* is used, which assumes that the image to be thresholded contains two
+  classes of pixels, i.e. a bi-modal histogram (e.g. foreground and background pixels); the
+  algorithm then calculates the optimal threshold separating those two classes, so that their
+  intra-class variance is minimal.
+ + @param saliencyMap the saliency map obtained through one of the specialized algorithms + @param binaryMap the binary map + */ bool computeBinaryMap( const Mat& saliencyMap, Mat& binaryMap ); protected: virtual bool computeSaliencyImpl( const InputArray image, OutputArray saliencyMap )=0; @@ -118,6 +136,8 @@ class CV_EXPORTS Objectness : public virtual Saliency }; +//! @} + } /* namespace saliency */ } /* namespace cv */ diff --git a/modules/saliency/include/opencv2/saliency/saliencySpecializedClasses.hpp b/modules/saliency/include/opencv2/saliency/saliencySpecializedClasses.hpp index 74aa2c88d..a826ba1b6 100644 --- a/modules/saliency/include/opencv2/saliency/saliencySpecializedClasses.hpp +++ b/modules/saliency/include/opencv2/saliency/saliencySpecializedClasses.hpp @@ -53,11 +53,17 @@ namespace cv namespace saliency { +//! @addtogroup saliency +//! @{ + /************************************ Specific Static Saliency Specialized Classes ************************************/ -/** - * \brief Saliency based on algorithms described in [1] - * [1]Hou, Xiaodi, and Liqing Zhang. "Saliency detection: A spectral residual approach." Computer Vision and Pattern Recognition, 2007. CVPR'07. IEEE Conference on. IEEE, 2007. +/** @brief the Spectral Residual approach from @cite SR + +Starting from the principle of natural image statistics, this method simulate the behavior of +pre-attentive visual search. The algorithm analyze the log spectrum of each image and obtain the +spectral residual. Then transform the spectral residual to spatial domain to obtain the saliency +map, which suggests the positions of proto-objects. */ class CV_EXPORTS StaticSaliencySpectralResidual : public StaticSaliency { @@ -71,7 +77,7 @@ public: protected: bool computeSaliencyImpl( const InputArray image, OutputArray saliencyMap ); - AlgorithmInfo* info() const;CV_PROP_RW + AlgorithmInfo* info() const; int resImWidth; int resImHeight; @@ -86,17 +92,34 @@ protected: * [2] B. Wang and P. Dudek "A Fast Self-tuning Background Subtraction Algorithm", in proc of IEEE Workshop on Change Detection, 2014 * */ - +/** @brief the Fast Self-tuning Background Subtraction Algorithm from @cite BinWangApr2014 + */ class CV_EXPORTS MotionSaliencyBinWangApr2014 : public MotionSaliency { public: MotionSaliencyBinWangApr2014(); virtual ~MotionSaliencyBinWangApr2014(); + /** @brief This is a utility function that allows to set the correct size (taken from the input image) in the + corresponding variables that will be used to size the data structures of the algorithm. + @param W width of input image + @param H height of input image + */ void setImagesize( int W, int H ); + /** @brief This function allows the correct initialization of all data structures that will be used by the + algorithm. + */ bool init(); protected: + /** @brief Performs all the operations and calls all internal functions necessary for the accomplishment of the + Fast Self-tuning Background Subtraction Algorithm algorithm. + @param image input image. According to the needs of this specialized algorithm, the param image is a + single *Mat*. + @param saliencyMap Saliency Map. Is a binarized map that, in accordance with the nature of the algorithm, highlights the moving objects or areas of change in the scene. + The saliency map is given by a single *Mat* (one for each frame of an hypothetical video + stream). 
+ */ bool computeSaliencyImpl( const InputArray image, OutputArray saliencyMap ); AlgorithmInfo* info() const; @@ -141,6 +164,9 @@ private: * \brief Objectness algorithms based on [3] * [3] Cheng, Ming-Ming, et al. "BING: Binarized normed gradients for objectness estimation at 300fps." IEEE CVPR. 2014. */ + +/** @brief the Binarized normed gradients algorithm from @cite BING + */ class CV_EXPORTS ObjectnessBING : public Objectness { public: @@ -151,11 +177,39 @@ public: void read(); void write() const; + /** @brief Return the list of the rectangles' objectness value, + + in the same order as the *vector\ objectnessBoundingBox* returned by the algorithm (in + computeSaliencyImpl function). The bigger value these scores are, it is more likely to be an + object window. + */ std::vector getobjectnessValues(); + + /** @brief This is a utility function that allows to set the correct path from which the algorithm will load + the trained model. + @param trainingPath trained model path + */ void setTrainingPath( std::string trainingPath ); + + /** @brief This is a utility function that allows to set an arbitrary path in which the algorithm will save the + optional results + + (ie writing on file the total number and the list of rectangles returned by objectess, one for + each row). + @param resultsDir results' folder path + */ void setBBResDir( std::string resultsDir ); protected: + /** @brief Performs all the operations and calls all internal functions necessary for the + accomplishment of the Binarized normed gradients algorithm. + + @param image input image. According to the needs of this specialized algorithm, the param image is a + single *Mat* + @param objectnessBoundingBox objectness Bounding Box vector. According to the result given by this + specialized algorithm, the objectnessBoundingBox is a *vector\*. Each bounding box is + represented by a *Vec4i* for (minX, minY, maxX, maxY). + */ bool computeSaliencyImpl( const InputArray image, OutputArray objectnessBoundingBox ); AlgorithmInfo* info() const; @@ -292,6 +346,8 @@ private: }; +//! @} + } /* namespace saliency */ } /* namespace cv */ diff --git a/modules/surface_matching/doc/surface_matching.bib b/modules/surface_matching/doc/surface_matching.bib new file mode 100644 index 000000000..0d68be296 --- /dev/null +++ b/modules/surface_matching/doc/surface_matching.bib @@ -0,0 +1,23 @@ +@inproceedings{drost2010, + title={3d object detection and localization using multimodal point pair features}, + author={Drost, Bertram and Ilic, Slobodan}, + booktitle={3D Imaging, Modeling, Processing, Visualization and Transmission (3DIMPVT), 2012 Second International Conference on}, + pages={9--16}, + year={2012}, + organization={IEEE} +} +@inproceedings{pickyicp, + title={A refined ICP algorithm for robust 3-D correspondence estimation}, + author={Zin{\ss}er, Timo and Schmidt, Jochen and Niemann, Heinrich}, + booktitle={Image Processing, 2003. ICIP 2003. Proceedings. 
2003 International Conference on}, + volume={2}, + pages={II--695}, + year={2003}, + organization={IEEE} +} +@article{koklimlow, + title={Linear least-squares optimization for point-to-plane icp surface registration}, + author={Low, Kok-Lim}, + journal={Chapel Hill, University of North Carolina}, + year={2004} +} diff --git a/modules/surface_matching/include/opencv2/surface_matching.hpp b/modules/surface_matching/include/opencv2/surface_matching.hpp index fedef452d..160afe112 100644 --- a/modules/surface_matching/include/opencv2/surface_matching.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching.hpp @@ -41,4 +41,328 @@ #include "surface_matching/ppf_match_3d.hpp" #include "surface_matching/icp.hpp" +/** @defgroup surface_matching Surface Matching + +Introduction to Surface Matching +-------------------------------- + +Cameras and similar devices with the capability of sensation of 3D structure are becoming more +common. Thus, using depth and intensity information for matching 3D objects (or parts) are of +crucial importance for computer vision. Applications range from industrial control to guiding +everyday actions for visually impaired people. The task in recognition and pose estimation in range +images aims to identify and localize a queried 3D free-form object by matching it to the acquired +database. + +From an industrial perspective, enabling robots to automatically locate and pick up randomly placed +and oriented objects from a bin is an important challenge in factory automation, replacing tedious +and heavy manual labor. A system should be able to recognize and locate objects with a predefined +shape and estimate the position with the precision necessary for a gripping robot to pick it up. +This is where vision guided robotics takes the stage. Similar tools are also capable of guiding +robots (and even people) through unstructured environments, leading to automated navigation. These +properties make 3D matching from point clouds a ubiquitous necessity. Within this context, I will +now describe the OpenCV implementation of a 3D object recognition and pose estimation algorithm +using 3D features. + +Surface Matching Algorithm Through 3D Features +---------------------------------------------- + +The state of the algorithms in order to achieve the task 3D matching is heavily based on +@cite drost2010, which is one of the first and main practical methods presented in this area. The +approach is composed of extracting 3D feature points randomly from depth images or generic point +clouds, indexing them and later in runtime querying them efficiently. Only the 3D structure is +considered, and a trivial hash table is used for feature queries. + +While being fully aware that utilization of the nice CAD model structure in order to achieve a smart +point sampling, I will be leaving that aside now in order to respect the generalizability of the +methods (Typically for such algorithms training on a CAD model is not needed, and a point cloud +would be sufficient). 
Below is the outline of the entire algorithm: + +![Outline of the Algorithm](surface_matching/pics/outline.jpg) + +As explained, the algorithm relies on the extraction and indexing of point pair features, which are +defined as follows: + +\f[\bf{{F}}(\bf{{m1}}, \bf{{m2}}) = (||\bf{{d}}||_2, <(\bf{{n1}},\bf{{d}}), <(\bf{{n2}},\bf{{d}}), <(\bf{{n1}},\bf{{n2}}))\f] + +where \f$\bf{{m1}}\f$ and \f$\bf{{m2}}\f$ are feature two selected points on the model (or scene), +\f$\bf{{d}}\f$ is the difference vector, \f$\bf{{n1}}\f$ and \f$\bf{{n2}}\f$ are the normals at \f$\bf{{m1}}\f$ and +\f$\bf{m2}\f$. During the training stage, this vector is quantized, indexed. In the test stage, same +features are extracted from the scene and compared to the database. With a few tricks like +separation of the rotational components, the pose estimation part can also be made efficient (check +the reference for more details). A Hough-like voting and clustering is employed to estimate the +object pose. To cluster the poses, the raw pose hypotheses are sorted in decreasing order of the +number of votes. From the highest vote, a new cluster is created. If the next pose hypothesis is +close to one of the existing clusters, the hypothesis is added to the cluster and the cluster center +is updated as the average of the pose hypotheses within the cluster. If the next hypothesis is not +close to any of the clusters, it creates a new cluster. The proximity testing is done with fixed +thresholds in translation and rotation. Distance computation and averaging for translation are +performed in the 3D Euclidean space, while those for rotation are performed using quaternion +representation. After clustering, the clusters are sorted in decreasing order of the total number of +votes which determines confidence of the estimated poses. + +This pose is further refined using \f$ICP\f$ in order to obtain the final pose. + +PPF presented above depends largely on robust computation of angles between 3D vectors. Even though +not reported in the paper, the naive way of doing this (\f$\theta = cos^{-1}({\bf{a}}\cdot{\bf{b}})\f$ +remains numerically unstable. A better way to do this is then use inverse tangents, like: + +\f[<(\bf{n1},\bf{n2})=tan^{-1}(||{\bf{n1} \wedge \bf{n2}}||_2, \bf{n1} \cdot \bf{n2})\f] + +Rough Computation of Object Pose Given PPF +------------------------------------------ + +Let me summarize the following notation: + +- \f$p^i_m\f$: \f$i^{th}\f$ point of the model (\f$p^j_m\f$ accordingly) +- \f$n^i_m\f$: Normal of the \f$i^{th}\f$ point of the model (\f$n^j_m\f$ accordingly) +- \f$p^i_s\f$: \f$i^{th}\f$ point of the scene (\f$p^j_s\f$ accordingly) +- \f$n^i_s\f$: Normal of the \f$i^{th}\f$ point of the scene (\f$n^j_s\f$ accordingly) +- \f$T_{m\rightarrow g}\f$: The transformation required to translate \f$p^i_m\f$ to the origin and rotate + its normal \f$n^i_m\f$ onto the \f$x\f$-axis. +- \f$R_{m\rightarrow g}\f$: Rotational component of \f$T_{m\rightarrow g}\f$. +- \f$t_{m\rightarrow g}\f$: Translational component of \f$T_{m\rightarrow g}\f$. +- \f$(p^i_m)^{'}\f$: \f$i^{th}\f$ point of the model transformed by \f$T_{m\rightarrow g}\f$. (\f$(p^j_m)^{'}\f$ + accordingly). +- \f${\bf{R_{m\rightarrow g}}}\f$: Axis angle representation of rotation \f$R_{m\rightarrow g}\f$. +- \f$\theta_{m\rightarrow g}\f$: The angular component of the axis angle representation + \f${\bf{R_{m\rightarrow g}}}\f$. 
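+
+As a quick aside before the derivation, the following is a minimal sketch (not the module's
+internal code) of how the four-component PPF introduced above could be computed with the
+numerically stable, atan2-based angles; the helper names are hypothetical:
+
+~~~{cpp}
+#include <opencv2/core.hpp>
+#include <cmath>
+
+// <(a,b) = atan2(||a x b||_2, a . b): stable even for nearly parallel vectors
+static double stableAngle(const cv::Vec3d& a, const cv::Vec3d& b)
+{
+    return std::atan2(cv::norm(a.cross(b)), a.dot(b));
+}
+
+// F(m1, m2) = (||d||_2, <(n1,d), <(n2,d), <(n1,n2)) for points p1, p2 with unit normals n1, n2
+static cv::Vec4d computePPF(const cv::Vec3d& p1, const cv::Vec3d& n1,
+                            const cv::Vec3d& p2, const cv::Vec3d& n2)
+{
+    cv::Vec3d d = p2 - p1;
+    return cv::Vec4d(cv::norm(d), stableAngle(n1, d), stableAngle(n2, d), stableAngle(n1, n2));
+}
+~~~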
+
+The transformation in a point pair feature is computed by first finding the transformation
+\f$T_{m\rightarrow g}\f$ from the first point, and applying the same transformation to the second one.
+Transforming each point, together with the normal, to the ground plane leaves one angle to be
+determined during the comparison with a new point pair.
+
+We could now simply start writing
+
+\f[(p^i_m)^{'} = T_{m\rightarrow g} p^i_m\f]
+
+where
+
+\f[T_{m\rightarrow g} = -t_{m\rightarrow g}R_{m\rightarrow g}\f]
+
+Note that this is nothing but a stacked transformation. The translational component
+\f$t_{m\rightarrow g}\f$ reads
+
+\f[t_{m\rightarrow g} = -R_{m\rightarrow g}p^i_m\f]
+
+and the rotational component being
+
+\f[\theta_{m\rightarrow g} = \cos^{-1}(n^i_m \cdot {\bf{x}})\\
+ {\bf{R_{m\rightarrow g}}} = n^i_m \wedge {\bf{x}}\f]
+
+in axis-angle format. Note that bold refers to the vector form. After this transformation, the
+feature vectors of the model are registered onto the ground plane X and the angle with respect to
+\f$x=0\f$ is called \f$\alpha_m\f$. Similarly, for the scene, it is called \f$\alpha_s\f$.
+
+### Hough-like Voting Scheme
+
+As shown in the outline, PPF (point pair features) are extracted from the model, quantized, stored
+in the hashtable and indexed, during the training stage. During the runtime, however, a similar
+operation is performed on the input scene with the exception that this time a similarity lookup over
+the hashtable is performed, instead of an insertion. This lookup also allows us to compute a
+transformation to the ground plane for the scene pairs. After this point, computing the rotational
+component of the pose reduces to computation of the difference \f$\alpha=\alpha_m-\alpha_s\f$. This
+component carries the cue about the object pose. A Hough-like voting scheme is performed over the
+local model coordinate vector and \f$\alpha\f$. The highest-voted poses for every scene point let us
+recover the object pose.
+
+### Source Code for PPF Matching
+
+~~~{cpp}
+// pc is the loaded point cloud of the model
+// (Nx6) and pcTest is a loaded point cloud of
+// the scene (Mx6)
+ppf_match_3d::PPF3DDetector detector(0.03, 0.05);
+detector.trainModel(pc);
+vector<Pose3DPtr> results;
+detector.match(pcTest, results, 1.0/10.0, 0.05);
+cout << "Poses: " << endl;
+// print the poses
+for (size_t i=0; i<results.size(); i++)
+{
+    Pose3DPtr result = results[i];
+    result->printPose();
+}
+~~~
+
+Pose Registration via ICP
+-------------------------
+
+The matching process terminates with the attainment of the pose. However, due to the multiple
+matching points, erroneous hypotheses, pose averaging, etc., such a pose is very open to noise and
+is often far from perfect. Although the visual results obtained in that stage are pleasing, the
+quantitative evaluation shows \f$~10\f$ degrees variation (error), which is an acceptable level of
+matching. Many times, the requirement might be set well beyond this margin and it is desired to
+refine the computed pose.
+
+Furthermore, in typical RGBD scenes and point clouds, the 3D structure can capture only less than
+half of the model due to the visibility in the scene. Therefore, a robust pose refinement algorithm,
+which can register occluded and partially visible shapes quickly and correctly, is not an
+unrealistic wish.
+
+At this point, a trivial option would be to use the well-known iterative closest point (ICP)
+algorithm. However, utilization of the basic ICP leads to slow convergence, bad registration,
+outlier sensitivity and failure to register partial shapes.
Thus, it is definitely not suited to the +problem. For this reason, many variants have been proposed . Different variants contribute to +different stages of the pose estimation process. + +ICP is composed of \f$6\f$ stages and the improvements I propose for each stage is summarized below. + +### Sampling + +To improve convergence speed and computation time, it is common to use less points than the model +actually has. However, sampling the correct points to register is an issue in itself. The naive way +would be to sample uniformly and hope to get a reasonable subset. More smarter ways try to identify +the critical points, which are found to highly contribute to the registration process. Gelfand et. +al. exploit the covariance matrix in order to constrain the eigenspace, so that a set of points +which affect both translation and rotation are used. This is a clever way of subsampling, which I +will optionally be using in the implementation. + +### Correspondence Search + +As the name implies, this step is actually the assignment of the points in the data and the model in +a closest point fashion. Correct assignments will lead to a correct pose, where wrong assignments +strongly degrade the result. In general, KD-trees are used in the search of nearest neighbors, to +increase the speed. However this is not an optimality guarantee and many times causes wrong points +to be matched. Luckily the assignments are corrected over iterations. + +To overcome some of the limitations, Picky ICP @cite pickyicp and BC-ICP (ICP using bi-unique +correspondences) are two well-known methods. Picky ICP first finds the correspondences in the +old-fashioned way and then among the resulting corresponding pairs, if more than one scene point +\f$p_i\f$ is assigned to the same model point \f$m_j\f$, it selects \f$p_i\f$ that corresponds to the minimum +distance. BC-ICP on the other hand, allows multiple correspondences first and then resolves the +assignments by establishing bi-unique correspondences. It also defines a novel no-correspondence +outlier, which intrinsically eases the process of identifying outliers. + +For reference, both methods are used. Because P-ICP is a bit faster, with not-so-significant +performance drawback, it will be the method of choice in refinment of correspondences. + +### Weighting of Pairs + +In my implementation, I currently do not use a weighting scheme. But the common approaches involve +*normal compatibility* (\f$w_i=n^1_i\cdot n^2_j\f$) or assigning lower weights to point pairs with +greater distances (\f$w=1-\frac{||dist(m_i,s_i)||_2}{dist_{max}}\f$). + +### Rejection of Pairs + +The rejections are done using a dynamic thresholding based on a robust estimate of the standard +deviation. In other words, in each iteration, I find the MAD estimate of the Std. Dev. I denote this +as \f$mad_i\f$. I reject the pairs with distances \f$d_i>\tau mad_i\f$. Here \f$\tau\f$ is the threshold of +rejection and by default set to \f$3\f$. The weighting is applied prior to Picky refinement, explained +in the previous stage. + +### Error Metric + +As described in , a linearization of point to plane as in @cite koklimlow error metric is used. This +both speeds up the registration process and improves convergence. + +### Minimization + +Even though many non-linear optimizers (such as Levenberg Mardquardt) are proposed, due to the +linearization in the previous step, pose estimation reduces to solving a linear system of equations. +This is what I do exactly using cv::solve with DECOMP_SVD option. 
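+
+To make this last step concrete, below is a minimal sketch (not the module's internal
+implementation) of how such a linearized point-to-plane system could be assembled and solved with
+cv::solve and DECOMP_SVD; the function name and the data layout (one correspondence per row, with
+source point \f$s_i\f$, destination point \f$d_i\f$ and destination normal \f$n_i\f$) are
+assumptions made only for illustration:
+
+~~~{cpp}
+#include <opencv2/core.hpp>
+#include <vector>
+
+// Solves A*x = b in the least-squares sense for x = [alpha beta gamma tx ty tz]^T,
+// where each row encodes the linearized point-to-plane residual
+// (s_i x n_i) . r + n_i . t = n_i . (d_i - s_i), with r the small-angle rotation.
+cv::Mat solvePointToPlane(const std::vector<cv::Vec3d>& s,
+                          const std::vector<cv::Vec3d>& d,
+                          const std::vector<cv::Vec3d>& n)
+{
+    const int N = (int)s.size();
+    cv::Mat A(N, 6, CV_64F), b(N, 1, CV_64F);
+    for (int i = 0; i < N; i++)
+    {
+        cv::Vec3d c = s[i].cross(n[i]);
+        A.at<double>(i,0) = c[0];    A.at<double>(i,1) = c[1];    A.at<double>(i,2) = c[2];
+        A.at<double>(i,3) = n[i][0]; A.at<double>(i,4) = n[i][1]; A.at<double>(i,5) = n[i][2];
+        b.at<double>(i,0) = n[i].dot(d[i] - s[i]);
+    }
+    cv::Mat x;
+    cv::solve(A, b, x, cv::DECOMP_SVD); // SVD handles rank-deficient configurations gracefully
+    return x; // 6x1: small-angle rotation (alpha, beta, gamma) and translation (tx, ty, tz)
+}
+~~~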
+ +### ICP Algorithm + +Having described the steps above, here I summarize the layout of the ICP algorithm. + +#### Efficient ICP Through Point Cloud Pyramids + +While the up-to-now-proposed variants deal well with some outliers and bad initializations, they +require significant number of iterations. Yet, multi-resolution scheme can help reducing the number +of iterations by allowing the registration to start from a coarse level and propagate to the lower +and finer levels. Such approach both improves the performances and enhances the runtime. + +The search is done through multiple levels, in a hierarchical fashion. The registration starts with +a very coarse set of samples of the model. Iteratively, the points are densified and sought. After +each iteration the previously estimated pose is used as an initial pose and refined with the ICP. + +#### Visual Results + +##### Results on Synthetic Data + +In all of the results, the pose is initiated by PPF and the rest is left as: +\f$[\theta_x, \theta_y, \theta_z, t_x, t_y, t_z]=[0]\f$ + +### Source Code for Pose Refinement Using ICP + +~~~{cpp} +ICP icp(200, 0.001f, 2.5f, 8); +// Using the previously declared pc and pcTest +// This will perform registration for every pose +// contained in results +icp.registerModelToScene(pc, pcTest, results); + +// results now contain the refined poses +~~~ + +Results +------- + +This section is dedicated to the results of surface matching (point-pair-feature matching and a +following ICP refinement): + +![Several matches of a single frog model using ppf + icp](surface_matching/pics/gsoc_forg_matches.jpg) + +Matches of different models for Mian dataset is presented below: + +![Matches of different models for Mian dataset](surface_matching/pics/snapshot27.jpg) + +You might checkout the video on [youTube here](http://www.youtube.com/watch?v=uFnqLFznuZU). + +A Complete Sample +----------------- + +### Parameter Tuning + +Surface matching module treats its parameters relative to the model diameter (diameter of the axis +parallel bounding box), whenever it can. This makes the parameters independent from the model size. +This is why, both model and scene cloud were subsampled such that all points have a minimum distance +of \f$RelativeSamplingStep*DimensionRange\f$, where \f$DimensionRange\f$ is the distance along a given +dimension. All three dimensions are sampled in similar manner. For example, if +\f$RelativeSamplingStep\f$ is set to 0.05 and the diameter of model is 1m (1000mm), the points sampled +from the object's surface will be approximately 50 mm apart. From another point of view, if the +sampling RelativeSamplingStep is set to 0.05, at most \f$20x20x20 = 8000\f$ model points are generated +(depending on how the model fills in the volume). Consequently this results in at most 8000x8000 +pairs. In practice, because the models are not uniformly distributed over a rectangular prism, much +less points are to be expected. Decreasing this value, results in more model points and thus a more +accurate representation. However, note that number of point pair features to be computed is now +quadratically increased as the complexity is O(N\^2). This is especially a concern for 32 bit +systems, where large models can easily overshoot the available memory. Typically, values in the +range of 0.025 - 0.05 seem adequate for most of the applications, where the default value is 0.03. +(Note that there is a difference in this paremeter with the one presented in @cite drost2010. 
In +@cite drost2010 a uniform cuboid is used for quantization and model diameter is used for reference of +sampling. In my implementation, the cuboid is a rectangular prism, and each dimension is quantized +independently. I do not take reference from the diameter but along the individual dimensions. + +It would very wise to remove the outliers from the model and prepare an ideal model initially. This +is because, the outliers directly affect the relative computations and degrade the matching +accuracy. + +During runtime stage, the scene is again sampled by \f$RelativeSamplingStep\f$, as described above. +However this time, only a portion of the scene points are used as reference. This portion is +controlled by the parameter \f$RelativeSceneSampleStep\f$, where +\f$SceneSampleStep = (int)(1.0/RelativeSceneSampleStep)\f$. In other words, if the +\f$RelativeSceneSampleStep = 1.0/5.0\f$, the subsampled scene will once again be uniformly sampled to +1/5 of the number of points. Maximum value of this parameter is 1 and increasing this parameter also +increases the stability, but decreases the speed. Again, because of the initial scene-independent +relative sampling, fine tuning this parameter is not a big concern. This would only be an issue when +the model shape occupies a volume uniformly, or when the model shape is condensed in a tiny place +within the quantization volume (e.g. The octree representation would have too much empty cells). + +\f$RelativeDistanceStep\f$ acts as a step of discretization over the hash table. The point pair features +are quantized to be mapped to the buckets of the hashtable. This discretization involves a +multiplication and a casting to the integer. Adjusting RelativeDistanceStep in theory controls the +collision rate. Note that, more collisions on the hashtable results in less accurate estimations. +Reducing this parameter increases the affect of quantization but starts to assign non-similar point +pairs to the same bins. Increasing it however, wanes the ability to group the similar pairs. +Generally, because during the sampling stage, the training model points are selected uniformly with +a distance controlled by RelativeSamplingStep, RelativeDistanceStep is expected to equate to this +value. Yet again, values in the range of 0.025-0.05 are sensible. This time however, when the model +is dense, it is not advised to decrease this value. For noisy scenes, the value can be increased to +improve the robustness of the matching against noisy points. + +*/ + #endif diff --git a/modules/surface_matching/include/opencv2/surface_matching/icp.hpp b/modules/surface_matching/include/opencv2/surface_matching/icp.hpp index f0dc5b303..d8947027a 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/icp.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/icp.hpp @@ -35,15 +35,12 @@ // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. -// -// Author: Tolga Birdal - /** - * @file icp.hpp + * @file * * @brief Implementation of ICP (Iterative Closest Point) Algorithm - * @author Tolga Birdal + * @author Tolga Birdal */ #ifndef __OPENCV_SURFACE_MATCHING_ICP_HPP__ @@ -58,8 +55,11 @@ namespace cv { namespace ppf_match_3d { + +//! @addtogroup surface_matching +//! 
@{ + /** -* @class ICP * @brief This class implements a very efficient and robust variant of the iterative closest point (ICP) algorithm. * The task is to register a 3D model (or point cloud) against a set of noisy target data. The variants are put together * by myself after certain tests. The task is to be able to match partial, noisy point clouds in cluttered scenes, quickly. @@ -161,6 +161,8 @@ private: }; +//! @} + } // namespace ppf_match_3d } // namespace cv diff --git a/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp b/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp index 82093701b..cd9c4a422 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp @@ -35,8 +35,10 @@ // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. -// -// Author: Tolga Birdal + +/** @file +@author Tolga Birdal +*/ #ifndef __OPENCV_SURFACE_MATCHING_POSE3D_HPP__ #define __OPENCV_SURFACE_MATCHING_POSE3D_HPP__ @@ -50,6 +52,9 @@ namespace cv namespace ppf_match_3d { +//! @addtogroup surface_matching +//! @{ + class Pose3D; typedef Ptr Pose3DPtr; @@ -57,7 +62,6 @@ class PoseCluster3D; typedef Ptr PoseCluster3DPtr; /** -* @class Pose3D * @brief Class, allowing the storage of a pose. The data structure stores both * the quaternions and the matrix forms. It supports IO functionality together with * various helper methods to work with poses @@ -127,7 +131,6 @@ public: }; /** -* @class PoseCluster3D * @brief When multiple poses (see Pose3D) are grouped together (contribute to the same transformation) * pose clusters occur. This class is a general container for such groups of poses. It is possible to store, * load and perform IO on these poses. @@ -176,6 +179,7 @@ public: int id; }; +//! @} } // namespace ppf_match_3d } // namespace cv diff --git a/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp b/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp index f05b7a7a0..9c87ecac3 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp @@ -36,7 +36,10 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // -// Author: Tolga Birdal + +/** @file +@author Tolga Birdal +*/ #ifndef __OPENCV_SURFACE_MATCHING_HELPERS_HPP__ #define __OPENCV_SURFACE_MATCHING_HELPERS_HPP__ @@ -48,6 +51,9 @@ namespace cv namespace ppf_match_3d { +//! @addtogroup surface_matching +//! @{ + /** * @brief Load a PLY file * @param [in] fileName The PLY model to read @@ -140,6 +146,9 @@ CV_EXPORTS Mat addNoisePC(Mat pc, double scale); * @return Returns 0 on success */ CV_EXPORTS int computeNormalsPC3d(const Mat& PC, Mat& PCNormals, const int NumNeighbors, const bool FlipViewpoint, const double viewpoint[3]); + +//! 
@} + } // namespace ppf_match_3d } // namespace cv diff --git a/modules/surface_matching/include/opencv2/surface_matching/ppf_match_3d.hpp b/modules/surface_matching/include/opencv2/surface_matching/ppf_match_3d.hpp index 31c65fce7..ffc8615b3 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/ppf_match_3d.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/ppf_match_3d.hpp @@ -50,7 +50,10 @@ Model Globally, Match Locally: Efficient and Robust 3D Object Recognition IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR), San Francisco, California (USA), June 2010. ***/ -// Author: Tolga Birdal + +/** @file +@author Tolga Birdal +*/ #ifndef __OPENCV_SURFACE_MATCHING_PPF_MATCH_3D_HPP__ @@ -67,8 +70,10 @@ namespace cv namespace ppf_match_3d { +//! @addtogroup surface_matching +//! @{ + /** - * @struct THash * @brief Struct, holding a node in the hashtable */ typedef struct THash @@ -78,17 +83,16 @@ typedef struct THash } THash; /** - * @class PPF3DDetector * @brief Class, allowing the load and matching 3D models. * Typical Use: - * + * @code * // Train a model - * ppf_match_3d::PPF3DDetector detector(0.05, 0.05); + * ppf_match_3d::PPF3DDetector detector(0.05, 0.05); * detector.trainModel(pc); * // Search the model in a given scene - * vector results; + * vector results; * detector.match(pcTest, results, 1.0/5.0,0.05); - * + * @endcode */ class CV_EXPORTS PPF3DDetector { @@ -167,6 +171,8 @@ private: bool trained; }; +//! @} + } // namespace ppf_match_3d } // namespace cv diff --git a/modules/surface_matching/include/opencv2/surface_matching/t_hash_int.hpp b/modules/surface_matching/include/opencv2/surface_matching/t_hash_int.hpp index 95d973924..9e251e25b 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/t_hash_int.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/t_hash_int.hpp @@ -36,7 +36,10 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // -// Author: Tolga Birdal + +/** @file +@author Tolga Birdal +*/ #ifndef __OPENCV_SURFACE_MATCHING_T_HASH_INT_HPP__ #define __OPENCV_SURFACE_MATCHING_T_HASH_INT_HPP__ @@ -49,6 +52,9 @@ namespace cv namespace ppf_match_3d { +//! @addtogroup surface_matching +//! @{ + typedef unsigned int KeyType; typedef struct hashnode_i @@ -66,10 +72,12 @@ typedef struct HSHTBL_i } hashtable_int; +/** @brief Round up to the next highest power of 2 + +from http://www-graphics.stanford.edu/~seander/bithacks.html +*/ inline static unsigned int next_power_of_two(unsigned int value) { - /* Round up to the next highest power of 2 */ - /* from http://www-graphics.stanford.edu/~seander/bithacks.html */ --value; value |= value >> 1; @@ -95,6 +103,8 @@ hashtable_int *hashtableRead(FILE* f); int hashtableWrite(const hashtable_int * hashtbl, const size_t dataSize, FILE* f); void hashtablePrint(hashtable_int *hashtbl); +//! @} + } // namespace ppf_match_3d } // namespace cv diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp index e18e5631e..591424cb4 100644 --- a/modules/text/include/opencv2/text.hpp +++ b/modules/text/include/opencv2/text.hpp @@ -42,4 +42,60 @@ the use of this software, even if advised of the possibility of such damage. 
#include "opencv2/text/erfilter.hpp" #include "opencv2/text/ocr.hpp" +/** @defgroup text Scene Text Detection and Recognition + +The opencv_text module provides different algorithms for text detection and recognition in natural +scene images. + + @{ + @defgroup text_detect Scene Text Detection + +Class-specific Extremal Regions for Scene Text Detection +-------------------------------------------------------- + +The scene text detection algorithm described below has been initially proposed by Lukás Neumann & +Jiri Matas [Neumann12]. The main idea behind Class-specific Extremal Regions is similar to the MSER +in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image. +However, this technique differs from MSER in that selection of suitable ERs is done by a sequential +classifier trained for character detection, i.e. dropping the stability requirement of MSERs and +selecting class-specific (not necessarily stable) regions. + +The component tree of an image is constructed by thresholding by an increasing value step-by-step +from 0 to 255 and then linking the obtained connected components from successive levels in a +hierarchy by their inclusion relation: + +![image](pics/component_tree.png) + +The component tree may conatain a huge number of regions even for a very simple image as shown in +the previous image. This number can easily reach the order of 1 x 10\^6 regions for an average 1 +Megapixel image. In order to efficiently select suitable regions among all the ERs the algorithm +make use of a sequential classifier with two differentiated stages. + +In the first stage incrementally computable descriptors (area, perimeter, bounding box, and euler +number) are computed (in O(1)) for each region r and used as features for a classifier which +estimates the class-conditional probability p(r|character). Only the ERs which correspond to local +maximum of the probability p(r|character) are selected (if their probability is above a global limit +p_min and the difference between local maximum and local minimum is greater than a delta_min +value). + +In the second stage, the ERs that passed the first stage are classified into character and +non-character classes using more informative but also more computationally expensive features. (Hole +area ratio, convex hull ratio, and the number of outer boundary inflexion points). + +This ER filtering process is done in different single-channel projections of the input image in +order to increase the character localization recall. + +After the ER filtering is done on each input channel, character candidates must be grouped in +high-level text blocks (i.e. words, text lines, paragraphs, ...). The opencv_text module implements +two different grouping algorithms: the Exhaustive Search algorithm proposed in [Neumann11] for +grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas +in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping). + +To see the text detector at work, have a look at the textdetection demo: + + + @defgroup text_recognize Scene Text Recognition + @} +*/ + #endif diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index 3064a6b88..095ffea2a 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -54,15 +54,15 @@ namespace cv namespace text { -/*! - Extremal Region Stat structure +//! @addtogroup text_detect +//! 
@{ - The ERStat structure represents a class-specific Extremal Region (ER). +/** @brief The ERStat structure represents a class-specific Extremal Region (ER). - An ER is a 4-connected set of pixels with all its grey-level values smaller than the values - in its outer boundary. A class-specific ER is selected (using a classifier) from all the ER's - in the component tree of the image. -*/ +An ER is a 4-connected set of pixels with all its grey-level values smaller than the values in its +outer boundary. A class-specific ER is selected (using a classifier) from all the ER's in the +component tree of the image. : + */ struct CV_EXPORTS ERStat { public: @@ -111,31 +111,43 @@ public: ERStat* min_probability_ancestor; }; -/*! - Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithms - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 +/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm [Neumann12]. : - Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier. -*/ +Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier. + */ class CV_EXPORTS ERFilter : public Algorithm { public: - //! callback with the classifier is made a class. By doing it we hide SVM, Boost etc. + /** @brief Callback with the classifier is made a class. + + By doing it we hide SVM, Boost etc. Developers can provide their own classifiers to the + ERFilter algorithm. + */ class CV_EXPORTS Callback { public: virtual ~Callback() { } - //! The classifier must return probability measure for the region. + /** @brief The classifier must return probability measure for the region. + + @param stat : The region to be classified + */ virtual double eval(const ERStat& stat) = 0; //const = 0; //TODO why cannot use const = 0 here? }; - /*! - the key method. Takes image on input and returns the selected regions in a vector of ERStat - only distinctive ERs which correspond to characters are selected by a sequential classifier - \param image is the input image - \param regions is output for the first stage, input/output for the second one. - */ + /** @brief The key method of ERFilter algorithm. + + Takes image on input and returns the selected regions in a vector of ERStat only distinctive + ERs which correspond to characters are selected by a sequential classifier + + @param image Sinle channel image CV_8UC1 + + @param regions Output for the 1st stage and Input/Output for the 2nd. The selected Extremal Regions + are stored here. + + Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given + classifier. + */ virtual void run( InputArray image, std::vector& regions ) = 0; @@ -174,106 +186,136 @@ public: @param nonMaxSuppression – Whenever non-maximum suppression is done over the branch probabilities @param minProbabilityDiff – The minimum probability difference between local maxima and local minima ERs */ + +/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12]. + +@param cb : Callback with the classifier. Default classifier can be implicitly load with function +loadClassifierNM1, e.g. 
from file in samples/cpp/trained_classifierNM1.xml
+@param thresholdDelta : Threshold step in subsequent thresholds when extracting the component tree
+@param minArea : The minimum area (% of image size) allowed for retrieved ER's
+@param maxArea : The maximum area (% of image size) allowed for retrieved ER's
+@param minProbability : The minimum probability P(er|character) allowed for retrieved ER's
+@param nonMaxSuppression : Whenever non-maximum suppression is done over the branch probabilities
+@param minProbabilityDiff : The minimum probability difference between local maxima and local minima ERs
+
+The component tree of the image is extracted by a threshold increased step by step from 0 to 255;
+incrementally computable descriptors (aspect_ratio, compactness, number of holes, and number of
+horizontal crossings) are computed for each ER and used as features for a classifier which estimates
+the class-conditional probability P(er|character). The value of P(er|character) is tracked using the
+inclusion relation of ER across all thresholds and only the ERs which correspond to local maximum of
+the probability P(er|character) are selected (if the local maximum of the probability is above a
+global limit pmin and the difference between local maximum and local minimum is greater than
+minProbabilityDiff).
+ */
 CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
                                            int thresholdDelta = 1, float minArea = 0.00025,
                                            float maxArea = 0.13, float minProbability = 0.4,
                                            bool nonMaxSuppression = true,
                                            float minProbabilityDiff = 0.1);
 
-/*!
-    Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm
-    Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12].
 
-    In the second stage, the ERs that passed the first stage are classified into character
-    and non-character classes using more informative but also more computationally expensive
-    features. The classifier uses all the features calculated in the first stage and the following
-    additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.
+@param cb : Callback with the classifier. The default classifier can be implicitly loaded with the
+function loadClassifierNM2, e.g. from the file in samples/cpp/trained_classifierNM2.xml
+@param minProbability : The minimum probability P(er|character) allowed for retrieved ER's
 
-    \param  cb Callback with the classifier
-           default classifier can be implicitly load with function loadClassifierNM2()
-           from file in samples/cpp/trained_classifierNM2.xml
-    \param  minProbability The minimum probability P(er|character) allowed for retreived ER's
+In the second stage, the ERs that passed the first stage are classified into character and
+non-character classes using more informative but also more computationally expensive features. The
+classifier uses all the features calculated in the first stage and the following additional
+features: hole area ratio, convex hull ratio, and number of outer inflexion points.
+ */
 CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
                                            float minProbability = 0.3);
 
-/*!
-    Allow to implicitly load the default classifier when creating an ERFilter object.
-    The function takes as parameter the XML or YAML file with the classifier model
-    (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
-*/
+/** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
+ +@param filename The XML or YAML file with the classifier model (e.g. trained_classifierNM1.xml) +returns a pointer to ERFilter::Callback. + */ CV_EXPORTS Ptr loadClassifierNM1(const std::string& filename); -/*! - Allow to implicitly load the default classifier when creating an ERFilter object. - The function takes as parameter the XML or YAML file with the classifier model - (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback. -*/ +/** @brief Allow to implicitly load the default classifier when creating an ERFilter object. + +@param filename The XML or YAML file with the classifier model (e.g. trained_classifierNM2.xml) +returns a pointer to ERFilter::Callback. + */ CV_EXPORTS Ptr loadClassifierNM2(const std::string& filename); -// computeNMChannels operation modes +//! computeNMChannels operation modes enum { ERFILTER_NM_RGBLGrad, ERFILTER_NM_IHSGrad }; -/*! - Compute the different channels to be processed independently in the N&M algorithm - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 +/** @brief Compute the different channels to be processed independently in the N&M algorithm [Neumann12]. - In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient - magnitude channels (Grad) are used in order to obtain high localization recall. - This implementation also provides an alternative combination of red (R), green (G), blue (B), - lightness (L), and gradient magnitude (Grad). +@param _src Source image. Must be RGB CV_8UC3. - \param _src Source image. Must be RGB CV_8UC3. - \param _channels Output vector where computed channels are stored. - \param _mode Mode of operation. Currently the only available options are - ERFILTER_NM_RGBLGrad (by default) and ERFILTER_NM_IHSGrad. +@param _channels Output vector\ where computed channels are stored. -*/ +@param _mode Mode of operation. Currently the only available options are: +**ERFILTER_NM_RGBLGrad** (used by default) and **ERFILTER_NM_IHSGrad**. + +In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient magnitude +channels (Grad) are used in order to obtain high localization recall. This implementation also +provides an alternative combination of red (R), green (G), blue (B), lightness (L), and gradient +magnitude (Grad). + */ CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad); -// erGrouping operation modes -enum { ERGROUPING_ORIENTATION_HORIZ, - ERGROUPING_ORIENTATION_ANY +//! text::erGrouping operation modes +enum erGrouping_Modes { + + /** Exhaustive Search algorithm proposed in [Neumann11] for grouping horizontally aligned text. + The algorithm models a verification function for all the possible ER sequences. The + verification fuction for ER pairs consists in a set of threshold-based pairwise rules which + compare measurements of two regions (height ratio, centroid angle, and region distance). The + verification function for ER triplets creates a word text line estimate using Least + Median-Squares fitting for a given triplet and then verifies that the estimate is valid (based + on thresholds created during training). Verification functions for sequences larger than 3 are + approximated by verifying that the text line parameters of all (sub)sequences of length 3 are + consistent. + */ + ERGROUPING_ORIENTATION_HORIZ, + /** Text grouping method proposed in [Gomez13][Gomez14] for grouping arbitrary oriented text. 
Regions + are agglomerated by Single Linkage Clustering in a weighted feature space that combines proximity + (x,y coordinates) and similarity measures (color, size, gradient magnitude, stroke width, etc.). + SLC provides a dendrogram where each node represents a text group hypothesis. Then the algorithm + finds the branches corresponding to text groups by traversing this dendrogram with a stopping rule + that combines the output of a rotation invariant text group classifier and a probabilistic measure + for hierarchical clustering validity assessment. + */ + ERGROUPING_ORIENTATION_ANY }; -/*! - Find groups of Extremal Regions that are organized as text blocks. This function implements - the grouping algorithm described in: - Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013. - Notice that this implementation constrains the results to horizontally-aligned text and - latin script (since ERFilter classifiers are trained only for latin script detection). - - The algorithm combines two different clustering techniques in a single parameter-free procedure - to detect groups of regions organized as text. The maximally meaningful groups are fist detected - in several feature spaces, where each feature space is a combination of proximity information - (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.), - thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to - combine all these hypotheses to get the final estimate. Each of the resulting groups are finally - validated using a classifier in order to assest if they form a valid horizontally-aligned text block. - - @param img – Original RGB or Grayscale image from wich the regions were extracted. - @param channels – Vector of single channel images CV_8UC1 from wich the regions were extracted. - @param regions – Vector of ER’s retreived from the ERFilter algorithm from each channel. - @param groups – The output of the algorithm is stored in this parameter as set of lists of - indexes to provided regions. - @param groups_rects – The output of the algorithm are stored in this parameter as list of rectangles. - @param method – Grouping method (see the details below). Can be one of ERGROUPING_ORIENTATION_HORIZ, - ERGROUPING_ORIENTATION_ANY. - @param filename – The XML or YAML file with the classifier model (e.g. - samples/trained_classifier_erGrouping.xml). Only to use when grouping method is - ERGROUPING_ORIENTATION_ANY. - @param minProbablity – The minimum probability for accepting a group. Only to use when grouping - method is ERGROUPING_ORIENTATION_ANY. +/** @brief Find groups of Extremal Regions that are organized as text blocks. -*/ +@param img Original RGB or Greyscale image from wich the regions were extracted. + +@param channels Vector of single channel images CV_8UC1 from wich the regions were extracted. + +@param regions Vector of ER's retreived from the ERFilter algorithm from each channel. + +@param groups The output of the algorithm is stored in this parameter as set of lists of indexes to +provided regions. + +@param groups_rects The output of the algorithm are stored in this parameter as list of rectangles. + +@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, +ERGROUPING_ORIENTATION_ANY. + +@param filename The XML or YAML file with the classifier model (e.g. +samples/trained_classifier_erGrouping.xml). Only to use when grouping method is +ERGROUPING_ORIENTATION_ANY. 
+ +@param minProbablity The minimum probability for accepting a group. Only to use when grouping +method is ERGROUPING_ORIENTATION_ANY. + */ CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels, std::vector > ®ions, std::vector > &groups, @@ -282,15 +324,27 @@ CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels, const std::string& filename = std::string(), float minProbablity = 0.5); -/*! - * MSERsToERStats function converts MSER contours (vector) to ERStat regions. - * It takes as input the contours provided by the OpenCV MSER feature detector and returns as output two vectors - * of ERStats. MSER output contains both MSER+ and MSER- regions in a single vector, the function separates - * them in two different vectors (this is the ERStats where extracted from two different channels). - * */ +/** @brief Converts MSER contours (vector\) to ERStat regions. + +@param image Source image CV_8UC1 from which the MSERs where extracted. + +@param contours Intput vector with all the contours (vector\). + +@param regions Output where the ERStat regions are stored. + +It takes as input the contours provided by the OpenCV MSER feature detector and returns as output +two vectors of ERStats. This is because MSER() output contains both MSER+ and MSER- regions in a +single vector\, the function separates them in two different vectors (this is as if the +ERStats where extracted from two different channels). + +An example of MSERsToERStats in use can be found in the text detection webcam_demo: + + */ CV_EXPORTS void MSERsToERStats(InputArray image, std::vector > &contours, std::vector > ®ions); +//! @} + } } #endif // _OPENCV_TEXT_ERFILTER_HPP_ diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 0aa32318c..74106f746 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -52,6 +52,9 @@ namespace cv namespace text { +//! @addtogroup text_recognize +//! @{ + enum { OCR_LEVEL_WORD, @@ -68,15 +71,55 @@ public: int component_level=0) = 0; }; -/* OCR Tesseract */ +/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++. + +Notice that it is compiled only when tesseract-ocr is correctly installed. +@note + - (C++) An example of OCRTesseract recognition combined with scene text detection can be found + at the end_to_end_recognition demo: + + - (C++) Another example of OCRTesseract recognition combined with scene text detection can be + found at the webcam_demo: + + */ class CV_EXPORTS OCRTesseract : public BaseOCR { public: + /** @brief Recognize text using the tesseract-ocr API. + + Takes image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. + + @param image Input image CV_8UC1 or CV_8UC3 + @param output_text Output text of the tesseract-ocr. + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words or text lines). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words or text lines). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words or text lines). 
+    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE.
+     */
    virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL,
                     std::vector* component_texts=NULL, std::vector* component_confidences=NULL,
                     int component_level=0);

+    /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
+
+    @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
+    system's default directory.
+    @param language an ISO 639-3 code or NULL will default to "eng".
+    @param char_whitelist specifies the list of characters used for recognition. NULL defaults to
+    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
+    @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
+    tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
+    values.
+    @param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO
+    (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
+    possible values.
+     */
    static Ptr create(const char* datapath=NULL, const char* language=NULL,
                                     const char* char_whitelist=NULL, int oem=3, int psmode=3);
};

@@ -89,25 +132,83 @@ enum decoder_mode
    OCR_DECODER_VITERBI = 0 // Other algorithms may be added
};

+/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
+
+@note
+   -   (C++) An example of using OCRHMMDecoder recognition combined with scene text detection can
+        be found at the webcam_demo sample:
+
+ */
class CV_EXPORTS OCRHMMDecoder : public BaseOCR
{
public:

-    //! callback with the character classifier is made a class. This way we hide the feature extractor and the classifier itself
+    /** @brief Callback with the character classifier is made a class.
+
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code.
+
+    The default character classifier and feature extractor can be loaded using the utility function
+    loadOCRHMMClassifierNM and KNN model provided in
+    .
+     */
    class CV_EXPORTS ClassifierCallback
    {
    public:
        virtual ~ClassifierCallback() { }
-        //! The classifier must return a (ranked list of) class(es) id('s)
+        /** @brief The character classifier must return a (ranked list of) class(es) id('s)
+
+        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
+        @param out_class The classifier returns the character class categorical label, or list of
+        class labels, to which the input image corresponds.
+        @param out_confidence The classifier returns the probability of the input image
+        corresponding to each class in out_class.
+         */
        virtual void eval( InputArray image, std::vector& out_class, std::vector& out_confidence);
    };

public:
-    //! Decode a group of regions and output the most likely sequence of characters
+    /** @brief Recognize text using HMM.
+
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input image CV_8UC1 with a single text line (or word).
+
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.
+
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).
+ + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). + + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). + + @param component_level Only OCR_LEVEL_WORD is supported. + */ virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); + /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder. + + @param classifier The character classifier with built in feature extractor. + + @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size() + must be equal to the number of classes of the classifier. + + @param transition_probabilities_table Table with transition probabilities between character + pairs. cols == rows == vocabulary.size(). + + @param emission_probabilities_table Table with observation emission probabilities. cols == + rows == vocabulary.size(). + + @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment + (). + */ static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor const std::string& vocabulary, // The language vocabulary (chars when ascii english text) // size() must be equal to the number of classes @@ -126,8 +227,21 @@ protected: decoder_mode mode; }; +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. + +@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml) + +The default classifier is based in the scene text recognition method proposed by Lukás Neumann & +Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a +fixed size, while retaining the centroid and aspect ratio, in order to extract a feature vector +based on gradient orientations along the chain-code of its perimeter. Then, the region is classified +using a KNN model trained with synthetic data of rendered characters with different standard font +types. + */ CV_EXPORTS Ptr loadOCRHMMClassifierNM(const std::string& filename); +//! @} + } } #endif // _OPENCV_TEXT_OCR_HPP_ diff --git a/modules/tracking/doc/tracking.bib b/modules/tracking/doc/tracking.bib new file mode 100644 index 000000000..95d50506f --- /dev/null +++ b/modules/tracking/doc/tracking.bib @@ -0,0 +1,69 @@ +@inproceedings{MIL, + title={Visual tracking with online multiple instance learning}, + author={Babenko, Boris and Yang, Ming-Hsuan and Belongie, Serge}, + booktitle={Computer Vision and Pattern Recognition, 2009. CVPR 2009. 
IEEE Conference on},
+  pages={983--990},
+  year={2009},
+  organization={IEEE}
+}
+
+@inproceedings{OLB,
+  title={Real-Time Tracking via On-line Boosting.},
+  author={Grabner, Helmut and Grabner, Michael and Bischof, Horst},
+  booktitle={BMVC},
+  volume={1},
+  number={5},
+  pages={6},
+  year={2006}
+}
+
+@inproceedings{MedianFlow,
+  title={Forward-backward error: Automatic detection of tracking failures},
+  author={Kalal, Zdenek and Mikolajczyk, Krystian and Matas, Jiri},
+  booktitle={Pattern Recognition (ICPR), 2010 20th International Conference on},
+  pages={2756--2759},
+  year={2010},
+  organization={IEEE}
+}
+
+@article{TLD,
+  title={Tracking-learning-detection},
+  author={Kalal, Zdenek and Mikolajczyk, Krystian and Matas, Jiri},
+  journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
+  volume={34},
+  number={7},
+  pages={1409--1422},
+  year={2012},
+  publisher={IEEE}
+}
+
+@article{AAM,
+  title={Adaptive appearance modeling for video tracking: survey and evaluation},
+  author={Salti, Samuele and Cavallaro, Andrea and Di Stefano, Luigi},
+  journal={Image Processing, IEEE Transactions on},
+  volume={21},
+  number={10},
+  pages={4334--4348},
+  year={2012},
+  publisher={IEEE}
+}
+
+@article{AMVOT,
+  title={A survey of appearance models in visual object tracking},
+  author={Li, Xi and Hu, Weiming and Shen, Chunhua and Zhang, Zhongfei and Dick, Anthony and Hengel, Anton Van Den},
+  journal={ACM Transactions on Intelligent Systems and Technology (TIST)},
+  volume={4},
+  number={4},
+  pages={58},
+  year={2013},
+  publisher={ACM}
+}
+
+@inproceedings{OOT,
+  title={Online object tracking: A benchmark},
+  author={Wu, Yi and Lim, Jongwoo and Yang, Ming-Hsuan},
+  booktitle={Computer Vision and Pattern Recognition (CVPR), 2013 IEEE Conference on},
+  pages={2411--2418},
+  year={2013},
+  organization={IEEE}
+}
diff --git a/modules/tracking/include/opencv2/tracking.hpp b/modules/tracking/include/opencv2/tracking.hpp
index 1aec4df71..c4ab41fdd 100644
--- a/modules/tracking/include/opencv2/tracking.hpp
+++ b/modules/tracking/include/opencv2/tracking.hpp
@@ -44,6 +44,289 @@

 #include "opencv2/core/cvdef.h"

+/** @defgroup tracking Tracking API
+
+Long-term optical tracking API
+------------------------------
+
+Long-term optical tracking is one of the most important issues for many computer vision applications in
+real-world scenarios. The development in this area is very fragmented and this API is a unique
+interface useful for plugging in several algorithms and comparing them. This work is partially based on
+@cite AAM and @cite AMVOT.
+
+These algorithms start from a bounding box of the target and, with their internal representation, they
+avoid drift during tracking. These long-term trackers are able to evaluate online the
+quality of the location of the target in the new frame, without ground truth.
+
+There are three main components: the TrackerSampler, the TrackerFeatureSet and the TrackerModel. The
+first component is the object that computes the patches over the frame based on the last target
+location. The TrackerFeatureSet is the class that manages the features; it is possible to plug in many
+kinds of them (HAAR, HOG, LBP, Feature2D, etc.). The last component is the internal representation of the
+target; it is the appearance model. It stores all state candidates and computes the trajectory (the
+most likely target states). The class TrackerTargetState represents a possible state of the target.
+The TrackerSampler and the TrackerFeatureSet are the visual representation of the target, while
+the TrackerModel is the statistical model.
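+
+A minimal usage sketch of the public interface (an illustrative outline only, not one of the module
+samples; it assumes the MIL tracker built by this module and a standard OpenCV video source):
+@code
+        VideoCapture cap(0);                 // any video source will do (hypothetical example)
+        Mat frame;
+        cap >> frame;
+
+        Rect2d roi(100, 100, 80, 80);        // rough initial bounding box of the target (assumed)
+        Ptr<Tracker> tracker = Tracker::create("MIL");
+        tracker->init(frame, roi);
+
+        while (cap.read(frame))
+        {
+            // update() returns false when the target could not be located in the current frame
+            if (tracker->update(frame, roi))
+                rectangle(frame, Rect(roi), Scalar(0, 255, 0), 2);
+            imshow("tracking", frame);
+            if (waitKey(1) == 27) break;     // quit on ESC
+        }
+@endcode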
+
+A recent benchmark between these algorithms can be found in @cite OOT.
+
+UML design:
+-----------
+
+**General diagram**
+
+![General diagram](pics/package.png)
+
+**Tracker diagram**
+
+![Tracker diagram](pics/Tracker.png)
+
+**TrackerSampler diagram**
+
+![TrackerSampler diagram](pics/TrackerSampler.png)
+
+**TrackerFeatureSet diagram**
+
+![TrackerFeatureSet diagram](pics/TrackerFeature.png)
+
+**TrackerModel diagram**
+
+![TrackerModel diagram](pics/TrackerModel.png)
+
+To see how the API works, try the tracker demo:
+
+
+@note This Tracking API has been designed with PlantUML. If you modify this API please change the UML
+files under modules/tracking/misc/ The following reference was used in the API
+
+Creating Own Tracker
+--------------------
+
+If you want to create a new tracker, here's what you have to do. First, decide on the name of the class
+for the tracker (to meet the existing style, we suggest something with prefix "tracker", e.g.
+trackerMIL, trackerBoosting) -- we shall refer to this choice as "classname" in what follows. Also,
+you should decide upon the name of the tracker, as it will be known to the user (the current style
+suggests using all capitals, say MIL or BOOSTING) -- we'll call it a "name".
+
+-   Declare your tracker in include/opencv2/tracking/tracker.hpp. Your tracker should inherit from
+    Tracker (please, see the example below). You should declare the specialized Params structure,
+    where you will probably want to put the data needed to initialize your tracker. Also don't
+    forget to put the BOILERPLATE_CODE(name,classname) macro inside the class declaration. That
+    macro will generate a static createTracker() function, which we'll talk about later. You should
+    get something similar to :
+@code
+        class CV_EXPORTS_W TrackerMIL : public Tracker
+        {
+         public:
+          struct CV_EXPORTS Params
+          {
+            Params();
+            //parameters for sampler
+            float samplerInitInRadius;   // radius for gathering positive instances during init
+            int samplerInitMaxNegNum;    // # negative samples to use during init
+            float samplerSearchWinSize;  // size of search window
+            float samplerTrackInRadius;  // radius for gathering positive instances during tracking
+            int samplerTrackMaxPosNum;   // # positive samples to use during tracking
+            int samplerTrackMaxNegNum;   // # negative samples to use during tracking
+            int featureSetNumFeatures;   // #features
+
+            void read( const FileNode& fn );
+            void write( FileStorage& fs ) const;
+          };
+@endcode
+    of course, you can also add any additional methods of your choice. It should be pointed out,
+    however, that it is not expected to have a constructor declared, as creation should be done via
+    the corresponding createTracker() method.
+-   In the src/tracker.cpp file add a BOILERPLATE_CODE(name,classname) line to the body of
+    the Tracker::create() method you will find there, like :
+@code
+        Ptr Tracker::create( const String& trackerType )
+        {
+          BOILERPLATE_CODE("BOOSTING",TrackerBoosting);
+          BOILERPLATE_CODE("MIL",TrackerMIL);
+          return Ptr();
+        }
+@endcode
+-   Finally, you should implement the function with signature :
+@code
+        Ptr classname::createTracker(const classname::Params &parameters){
+            ...
+        }
+@endcode
+    That function can (and probably will) return a pointer to some derived class of "classname",
+    which will probably have a real constructor.
+
+Every tracker has three components: TrackerSampler, TrackerFeatureSet and TrackerModel.
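+The three components are typically wired together inside the tracker's initImpl. The following is a
+rough, hypothetical outline (names such as MyTracker, CSCparameters and HAARparameters are
+placeholders), stitched together from the examples in the sections below:
+@code
+        bool MyTracker::initImpl( const Mat& image, const Rect2d& boundingBox )
+        {
+          // sampling strategy (the TrackerSampler itself is created by the Tracker base class)
+          Ptr<TrackerSamplerAlgorithm> CSCSampler = new TrackerSamplerCSC( CSCparameters );
+          sampler->addTrackerSamplerAlgorithm( CSCSampler );
+
+          // features (the TrackerFeatureSet itself is created by the Tracker base class)
+          Ptr<TrackerFeature> trackerFeature = new TrackerFeatureHAAR( HAARparameters );
+          featureSet->addTrackerFeature( trackerFeature );
+
+          // your own appearance model plus a state estimator
+          model = new TrackerMILModel( boundingBox );
+          Ptr<TrackerStateEstimator> stateEstimator =
+              new TrackerStateEstimatorMILBoosting( params.featureSetNumFeatures );
+          model->setTrackerStateEstimator( stateEstimator );
+          return true;
+        }
+@endcode
+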
+The TrackerSampler and the TrackerFeatureSet are instantiated by the Tracker base class, while the
+TrackerModel is abstract, so you must implement your own TrackerModel.
+
+### TrackerSampler
+
+TrackerSampler is already instantiated, but you should define the sampling algorithm and add the
+classes (or a single class) to TrackerSampler. You can choose one of the ready implementations, such as
+TrackerSamplerCSC, or you can implement your own sampling method; in that case the class must inherit
+from TrackerSamplerAlgorithm. Fill the samplingImpl method so that it writes the result into the
+"sample" output argument.
+
+Example of creating specialized TrackerSamplerAlgorithm TrackerSamplerCSC : :
+@code
+    class CV_EXPORTS_W TrackerSamplerCSC : public TrackerSamplerAlgorithm
+    {
+     public:
+      TrackerSamplerCSC( const TrackerSamplerCSC::Params &parameters = TrackerSamplerCSC::Params() );
+      ~TrackerSamplerCSC();
+      ...
+
+     protected:
+      bool samplingImpl( const Mat& image, Rect boundingBox, std::vector& sample );
+      ...
+
+    };
+@endcode
+
+Example of adding TrackerSamplerAlgorithm to TrackerSampler : :
+@code
+    //sampler is the TrackerSampler
+    Ptr CSCSampler = new TrackerSamplerCSC( CSCparameters );
+    if( !sampler->addTrackerSamplerAlgorithm( CSCSampler ) )
+      return false;
+
+    //or add CSC sampler with default parameters
+    //sampler->addTrackerSamplerAlgorithm( "CSC" );
+@endcode
+@sa
+   TrackerSamplerCSC, TrackerSamplerAlgorithm
+
+### TrackerFeatureSet
+
+TrackerFeatureSet is already instantiated (as the first component), but you should define what kinds of
+features you'll use in your tracker. You can use multiple feature types, so you can add a ready
+implementation such as TrackerFeatureHAAR to your TrackerFeatureSet or develop your own implementation.
+In the latter case, put the code that extracts the features in the computeImpl method, and optionally
+put the code for the refinement and selection of the features in the selection method.
+
+Example of creating specialized TrackerFeature TrackerFeatureHAAR : :
+@code
+    class CV_EXPORTS_W TrackerFeatureHAAR : public TrackerFeature
+    {
+     public:
+      TrackerFeatureHAAR( const TrackerFeatureHAAR::Params &parameters = TrackerFeatureHAAR::Params() );
+      ~TrackerFeatureHAAR();
+      void selection( Mat& response, int npoints );
+      ...
+
+     protected:
+      bool computeImpl( const std::vector& images, Mat& response );
+      ...
+
+    };
+@endcode
+Example of adding TrackerFeature to TrackerFeatureSet : :
+@code
+    //featureSet is the TrackerFeatureSet
+    Ptr trackerFeature = new TrackerFeatureHAAR( HAARparameters );
+    featureSet->addTrackerFeature( trackerFeature );
+@endcode
+@sa
+   TrackerFeatureHAAR, TrackerFeatureSet
+
+### TrackerModel
+
+TrackerModel is abstract, so in your implementation you must develop a TrackerModel that inherits
+from TrackerModel. Fill the method for the estimation of the state, "modelEstimationImpl", that
+estimates the most likely target location; see @cite AAM table I (ME) for further information. Fill
+"modelUpdateImpl" in order to update the model; see @cite AAM table I (MU). In this class you can use
+the ConfidenceMap and Trajectory to store the model. The first represents the model on all the
+possible candidate states and the second represents the list of all estimated states.
+
+Example of creating specialized TrackerModel TrackerMILModel : :
+@code
+    class TrackerMILModel : public TrackerModel
+    {
+     public:
+      TrackerMILModel( const Rect& boundingBox );
+      ~TrackerMILModel();
+      ...
+
+     protected:
+      void modelEstimationImpl( const std::vector& responses );
+      void modelUpdateImpl();
+      ...
+ + }; +@endcode +And add it in your Tracker : : +@code + bool TrackerMIL::initImpl( const Mat& image, const Rect2d& boundingBox ) + { + ... + //model is the general TrackerModel field od the general Tracker + model = new TrackerMILModel( boundingBox ); + ... + } +@endcode +In the last step you should define the TrackerStateEstimator based on your implementation or you can +use one of ready class as TrackerStateEstimatorMILBoosting. It represent the statistical part of the +model that estimates the most likely target state. + +Example of creating specialized TrackerStateEstimator TrackerStateEstimatorMILBoosting : : +@code + class CV_EXPORTS_W TrackerStateEstimatorMILBoosting : public TrackerStateEstimator + { + class TrackerMILTargetState : public TrackerTargetState + { + ... + }; + + public: + TrackerStateEstimatorMILBoosting( int nFeatures = 250 ); + ~TrackerStateEstimatorMILBoosting(); + ... + + protected: + Ptr estimateImpl( const std::vector& confidenceMaps ); + void updateImpl( std::vector& confidenceMaps ); + ... + + }; +@endcode +And add it in your TrackerModel : : +@code + //model is the TrackerModel of your Tracker + Ptr stateEstimator = new TrackerStateEstimatorMILBoosting( params.featureSetNumFeatures ); + model->setTrackerStateEstimator( stateEstimator ); +@endcode +@sa + TrackerModel, TrackerStateEstimatorMILBoosting, TrackerTargetState + +During this step, you should define your TrackerTargetState based on your implementation. +TrackerTargetState base class has only the bounding box (upper-left position, width and height), you +can enrich it adding scale factor, target rotation, etc. + +Example of creating specialized TrackerTargetState TrackerMILTargetState : : +@code + class TrackerMILTargetState : public TrackerTargetState + { + public: + TrackerMILTargetState( const Point2f& position, int targetWidth, int targetHeight, bool foreground, const Mat& features ); + ~TrackerMILTargetState(); + ... + + private: + bool isTarget; + Mat targetFeatures; + ... + + }; +@endcode +### Try it + +To try your tracker you can use the demo at +. + +The first argument is the name of the tracker and the second is a video source. + +*/ + namespace cv { CV_EXPORTS bool initModule_tracking(void); diff --git a/modules/tracking/include/opencv2/tracking/feature.hpp b/modules/tracking/include/opencv2/tracking/feature.hpp index 34c0f15af..b354d62cc 100644 --- a/modules/tracking/include/opencv2/tracking/feature.hpp +++ b/modules/tracking/include/opencv2/tracking/feature.hpp @@ -56,6 +56,9 @@ namespace cv { +//! @addtogroup tracking +//! @{ + #define FEATURES "features" #define CC_FEATURES FEATURES @@ -405,6 +408,8 @@ inline uchar CvLBPEvaluator::Feature::calc( const Mat &_sum, size_t y ) const ( psum[p[4]] - psum[p[5]] - psum[p[8]] + psum[p[9]] >= cval ? 1 : 0 ) ); // 3 } +//! @} + } /* namespace cv */ #endif diff --git a/modules/tracking/include/opencv2/tracking/onlineBoosting.hpp b/modules/tracking/include/opencv2/tracking/onlineBoosting.hpp index d0eb3a10b..982bc205b 100644 --- a/modules/tracking/include/opencv2/tracking/onlineBoosting.hpp +++ b/modules/tracking/include/opencv2/tracking/onlineBoosting.hpp @@ -46,6 +46,10 @@ namespace cv { + +//! @addtogroup tracking +//! @{ + //TODO based on the original implementation //http://vision.ucsd.edu/~bbabenko/project_miltrack.shtml @@ -277,6 +281,8 @@ class ClassifierThreshold int m_parity; }; +//! 
@} + } /* namespace cv */ #endif diff --git a/modules/tracking/include/opencv2/tracking/onlineMIL.hpp b/modules/tracking/include/opencv2/tracking/onlineMIL.hpp index 2bb006334..be03ec8eb 100644 --- a/modules/tracking/include/opencv2/tracking/onlineMIL.hpp +++ b/modules/tracking/include/opencv2/tracking/onlineMIL.hpp @@ -47,6 +47,10 @@ namespace cv { + +//! @addtogroup tracking +//! @{ + //TODO based on the original implementation //http://vision.ucsd.edu/~bbabenko/project_miltrack.shtml @@ -109,6 +113,8 @@ class ClfOnlineStump }; +//! @} + } /* namespace cv */ #endif diff --git a/modules/tracking/include/opencv2/tracking/tracker.hpp b/modules/tracking/include/opencv2/tracking/tracker.hpp index 8a91ed7e5..1d85a6106 100644 --- a/modules/tracking/include/opencv2/tracking/tracker.hpp +++ b/modules/tracking/include/opencv2/tracking/tracker.hpp @@ -66,39 +66,49 @@ namespace cv { +//! @addtogroup tracking +//! @{ + /************************************ TrackerFeature Base Classes ************************************/ -/** - * \brief Abstract base class for TrackerFeature that represents the feature. +/** @brief Abstract base class for TrackerFeature that represents the feature. */ class CV_EXPORTS_W TrackerFeature { public: virtual ~TrackerFeature(); - /** - * \brief Compute the features in the images collection - * \param images The images. - * \param response Computed features. - */ + /** @brief Compute the features in the images collection + @param images The images + @param response The output response + */ void compute( const std::vector& images, Mat& response ); - /** - * \brief Create TrackerFeature by tracker feature type. - */ + /** @brief Create TrackerFeature by tracker feature type + @param trackerFeatureType The TrackerFeature name + + The modes available now: + + - "HAAR" -- Haar Feature-based + + The modes that will be available soon: + + - "HOG" -- Histogram of Oriented Gradients features + - "LBP" -- Local Binary Pattern features + - "FEATURE2D" -- All types of Feature2D + */ static Ptr create( const String& trackerFeatureType ); - /** - * \brief Identify most effective features - * \param response Collection of response for the specific TrackerFeature - * \param npoints Max number of features - */ + /** @brief Identify most effective features + @param response Collection of response for the specific TrackerFeature + @param npoints Max number of features + + @note This method modifies the response parameter + */ virtual void selection( Mat& response, int npoints ) = 0; - /** - * \brief Get the name of the specific tracker feature - * \return The name of the tracker feature - */ + /** @brief Get the name of the specific TrackerFeature + */ String getClassName() const; protected: @@ -108,10 +118,17 @@ class CV_EXPORTS_W TrackerFeature String className; }; -/** - * \brief Class that manages the extraction and selection of features - * [AAM] Feature Extraction and Feature Set Refinement (Feature Processing and Feature Selection). See table I and section III C - * [AMVOT] Appearance modelling -> Visual representation (Table II, section 3.1 - 3.2) +/** @brief Class that manages the extraction and selection of features + +@cite AAM Feature Extraction and Feature Set Refinement (Feature Processing and Feature Selection). 
+See table I and section III C @cite AMVOT Appearance modelling -\> Visual representation (Table II, +section 3.1 - 3.2) + +TrackerFeatureSet is an aggregation of TrackerFeature + +@sa + TrackerFeature + */ class CV_EXPORTS_W TrackerFeatureSet { @@ -121,46 +138,59 @@ class CV_EXPORTS_W TrackerFeatureSet ~TrackerFeatureSet(); - /** - * \brief Extract features from the images collection - * \param images The images - */ + /** @brief Extract features from the images collection + @param images The input images + */ void extraction( const std::vector& images ); - /** - * \brief Identify most effective features for all feature types - */ + /** @brief Identify most effective features for all feature types (optional) + */ void selection(); - /** - * \brief Remove outliers for all feature types - */ + /** @brief Remove outliers for all feature types (optional) + */ void removeOutliers(); - /** - * \brief Add TrackerFeature in the collection from tracker feature type - * \param trackerFeatureType the tracker feature type FEATURE2D.DETECTOR.DESCRIPTOR - HOG - HAAR - LBP - * \return true if feature is added, false otherwise - */ + /** @brief Add TrackerFeature in the collection. Return true if TrackerFeature is added, false otherwise + @param trackerFeatureType The TrackerFeature name + + The modes available now: + + - "HAAR" -- Haar Feature-based + + The modes that will be available soon: + + - "HOG" -- Histogram of Oriented Gradients features + - "LBP" -- Local Binary Pattern features + - "FEATURE2D" -- All types of Feature2D + + Example TrackerFeatureSet::addTrackerFeature : : + @code + //sample usage: + + Ptr trackerFeature = new TrackerFeatureHAAR( HAARparameters ); + featureSet->addTrackerFeature( trackerFeature ); + + //or add CSC sampler with default parameters + //featureSet->addTrackerFeature( "HAAR" ); + @endcode + @note If you use the second method, you must initialize the TrackerFeature + */ bool addTrackerFeature( String trackerFeatureType ); - /** - * \brief Add TrackerFeature in collection directly - * \param feature The TrackerFeature - * \return true if feature is added, false otherwise - */ + /** @overload + @param feature The TrackerFeature class + */ bool addTrackerFeature( Ptr& feature ); - /** - * \brief Get the TrackerFeature collection - * \return The TrackerFeature collection - */ + /** @brief Get the TrackerFeature collection (TrackerFeature name, TrackerFeature pointer) + */ const std::vector > >& getTrackerFeature() const; - /** - * \brief Get the responses - * \return the responses - */ + /** @brief Get the responses + + @note Be sure to call extraction before getResponses Example TrackerFeatureSet::getResponses : : + */ const std::vector& getResponses() const; private: @@ -175,8 +205,8 @@ class CV_EXPORTS_W TrackerFeatureSet /************************************ TrackerSampler Base Classes ************************************/ -/** - * \brief Abstract base class for TrackerSamplerAlgorithm that represents the algorithm for the specific sampler. +/** @brief Abstract base class for TrackerSamplerAlgorithm that represents the algorithm for the specific +sampler. */ class CV_EXPORTS_W TrackerSamplerAlgorithm { @@ -186,24 +216,29 @@ class CV_EXPORTS_W TrackerSamplerAlgorithm */ virtual ~TrackerSamplerAlgorithm(); - /** - * \brief Create TrackerSamplerAlgorithm by tracker sampler type. - */ + /** @brief Create TrackerSamplerAlgorithm by tracker sampler type. 
+ @param trackerSamplerType The trackerSamplerType name + + The modes available now: + + - "CSC" -- Current State Center + - "CS" -- Current State + */ static Ptr create( const String& trackerSamplerType ); - /** - * \brief Computes the regions starting from a position in an image - * \param image The image - * \param boundingBox The bounding box from which regions can be calculated - * \param sample The computed samples [AAM] Fig. 1 variable Sk - * \return true if samples are computed, false otherwise - */ + /** @brief Computes the regions starting from a position in an image. + + Return true if samples are computed, false otherwise + + @param image The current frame + @param boundingBox The bounding box from which regions can be calculated + + @param sample The computed samples @cite AAM Fig. 1 variable Sk + */ bool sampling( const Mat& image, Rect boundingBox, std::vector& sample ); - /** - * \brief Get the name of the specific sampler algorithm - * \return The name of the tracker sampler algorithm - */ + /** @brief Get the name of the specific TrackerSamplerAlgorithm + */ String getClassName() const; protected: @@ -216,6 +251,15 @@ class CV_EXPORTS_W TrackerSamplerAlgorithm * \brief Class that manages the sampler in order to select regions for the update the model of the tracker * [AAM] Sampling e Labeling. See table I and section III B */ + +/** @brief Class that manages the sampler in order to select regions for the update the model of the tracker + +@cite AAM Sampling e Labeling. See table I and section III B + +TrackerSampler is an aggregation of TrackerSamplerAlgorithm +@sa + TrackerSamplerAlgorithm + */ class CV_EXPORTS_W TrackerSampler { public: @@ -230,37 +274,46 @@ class CV_EXPORTS_W TrackerSampler */ ~TrackerSampler(); - /** - * \brief Computes the regions starting from a position in an image - * \param image The image - * \param boundingBox The bounding box from which regions can be calculated - */ + /** @brief Computes the regions starting from a position in an image + @param image The current frame + @param boundingBox The bounding box from which regions can be calculated + */ void sampling( const Mat& image, Rect boundingBox ); - /** - * Get the all samplers - * \return The samplers - */ + /** @brief Return the collection of the TrackerSamplerAlgorithm + */ const std::vector > >& getSamplers() const; - /** - * Get the samples from all TrackerSamplerAlgorithm - * \return The samples [AAM] Fig. 1 variable Sk - */ + /** @brief Return the samples from all TrackerSamplerAlgorithm, @cite AAM Fig. 1 variable Sk + */ const std::vector& getSamples() const; - /** - * \brief Add TrackerSamplerAlgorithm in the collection from tracker sampler type - * \param trackerSamplerAlgorithmType the tracker sampler type CSC - CS - * \return true if sampler is added, false otherwise - */ + /** @brief Add TrackerSamplerAlgorithm in the collection. 
Return true if sampler is added, false otherwise + @param trackerSamplerAlgorithmType The TrackerSamplerAlgorithm name + + The modes available now: + - "CSC" -- Current State Center + - "CS" -- Current State + - "PF" -- Particle Filtering + + Example TrackerSamplerAlgorithm::addTrackerSamplerAlgorithm : : + @code + TrackerSamplerCSC::Params CSCparameters; + Ptr CSCSampler = new TrackerSamplerCSC( CSCparameters ); + + if( !sampler->addTrackerSamplerAlgorithm( CSCSampler ) ) + return false; + + //or add CSC sampler with default parameters + //sampler->addTrackerSamplerAlgorithm( "CSC" ); + @endcode + @note If you use the second method, you must initialize the TrackerSamplerAlgorithm + */ bool addTrackerSamplerAlgorithm( String trackerSamplerAlgorithmType ); - /** - * \brief Add TrackerSamplerAlgorithm in collection directly - * \param sampler The TrackerSamplerAlgorithm - * \return true if sampler is added, false otherwise - */ + /** @overload + @param sampler The TrackerSamplerAlgorithm + */ bool addTrackerSamplerAlgorithm( Ptr& sampler ); private: @@ -273,10 +326,12 @@ class CV_EXPORTS_W TrackerSampler /************************************ TrackerModel Base Classes ************************************/ -/** - * \brief Abstract base class for TrackerTargetState that represents a possible state of the target - * [AAM] x̄_i all the states candidates - * Inherits this with your Target state +/** @brief Abstract base class for TrackerTargetState that represents a possible state of the target. + +See @cite AAM \f$\hat{x}^{i}_{k}\f$ all the states candidates. + +Inherits this class with your Target state, In own implementation you can add scale variation, +width, height, orientation, etc. */ class CV_EXPORTS_W TrackerTargetState { @@ -326,50 +381,57 @@ class CV_EXPORTS_W TrackerTargetState }; -/** - * \brief Represents the model of the target at frame k (all states and scores) - * [AAM] The set of the pair (x̄_k(i), C_k(i)) +/** @brief Represents the model of the target at frame \f$k\f$ (all states and scores) + +See @cite AAM The set of the pair \f$\langle \hat{x}^{i}_{k}, C^{i}_{k} \rangle\f$ +@sa TrackerTargetState */ typedef std::vector, float> > ConfidenceMap; -/** - * \brief Represents the estimate states for all frames - * [AAM] Xk is the trajectory of the target up to time k +/** @brief Represents the estimate states for all frames + +@cite AAM \f$x_{k}\f$ is the trajectory of the target up to time \f$k\f$ + +@sa TrackerTargetState */ typedef std::vector > Trajectory; -/** - * \brief Abstract base class for TrackerStateEstimator that estimates the most likely target state. - * [AAM] State estimator - * [AMVOT] Statistical modeling (Fig. 3), Table III (generative) - IV (discriminative) - V (hybrid) +/** @brief Abstract base class for TrackerStateEstimator that estimates the most likely target state. + +See @cite AAM State estimator + +See @cite AMVOT Statistical modeling (Fig. 
3), Table III (generative) - IV (discriminative) - V (hybrid) */ class CV_EXPORTS_W TrackerStateEstimator { public: virtual ~TrackerStateEstimator(); - /** - * \brief Estimate the most likely target state - * \param confidenceMaps The overall appearance model - * \return The estimated state - */ + /** @brief Estimate the most likely target state, return the estimated state + @param confidenceMaps The overall appearance model as a list of :cConfidenceMap + */ Ptr estimate( const std::vector& confidenceMaps ); - /** - * \brief Update the ConfidenceMap with the scores - * \param confidenceMaps The overall appearance model - */ + /** @brief Update the ConfidenceMap with the scores + @param confidenceMaps The overall appearance model as a list of :cConfidenceMap + */ void update( std::vector& confidenceMaps ); - /** - * \brief Create TrackerStateEstimator by tracker state estimator type SVM - BOOSTING. - */ + /** @brief Create TrackerStateEstimator by tracker state estimator type + @param trackeStateEstimatorType The TrackerStateEstimator name + + The modes available now: + + - "BOOSTING" -- Boosting-based discriminative appearance models. See @cite AMVOT section 4.4 + + The modes available soon: + + - "SVM" -- SVM-based discriminative appearance models. See @cite AMVOT section 4.5 + */ static Ptr create( const String& trackeStateEstimatorType ); - /** - * \brief Get the name of the specific state estimator - * \return The name of the state estimator - */ + /** @brief Get the name of the specific TrackerStateEstimator + */ String getClassName() const; protected: @@ -379,9 +441,12 @@ class CV_EXPORTS_W TrackerStateEstimator String className; }; -/** - * \brief Abstract class that represents the model of the target. It must be instantiated by specialized tracker - * [AAM] Ak +/** @brief Abstract class that represents the model of the target. 
It must be instantiated by a specialized
+tracker
+
+See @cite AAM Ak
+
+Inherit this class with your TrackerModel
 */
class CV_EXPORTS_W TrackerModel
{
@@ -397,59 +462,48 @@ class CV_EXPORTS_W TrackerModel
   */
  virtual ~TrackerModel();

-  /**
-   * \brief Set TrackerEstimator
-   * \return true if the tracker state estimator is added, false otherwise
-   */
+  /** @brief Set TrackerStateEstimator, return true if the tracker state estimator is added, false otherwise
+    @param trackerStateEstimator The TrackerStateEstimator
+    @note You can add only one TrackerStateEstimator
+     */
  bool setTrackerStateEstimator( Ptr trackerStateEstimator );

-  /**
-   * \brief Estimate the most likely target location
-   * [AAM] ME, Model Estimation table I
-   * \param responses Features extracted
-   */
+  /** @brief Estimate the most likely target location
+
+    @cite AAM ME, Model Estimation table I
+    @param responses Features extracted from TrackerFeatureSet
+     */
  void modelEstimation( const std::vector& responses );

-  /**
-   * \brief Update the model
-   * [AAM] MU, Model Update table I
-   */
+  /** @brief Update the model
+
+    @cite AAM MU, Model Update table I
+     */
  void modelUpdate();

-  /**
-   * \brief Run the TrackerStateEstimator
-   * \return true if is possible to estimate a new state, false otherwise
-   */
+  /** @brief Run the TrackerStateEstimator, return true if it is possible to estimate a new state, false otherwise
+     */
  bool runStateEstimator();

-  /**
-   * \brief Set the current estimated state
-   * \param lastTargetState the current estimated state
-   */
+  /** @brief Set the current TrackerTargetState in the Trajectory
+    @param lastTargetState The current TrackerTargetState
+     */
  void setLastTargetState( const Ptr& lastTargetState );

-  /**
-   * \brief Get the last target state
-   * \return The last target state
-   */
+  /** @brief Get the last TrackerTargetState from Trajectory
+     */
  Ptr getLastTargetState() const;

-  /**
-   * \brief Get the list of the confidence map
-   * \return The list of the confidence map
-   */
+  /** @brief Get the list of the ConfidenceMap
+     */
  const std::vector& getConfidenceMaps() const;

-  /**
-   * \brief Get the last confidence map
-   * \return The the last confidence map
-   */
+  /** @brief Get the last ConfidenceMap for the current frame
+     */
  const ConfidenceMap& getLastConfidenceMap() const;

-  /**
-   * \brief Get the tracker state estimator
-   * \return The tracker state estimator
-   */
+  /** @brief Get the TrackerStateEstimator
+     */
  Ptr getTrackerStateEstimator() const;

 private:
@@ -470,8 +524,7 @@ class CV_EXPORTS_W TrackerModel

/************************************ Tracker Base Class ************************************/

-/**
- * \brief Abstract base class for Tracker algorithm.
+/** @brief Base abstract class for the long-term tracker:
 */
class CV_EXPORTS_W Tracker : public virtual Algorithm
{
@@ -479,25 +532,33 @@ class CV_EXPORTS_W Tracker : public virtual Algorithm

  virtual ~Tracker();

-  /**
-   * \brief Initialize the tracker at the first frame.
-   * \param image The image.
-   * \param boundingBox The bounding box.
-   * \return true the tracker is initialized, false otherwise
-   */
+  /** @brief Initialize the tracker with a known bounding box that surrounds the target
+    @param image The initial frame
+    @param boundingBox The initial bounding box
+
+    @return True if initialization went successfully, false otherwise
+     */
  bool init( const Mat& image, const Rect2d& boundingBox );

-  /**
-   * \brief Update the tracker at the next frames.
-   * \param image The image.
-   * \param boundingBox The bounding box.
-   * \return true the tracker is updated, false otherwise
-   */
+  /** @brief Update the tracker, find the new most likely bounding box for the target
+    @param image The current frame
+    @param boundingBox The bounding box that represents the new target location, if true was returned, not
+    modified otherwise
+
+    @return True means that the target was located and false means that the tracker cannot locate the
+    target in the current frame. Note that the latter *does not* imply that the tracker has failed, maybe
+    the target is indeed missing from the frame (say, out of sight)
+     */
  bool update( const Mat& image, Rect2d& boundingBox );

-  /**
-   * \brief Create tracker by tracker type MIL - BOOSTING.
-   */
+  /** @brief Creates a tracker by its name.
+    @param trackerType Tracker type
+
+    The following tracker types are supported:
+
+    -   "MIL" -- TrackerMIL
+    -   "BOOSTING" -- TrackerBoosting
+     */
  static Ptr create( const String& trackerType );

  virtual void read( const FileNode& fn )=0;
@@ -518,9 +579,8 @@ class CV_EXPORTS_W Tracker : public virtual Algorithm

/************************************ Specific TrackerStateEstimator Classes ************************************/

-/**
- * \brief TrackerStateEstimator based on MILBoosting
- */
+/** @brief TrackerStateEstimator based on Boosting
+ */
class CV_EXPORTS_W TrackerStateEstimatorMILBoosting : public TrackerStateEstimator
{
 public:

@@ -550,12 +610,19 @@ class CV_EXPORTS_W TrackerStateEstimatorMILBoosting : public TrackerStateEstimat
    }
    ;

-    /**
-     * setters and getters
+    /** @brief Set label: true for target foreground, false for background
+    @param foreground Label for background/foreground
     */
    void setTargetFg( bool foreground );
+    /** @brief Set the features extracted from TrackerFeatureSet
+    @param features The features extracted
+     */
    void setFeatures( const Mat& features );
+    /** @brief Get the label.
Return true for target foreground, false for background + */ bool isTargetFg() const; + /** @brief Get the features extracted + */ Mat getFeatures() const; private: @@ -563,9 +630,15 @@ class CV_EXPORTS_W TrackerStateEstimatorMILBoosting : public TrackerStateEstimat Mat targetFeatures; }; + /** @brief Constructor + @param nFeatures Number of features for each sample + */ TrackerStateEstimatorMILBoosting( int nFeatures = 250 ); ~TrackerStateEstimatorMILBoosting(); + /** @brief Set the current confidenceMap + @param confidenceMap The current :cConfidenceMap + */ void setCurrentConfidenceMap( ConfidenceMap& confidenceMap ); protected: @@ -583,12 +656,13 @@ class CV_EXPORTS_W TrackerStateEstimatorMILBoosting : public TrackerStateEstimat ConfidenceMap currentConfidenceMap; }; -/** - * \brief TrackerStateEstimator based on AdaBoosting +/** @brief TrackerStateEstimatorAdaBoosting based on ADA-Boosting */ class CV_EXPORTS_W TrackerStateEstimatorAdaBoosting : public TrackerStateEstimator { public: + /** @brief Implementation of the target state for TrackerAdaBoostingTargetState + */ class TrackerAdaBoostingTargetState : public TrackerTargetState { @@ -611,12 +685,19 @@ class CV_EXPORTS_W TrackerStateEstimatorAdaBoosting : public TrackerStateEstimat } ; - /** - * setters and getters + /** @brief Set the features extracted from TrackerFeatureSet + @param responses The features extracted */ void setTargetResponses( const Mat& responses ); + /** @brief Set label: true for target foreground, false for background + @param foreground Label for background/foreground + */ void setTargetFg( bool foreground ); + /** @brief Get the features extracted + */ Mat getTargetResponses() const; + /** @brief Get the label. Return true for target foreground, false for background + */ bool isTargetFg() const; private: @@ -625,14 +706,13 @@ class CV_EXPORTS_W TrackerStateEstimatorAdaBoosting : public TrackerStateEstimat }; - /** - * \brief Constructor - * \param numClassifer Number of base classifiers - * \param initIterations Number of iterations in the initialization - * \param nFeatures Number of features/weak classifiers - * \param patchSize tracking rect - * \param ROI initial ROI - */ + /** @brief Constructor + @param numClassifer Number of base classifiers + @param initIterations Number of iterations in the initialization + @param nFeatures Number of features/weak classifiers + @param patchSize tracking rect + @param ROI initial ROI + */ TrackerStateEstimatorAdaBoosting( int numClassifer, int initIterations, int nFeatures, Size patchSize, const Rect& ROI ); /** @@ -640,40 +720,30 @@ class CV_EXPORTS_W TrackerStateEstimatorAdaBoosting : public TrackerStateEstimat */ ~TrackerStateEstimatorAdaBoosting(); - /** - * \brief Get the sampling ROI - * \return the sampling ROI - */ + /** @brief Get the sampling ROI + */ Rect getSampleROI() const; - /** - * \brief Set the sampling ROI - * \param ROI the sampling ROI - */ + /** @brief Set the sampling ROI + @param ROI the sampling ROI + */ void setSampleROI( const Rect& ROI ); - /** - * \brief Set the current confidence map - * \param confidenceMap the current confidence map - */ + /** @brief Set the current confidenceMap + @param confidenceMap The current :cConfidenceMap + */ void setCurrentConfidenceMap( ConfidenceMap& confidenceMap ); - /** - * \brief Get the list of the selected weak classifiers for the classification step - * \return the list of the selected weak classifiers - */ + /** @brief Get the list of the selected weak classifiers for the classification step + */ 
std::vector computeSelectedWeakClassifier(); - /** - * \brief Get the list of the weak classifiers that should be replaced - * \return the list of the weak classifiers - */ + /** @brief Get the list of the weak classifiers that should be replaced + */ std::vector computeReplacedClassifier(); - /** - * \brief Get the list of the weak classifiers that replace those to be replaced - * \return the list of the weak classifiers - */ + /** @brief Get the list of the weak classifiers that replace those to be replaced + */ std::vector computeSwappedClassifier(); protected: @@ -711,37 +781,47 @@ class CV_EXPORTS_W TrackerStateEstimatorSVM : public TrackerStateEstimator /************************************ Specific TrackerSamplerAlgorithm Classes ************************************/ -/** - * \brief TrackerSampler based on CSC (current state centered) +/** @brief TrackerSampler based on CSC (current state centered), used by MIL algorithm TrackerMIL */ class CV_EXPORTS_W TrackerSamplerCSC : public TrackerSamplerAlgorithm { public: enum { - MODE_INIT_POS = 1, // mode for init positive samples - MODE_INIT_NEG = 2, // mode for init negative samples - MODE_TRACK_POS = 3, // mode for update positive samples - MODE_TRACK_NEG = 4, // mode for update negative samples - MODE_DETECT = 5 // mode for detect samples + MODE_INIT_POS = 1, //!< mode for init positive samples + MODE_INIT_NEG = 2, //!< mode for init negative samples + MODE_TRACK_POS = 3, //!< mode for update positive samples + MODE_TRACK_NEG = 4, //!< mode for update negative samples + MODE_DETECT = 5 //!< mode for detect samples }; struct CV_EXPORTS Params { Params(); - float initInRad; // radius for gathering positive instances during init - float trackInPosRad; // radius for gathering positive instances during tracking - float searchWinSize; // size of search window - int initMaxNegNum; // # negative samples to use during init - int trackMaxPosNum; // # positive samples to use during training - int trackMaxNegNum; // # negative samples to use during training + float initInRad; //!< radius for gathering positive instances during init + float trackInPosRad; //!< radius for gathering positive instances during tracking + float searchWinSize; //!< size of search window + int initMaxNegNum; //!< # negative samples to use during init + int trackMaxPosNum; //!< # positive samples to use during training + int trackMaxNegNum; //!< # negative samples to use during training }; + /** @brief Constructor + @param parameters TrackerSamplerCSC parameters TrackerSamplerCSC::Params + */ TrackerSamplerCSC( const TrackerSamplerCSC::Params ¶meters = TrackerSamplerCSC::Params() ); - /** - * \brief set the sampling mode - */ + /** @brief Set the sampling mode of TrackerSamplerCSC + @param samplingMode The sampling mode + + The modes are: + + - "MODE_INIT_POS = 1" -- for the positive sampling in initialization step + - "MODE_INIT_NEG = 2" -- for the negative sampling in initialization step + - "MODE_TRACK_POS = 3" -- for the positive sampling in update step + - "MODE_TRACK_NEG = 4" -- for the negative sampling in update step + - "MODE_DETECT = 5" -- for the sampling in detection step + */ void setMode( int samplingMode ); ~TrackerSamplerCSC(); @@ -759,30 +839,38 @@ class CV_EXPORTS_W TrackerSamplerCSC : public TrackerSamplerAlgorithm std::vector sampleImage( const Mat& img, int x, int y, int w, int h, float inrad, float outrad = 0, int maxnum = 1000000 ); }; -/** - * \brief TrackerSampler based on CS (current state) +/** @brief TrackerSampler based on CS (current state), 
used by algorithm TrackerBoosting */ class CV_EXPORTS_W TrackerSamplerCS : public TrackerSamplerAlgorithm { public: enum { - MODE_POSITIVE = 1, // mode for positive samples - MODE_NEGATIVE = 2, // mode for negative samples - MODE_CLASSIFY = 3 // mode for classify samples + MODE_POSITIVE = 1, //!< mode for positive samples + MODE_NEGATIVE = 2, //!< mode for negative samples + MODE_CLASSIFY = 3 //!< mode for classify samples }; struct CV_EXPORTS Params { Params(); - float overlap; //overlapping for the search windows - float searchFactor; //search region parameter + float overlap; //! std; + int iterationNum; //!< number of selection rounds + int particlesNum; //!< number of "perturbed" boxes on each round + double alpha; //!< with each new round we exponentially decrease the amount of "perturbing" we allow (like in simulated annealing) + //!< and this very alpha controls how fast annealing happens, ie. how fast perturbing decreases + Mat_ std; //!< initial values for perturbing (1-by-4 array, as each rectangle is given by 4 values -- coordinates of opposite vertices, + //!< hence we have 4 values to perturb) }; + /** @brief Constructor + @param chosenRect Initial rectangle, that is supposed to contain target we'd like to track. + @param parameters + */ TrackerSamplerPF(const Mat& chosenRect,const TrackerSamplerPF::Params ¶meters = TrackerSamplerPF::Params()); protected: bool samplingImpl( const Mat& image, Rect boundingBox, std::vector& sample ); @@ -871,8 +986,8 @@ class CV_EXPORTS_W TrackerFeatureHOG : public TrackerFeature }; -/** - * \brief TrackerFeature based on HAAR +/** @brief TrackerFeature based on HAAR features, used by TrackerMIL and many others algorithms +@note HAAR features implementation is copied from apps/traincascade and modified according to MIL */ class CV_EXPORTS_W TrackerFeatureHAAR : public TrackerFeature { @@ -880,44 +995,48 @@ class CV_EXPORTS_W TrackerFeatureHAAR : public TrackerFeature struct CV_EXPORTS Params { Params(); - int numFeatures; // # of rects - Size rectSize; // rect size - bool isIntegral; // true if input images are integral, false otherwise + int numFeatures; //!< # of rects + Size rectSize; //!< rect size + bool isIntegral; //!< true if input images are integral, false otherwise }; + /** @brief Constructor + @param parameters TrackerFeatureHAAR parameters TrackerFeatureHAAR::Params + */ TrackerFeatureHAAR( const TrackerFeatureHAAR::Params ¶meters = TrackerFeatureHAAR::Params() ); ~TrackerFeatureHAAR(); - /** - * \brief Compute the features only for the selected indices in the images collection - * \param selFeatures indices of selected features - * \param images The images. - * \param response Computed features. 
- */ + /** @brief Compute the features only for the selected indices in the images collection + @param selFeatures indices of selected features + @param images The images + @param response Collection of response for the specific TrackerFeature + */ bool extractSelected( const std::vector selFeatures, const std::vector& images, Mat& response ); + /** @brief Identify most effective features + @param response Collection of response for the specific TrackerFeature + @param npoints Max number of features + + @note This method modifies the response parameter + */ void selection( Mat& response, int npoints ); - /** - * \brief Swap the feature in position source with the feature in position target - * \param source The source position - * \param target The target position - */ + /** @brief Swap the feature in position source with the feature in position target + @param source The source position + @param target The target position + */ bool swapFeature( int source, int target ); - /** - * \brief Swap the feature in position id with the feature input - * \param id The position - * \param feature The feature - */ + /** @brief Swap the feature in position id with the feature input + @param id The position + @param feature The feature + */ bool swapFeature( int id, CvHaarEvaluator::FeatureHaar& feature ); - /** - * \brief Get the feature - * \param id The position - * \return the feature in position id - */ + /** @brief Get the feature in position id + @param id The position + */ CvHaarEvaluator::FeatureHaar& getFeatureAt( int id ); protected: @@ -950,11 +1069,14 @@ class CV_EXPORTS_W TrackerFeatureLBP : public TrackerFeature /************************************ Specific Tracker Classes ************************************/ -/** - \brief TrackerMIL implementation. - For more details see B Babenko, MH Yang, S Belongie, Visual Tracking with Online Multiple Instance Learning - */ +/** @brief The MIL algorithm trains a classifier in an online manner to separate the object from the +background. +Multiple Instance Learning avoids the drift problem for a robust tracking. The implementation is +based on @cite MIL. 
+ +Original code can be found here + */ class CV_EXPORTS_W TrackerMIL : public Tracker { public: @@ -962,24 +1084,28 @@ class CV_EXPORTS_W TrackerMIL : public Tracker { Params(); //parameters for sampler - float samplerInitInRadius; // radius for gathering positive instances during init - int samplerInitMaxNegNum; // # negative samples to use during init - float samplerSearchWinSize; // size of search window - float samplerTrackInRadius; // radius for gathering positive instances during tracking - int samplerTrackMaxPosNum; // # positive samples to use during tracking - int samplerTrackMaxNegNum; // # negative samples to use during tracking - int featureSetNumFeatures; // #features + float samplerInitInRadius; //!< radius for gathering positive instances during init + int samplerInitMaxNegNum; //!< # negative samples to use during init + float samplerSearchWinSize; //!< size of search window + float samplerTrackInRadius; //!< radius for gathering positive instances during tracking + int samplerTrackMaxPosNum; //!< # positive samples to use during tracking + int samplerTrackMaxNegNum; //!< # negative samples to use during tracking + int featureSetNumFeatures; //!< # features void read( const FileNode& fn ); void write( FileStorage& fs ) const; }; + /** @brief Constructor + @param parameters MIL parameters TrackerMIL::Params + */ BOILERPLATE_CODE("MIL",TrackerMIL); }; -/** - \brief TrackerBoosting implementation. - For more details see H Grabner, M Grabner, H Bischof, Real-time tracking via on-line boosting +/** @brief This is a real-time object tracking based on a novel on-line version of the AdaBoost algorithm. + +The classifier uses the surrounding background as negative examples in update step to avoid the +drifting problem. The implementation is based on @cite OLB. */ class CV_EXPORTS_W TrackerBoosting : public Tracker { @@ -987,11 +1113,11 @@ class CV_EXPORTS_W TrackerBoosting : public Tracker struct CV_EXPORTS Params { Params(); - int numClassifiers; //the number of classifiers to use in a OnlineBoosting algorithm - float samplerOverlap; //search region parameters to use in a OnlineBoosting algorithm - float samplerSearchFactor; // search region parameters to use in a OnlineBoosting algorithm - int iterationInit; //the initial iterations - int featureSetNumFeatures; // #features + int numClassifiers; //!, the courtesy of the author Arthur Amarra, was used for the +reference purpose. */ class CV_EXPORTS_W TrackerMedianFlow : public Tracker { @@ -1017,15 +1151,29 @@ class CV_EXPORTS_W TrackerMedianFlow : public Tracker struct CV_EXPORTS Params { Params(); - int pointsInGrid; //square root of number of keypoints used; increase it to trade - //accurateness for speed; default value is sensible and recommended + int pointsInGrid; //! create(bool orientationNormalized = true, bool scaleNormalized = true, float patternScale = 22.0f, @@ -67,10 +100,7 @@ public: }; -/*! - The "Star" Detector. - - The class implements the keypoint detector introduced by K. Konolige. +/** @brief The class implements the keypoint detector introduced by @cite Agrawal08, synonym of StarDetector. 
: */ class CV_EXPORTS StarDetector : public FeatureDetector { @@ -85,12 +115,22 @@ public: /* * BRIEF Descriptor */ + +/** @brief Class for computing BRIEF descriptors described in @cite calon2010 + +@note + - A complete BRIEF extractor sample can be found at + opencv_source_code/samples/cpp/brief_match_test.cpp + + */ class CV_EXPORTS BriefDescriptorExtractor : public DescriptorExtractor { public: static Ptr create( int bytes = 32 ); }; +//! @} + } } diff --git a/modules/xfeatures2d/include/opencv2/xfeatures2d/cuda.hpp b/modules/xfeatures2d/include/opencv2/xfeatures2d/cuda.hpp index b05804d83..3dc57c7ff 100644 --- a/modules/xfeatures2d/include/opencv2/xfeatures2d/cuda.hpp +++ b/modules/xfeatures2d/include/opencv2/xfeatures2d/cuda.hpp @@ -47,6 +47,42 @@ namespace cv { namespace cuda { +//! @addtogroup xfeatures2d_nonfree +//! @{ + +/** @brief Class used for extracting Speeded Up Robust Features (SURF) from an image. : + +The class SURF_CUDA implements Speeded Up Robust Features descriptor. There is a fast multi-scale +Hessian keypoint detector that can be used to find the keypoints (which is the default option). But +the descriptors can also be computed for the user-specified keypoints. Only 8-bit grayscale images +are supported. + +The class SURF_CUDA can store results in the GPU and CPU memory. It provides functions to convert +results between CPU and GPU version ( uploadKeypoints, downloadKeypoints, downloadDescriptors ). The +format of CPU results is the same as SURF results. GPU results are stored in GpuMat. The keypoints +matrix is \f$\texttt{nFeatures} \times 7\f$ matrix with the CV_32FC1 type. + +- keypoints.ptr\(X_ROW)[i] contains x coordinate of the i-th feature. +- keypoints.ptr\(Y_ROW)[i] contains y coordinate of the i-th feature. +- keypoints.ptr\(LAPLACIAN_ROW)[i] contains the laplacian sign of the i-th feature. +- keypoints.ptr\(OCTAVE_ROW)[i] contains the octave of the i-th feature. +- keypoints.ptr\(SIZE_ROW)[i] contains the size of the i-th feature. +- keypoints.ptr\(ANGLE_ROW)[i] contain orientation of the i-th feature. +- keypoints.ptr\(HESSIAN_ROW)[i] contains the response of the i-th feature. + +The descriptors matrix is \f$\texttt{nFeatures} \times \texttt{descriptorSize}\f$ matrix with the +CV_32FC1 type. + +The class SURF_CUDA uses some buffers and provides access to it. All buffers can be safely released +between function calls. + +@sa SURF + +@note + - An example for using the SURF keypoint matcher on GPU can be found at + opencv_source_code/samples/gpu/surf_keypoint_matcher.cpp + + */ class CV_EXPORTS SURF_CUDA { public: @@ -123,6 +159,8 @@ public: GpuMat maxPosBuffer; }; +//! @} + }} // namespace cv { namespace cuda { #endif // __OPENCV_XFEATURES2D_CUDA_HPP__ diff --git a/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp b/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp index 6278bb246..eae1bc5d0 100644 --- a/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp +++ b/modules/xfeatures2d/include/opencv2/xfeatures2d/nonfree.hpp @@ -50,14 +50,32 @@ namespace cv namespace xfeatures2d { -/*! - SIFT implementation. +//! @addtogroup xfeatures2d_nonfree +//! @{ - The class implements SIFT algorithm by D. Lowe. -*/ +/** @brief Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform +(SIFT) algorithm by D. Lowe @cite Lowe04. + */ class CV_EXPORTS_W SIFT : public Feature2D { public: + /** + @param nfeatures The number of best features to retain. 
The features are ranked by their scores + (measured in SIFT algorithm as the local contrast) + + @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The + number of octaves is computed automatically from the image resolution. + + @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform + (low-contrast) regions. The larger the threshold, the less features are produced by the detector. + + @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning + is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are + filtered out (more features are retained). + + @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image + is captured with a weak camera with soft lenses, you might want to reduce the number. + */ CV_WRAP static Ptr create( int nfeatures = 0, int nOctaveLayers = 3, double contrastThreshold = 0.04, double edgeThreshold = 10, double sigma = 1.6); @@ -66,14 +84,47 @@ public: typedef SIFT SiftFeatureDetector; typedef SIFT SiftDescriptorExtractor; -/*! - SURF implementation. - - The class implements SURF algorithm by H. Bay et al. +/** @brief Class for extracting Speeded Up Robust Features from an image @cite Bay06. + +The algorithm parameters: +- member int extended + - 0 means that the basic descriptors (64 elements each) shall be computed + - 1 means that the extended descriptors (128 elements each) shall be computed +- member int upright + - 0 means that detector computes orientation of each feature. + - 1 means that the orientation is not computed (which is much, much faster). For example, +if you match images from a stereo pair, or do image stitching, the matched features +likely have very similar angles, and you can speed up feature extraction by setting +upright=1. +- member double hessianThreshold +Threshold for the keypoint detector. Only features, whose hessian is larger than +hessianThreshold are retained by the detector. Therefore, the larger the value, the less +keypoints you will get. A good default value could be from 300 to 500, depending from the +image contrast. +- member int nOctaves +The number of a gaussian pyramid octaves that the detector uses. It is set to 4 by default. +If you want to get very large features, use the larger value. If you want just small +features, decrease it. +- member int nOctaveLayers +The number of images within each octave of a gaussian pyramid. It is set to 2 by default. +@note + - An example using the SURF feature detector can be found at + opencv_source_code/samples/cpp/generic_descriptor_match.cpp + - Another example using the SURF feature detector, extractor and matcher can be found at + opencv_source_code/samples/cpp/matcher_simple.cpp */ class CV_EXPORTS_W SURF : public Feature2D { public: + /** + @param hessianThreshold Threshold for hessian keypoint detector used in SURF. + @param nOctaves Number of pyramid octaves the keypoint detector will use. + @param nOctaveLayers Number of octave layers within each octave. + @param extended Extended descriptor flag (true - use extended 128-element descriptors; false - use + 64-element descriptors). + @param upright Up-right or rotated features flag (true - do not compute orientation of features; + false - compute orientation). 
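A hedged sketch of exercising the factory whose parameters are documented above: create a SURF object, detect and describe keypoints in two images, and match the floating-point descriptors. The parameter value is an arbitrary example, not a recommendation; the declaration itself follows.

    #include <vector>
    #include <opencv2/features2d.hpp>
    #include <opencv2/xfeatures2d/nonfree.hpp>

    void matchWithSURF(const cv::Mat& img1, const cv::Mat& img2)
    {
        cv::Ptr<cv::xfeatures2d::SURF> surf = cv::xfeatures2d::SURF::create(400.0); // hessianThreshold

        std::vector<cv::KeyPoint> kpts1, kpts2;
        cv::Mat desc1, desc2;
        surf->detectAndCompute(img1, cv::noArray(), kpts1, desc1);
        surf->detectAndCompute(img2, cv::noArray(), kpts2, desc2);

        cv::BFMatcher matcher(cv::NORM_L2);                // SURF descriptors are floating point
        std::vector<cv::DMatch> matches;
        matcher.match(desc1, desc2, matches);
    }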
+ */ CV_WRAP static Ptr create(double hessianThreshold=100, int nOctaves = 4, int nOctaveLayers = 3, bool extended = false, bool upright = false); @@ -97,6 +148,8 @@ public: typedef SURF SurfFeatureDetector; typedef SURF SurfDescriptorExtractor; +//! @} + } } /* namespace cv */ diff --git a/modules/ximgproc/doc/ximgproc.bib b/modules/ximgproc/doc/ximgproc.bib new file mode 100644 index 000000000..224e01340 --- /dev/null +++ b/modules/ximgproc/doc/ximgproc.bib @@ -0,0 +1,48 @@ +@inproceedings{Dollar2013, + title={Structured forests for fast edge detection}, + author={Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={Computer Vision (ICCV), 2013 IEEE International Conference on}, + pages={1841--1848}, + year={2013}, + organization={IEEE} +} + +@incollection{VBRV14, + title={SEEDS: Superpixels extracted via energy-driven sampling}, + author={Van den Bergh, Michael and Boix, Xavier and Roig, Gemma and de Capitani, Benjamin and Van Gool, Luc}, + booktitle={Computer Vision--ECCV 2012}, + pages={13--26}, + year={2012}, + publisher={Springer} +} + +@inproceedings{Gastal11, + title={Domain transform for edge-aware image and video processing}, + author={Gastal, Eduardo SL and Oliveira, Manuel M}, + booktitle={ACM Transactions on Graphics (TOG)}, + volume={30}, + number={4}, + pages={69}, + year={2011}, + organization={ACM} +} + +@article{Gastal12, + title={Adaptive manifolds for real-time high-dimensional filtering}, + author={Gastal, Eduardo SL and Oliveira, Manuel M}, + journal={ACM Transactions on Graphics (TOG)}, + volume={31}, + number={4}, + pages={33}, + year={2012}, + publisher={ACM} +} + +@incollection{Kaiming10, + title={Guided image filtering}, + author={He, Kaiming and Sun, Jian and Tang, Xiaoou}, + booktitle={Computer Vision--ECCV 2010}, + pages={1--14}, + year={2010}, + publisher={Springer} +} diff --git a/modules/ximgproc/include/opencv2/ximgproc.hpp b/modules/ximgproc/include/opencv2/ximgproc.hpp index eb35ee8a6..69c0e7fe7 100644 --- a/modules/ximgproc/include/opencv2/ximgproc.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc.hpp @@ -41,4 +41,17 @@ #include "ximgproc/structured_edge_detection.hpp" #include "ximgproc/seeds.hpp" +/** @defgroup ximgproc Extended Image Processing + @{ + @defgroup ximgproc_edge Structured forests for fast edge detection + +This module contains implementations of modern structured edge detection algorithms, i.e. algorithms +which somehow takes into account pixel affinities in natural images. + + @defgroup ximgproc_filters Filters + + @defgroup ximgproc_superpixel Superpixels + @} +*/ + #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp b/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp index 58fbde6e8..60132e0f2 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp @@ -45,6 +45,9 @@ namespace cv namespace ximgproc { +//! @addtogroup ximgproc_filters +//! @{ + enum EdgeAwareFiltersList { DTF_NC, @@ -56,51 +59,158 @@ enum EdgeAwareFiltersList }; -/*Interface for DT filters*/ +/** @brief Interface for realizations of Domain Transform filter. + +For more details about this filter see @cite Gastal11. + */ class CV_EXPORTS_W DTFilter : public Algorithm { public: + /** @brief Produce domain transform filtering operation on source image. + + @param src filtering image with unsigned 8-bit or floating-point 32-bit depth and up to 4 channels. + + @param dst destination image. + + @param dDepth optional depth of the output image. 
dDepth can be set to -1, which will be equivalent
+    to src.depth().
+     */
    CV_WRAP virtual void filter(InputArray src, OutputArray dst, int dDepth = -1) = 0;
};

-/*Fabric function for DT filters*/
+/** @brief Factory method that creates an instance of DTFilter and performs the initialization routines.
+
+@param guide guided image (used to build the transformed distance, which describes the edge structure of
+the guided image).
+
+@param sigmaSpatial \f${\sigma}_H\f$ parameter in the original article, it's similar to the sigma in the
+coordinate space of bilateralFilter.
+
+@param sigmaColor \f${\sigma}_r\f$ parameter in the original article, it's similar to the sigma in the
+color space of bilateralFilter.
+
+@param mode one of the three modes DTF_NC, DTF_RF and DTF_IC which correspond to the three modes for
+filtering 2D signals in the article.
+
+@param numIters optional number of iterations used for filtering, 3 is quite enough.
+
+For more details about Domain Transform filter parameters, see the original article @cite Gastal11 and
+[Domain Transform filter homepage](http://www.inf.ufrgs.br/~eslgastal/DomainTransform/).
+ */
CV_EXPORTS_W Ptr<DTFilter> createDTFilter(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);

-/*One-line DT filter call*/
+/** @brief Simple one-line Domain Transform filter call. If you have multiple images to filter with the same
+guided image then use the DTFilter interface to avoid extra computations on the initialization stage.
+
+@param guide guided image (also called joint image) with unsigned 8-bit or floating-point 32-bit
+depth and up to 4 channels.
+@param src filtering image with unsigned 8-bit or floating-point 32-bit depth and up to 4 channels.
+@param dst destination image.
+@param sigmaSpatial \f${\sigma}_H\f$ parameter in the original article, it's similar to the sigma in the
+coordinate space of bilateralFilter.
+@param sigmaColor \f${\sigma}_r\f$ parameter in the original article, it's similar to the sigma in the
+color space of bilateralFilter.
+@param mode one of the three modes DTF_NC, DTF_RF and DTF_IC which correspond to the three modes for
+filtering 2D signals in the article.
+@param numIters optional number of iterations used for filtering, 3 is quite enough.
+@sa bilateralFilter, guidedFilter, amFilter
+ */
CV_EXPORTS_W void dtFilter(InputArray guide, InputArray src, OutputArray dst, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3);

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////

-/*Interface for Guided Filter*/
+/** @brief Interface for realizations of Guided Filter.
+
+For more details about this filter see @cite Kaiming10.
+ */
class CV_EXPORTS_W GuidedFilter : public Algorithm
{
public:
+    /** @brief Apply Guided Filter to the filtering image.
+
+    @param src filtering image with any number of channels.
+
+    @param dst output image.
+
+    @param dDepth optional depth of the output image. dDepth can be set to -1, which will be equivalent
+    to src.depth().
+    */
    CV_WRAP virtual void filter(InputArray src, OutputArray dst, int dDepth = -1) = 0;
};

-/*Fabric function for Guided Filter*/
+/** @brief Factory method that creates an instance of GuidedFilter and performs the initialization routines.
+
+@param guide guided image (or array of images) with up to 3 channels; if it has more than 3
+channels then only the first 3 channels will be used.
+
+@param radius radius of Guided Filter.
+
+@param eps regularization term of Guided Filter.
\f${eps}^2\f$ is similar to the sigma in the color +space into bilateralFilter. + +For more details about Guided Filter parameters, see the original article @cite Kaiming10. + */ CV_EXPORTS_W Ptr createGuidedFilter(InputArray guide, int radius, double eps); -/*One-line Guided Filter call*/ +/** @brief Simple one-line Guided Filter call. + +If you have multiple images to filter with the same guided image then use GuidedFilter interface to +avoid extra computations on initialization stage. + +@param guide guided image (or array of images) with up to 3 channels, if it have more then 3 +channels then only first 3 channels will be used. + +@param src filtering image with any numbers of channels. + +@param dst output image. + +@param radius radius of Guided Filter. + +@param eps regularization term of Guided Filter. \f${eps}^2\f$ is similar to the sigma in the color +space into bilateralFilter. + +@param dDepth optional depth of the output image. + +@sa bilateralFilter, dtFilter, amFilter */ CV_EXPORTS_W void guidedFilter(InputArray guide, InputArray src, OutputArray dst, int radius, double eps, int dDepth = -1); ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// +/** @brief Interface for Adaptive Manifold Filter realizations. + +For more details about this filter see @cite Gastal12 and References_. + +Below listed optional parameters which may be set up with Algorithm::set function. +- member double sigma_s = 16.0 +Spatial standard deviation. +- member double sigma_r = 0.2 +Color space standard deviation. +- member int tree_height = -1 +Height of the manifold tree (default = -1 : automatically computed). +- member int num_pca_iterations = 1 +Number of iterations to computed the eigenvector. +- member bool adjust_outliers = false +Specify adjust outliers using Eq. 9 or not. +- member bool use_RNG = true +Specify use random number generator to compute eigenvector or not. + */ class CV_EXPORTS_W AdaptiveManifoldFilter : public Algorithm { public: - /** - * @brief Apply High-dimensional filtering using adaptive manifolds - * @param src Input image to be filtered. - * @param dst Adaptive-manifold filter response. - * @param joint Image for joint filtering (optional). - */ + /** @brief Apply high-dimensional filtering using adaptive manifolds. + + @param src filtering image with any numbers of channels. + + @param dst output image. + + @param joint optional joint (also called as guided) image with any numbers of channels. + */ CV_WRAP virtual void filter(InputArray src, OutputArray dst, InputArray joint = noArray()) = 0; CV_WRAP virtual void collectGarbage() = 0; @@ -108,18 +218,81 @@ public: CV_WRAP static Ptr create(); }; -//Fabric function for AM filter algorithm +/** @brief Factory method, create instance of AdaptiveManifoldFilter and produce some initialization routines. + +@param sigma_s spatial standard deviation. + +@param sigma_r color space standard deviation, it is similar to the sigma in the color space into +bilateralFilter. + +@param adjust_outliers optional, specify perform outliers adjust operation or not, (Eq. 9) in the +original paper. + +For more details about Adaptive Manifold Filter parameters, see the original article @cite Gastal12. + +@note Joint images with CV_8U and CV_16U depth converted to images with CV_32F depth and [0; 1] +color range before processing. Hence color space sigma sigma_r must be in [0; 1] range, unlike same +sigmas in bilateralFilter and dtFilter functions. 
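Before the createAMFilter declaration below, a sketch of the one-line filtering calls declared in this header (dtFilter, guidedFilter, amFilter and jointBilateralFilter). The file names and parameter values are placeholders chosen only to show the argument order.

    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/ximgproc/edge_filter.hpp>

    void edgeAwareFiltering()
    {
        cv::Mat guide = cv::imread("guide.png");           // supplies the edge structure
        cv::Mat src   = cv::imread("noisy.png");           // image to be filtered
        cv::Mat dst;

        // Domain Transform filter: sigmaSpatial = 10, sigmaColor = 30 (8-bit intensity scale).
        cv::ximgproc::dtFilter(guide, src, dst, 10.0, 30.0);

        // Guided Filter: radius 8, eps expressed on the squared 8-bit intensity scale.
        cv::ximgproc::guidedFilter(guide, src, dst, 8, 20.0 * 20.0);

        // Adaptive Manifold filter: sigma_r must lie in [0; 1], as noted above.
        cv::ximgproc::amFilter(guide, src, dst, 16.0, 0.2);

        // Joint bilateral filter: 9-pixel neighborhood, sigmaColor = 25, sigmaSpace = 9.
        cv::ximgproc::jointBilateralFilter(guide, src, dst, 9, 25.0, 9.0);
    }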
+*/ CV_EXPORTS_W Ptr createAMFilter(double sigma_s, double sigma_r, bool adjust_outliers = false); -//One-line Adaptive Manifold filter call +/** @brief Simple one-line Adaptive Manifold Filter call. + +@param joint joint (also called as guided) image or array of images with any numbers of channels. + +@param src filtering image with any numbers of channels. + +@param dst output image. + +@param sigma_s spatial standard deviation. + +@param sigma_r color space standard deviation, it is similar to the sigma in the color space into +bilateralFilter. + +@param adjust_outliers optional, specify perform outliers adjust operation or not, (Eq. 9) in the +original paper. + +@note Joint images with CV_8U and CV_16U depth converted to images with CV_32F depth and [0; 1] +color range before processing. Hence color space sigma sigma_r must be in [0; 1] range, unlike same +sigmas in bilateralFilter and dtFilter functions. @sa bilateralFilter, dtFilter, guidedFilter +*/ CV_EXPORTS_W void amFilter(InputArray joint, InputArray src, OutputArray dst, double sigma_s, double sigma_r, bool adjust_outliers = false); ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// +/** @brief Applies the joint bilateral filter to an image. + +@param joint Joint 8-bit or floating-point, 1-channel or 3-channel image. + +@param src Source 8-bit or floating-point, 1-channel or 3-channel image with the same depth as joint +image. + +@param dst Destination image of the same size and type as src . + +@param d Diameter of each pixel neighborhood that is used during filtering. If it is non-positive, +it is computed from sigmaSpace . + +@param sigmaColor Filter sigma in the color space. A larger value of the parameter means that +farther colors within the pixel neighborhood (see sigmaSpace ) will be mixed together, resulting in +larger areas of semi-equal color. + +@param sigmaSpace Filter sigma in the coordinate space. A larger value of the parameter means that +farther pixels will influence each other as long as their colors are close enough (see sigmaColor ). +When d\>0 , it specifies the neighborhood size regardless of sigmaSpace . Otherwise, d is +proportional to sigmaSpace . + +@param borderType + +@note bilateralFilter and jointBilateralFilter use L1 norm to compute difference between colors. + +@sa bilateralFilter, amFilter +*/ CV_EXPORTS_W void jointBilateralFilter(InputArray joint, InputArray src, OutputArray dst, int d, double sigmaColor, double sigmaSpace, int borderType = BORDER_DEFAULT); +//! @} + } } #endif diff --git a/modules/ximgproc/include/opencv2/ximgproc/seeds.hpp b/modules/ximgproc/include/opencv2/ximgproc/seeds.hpp index 92b62a3e9..c9bce4045 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/seeds.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/seeds.hpp @@ -50,66 +50,132 @@ namespace cv namespace ximgproc { +//! @addtogroup ximgproc_superpixel +//! @{ -//! Superpixel implementation: "SEEDS: Superpixels Extracted via Energy-Driven Sampling", IJCV 2014 +/** @brief Class implementing the SEEDS (Superpixels Extracted via Energy-Driven Sampling) superpixels +algorithm described in @cite VBRV14. + +The algorithm uses an efficient hill-climbing algorithm to optimize the superpixels' energy +function that is based on color histograms and a boundary term, which is optional. 
The energy +function encourages superpixels to be of the same color, and if the boundary term is activated, the +superpixels have smooth boundaries and are of similar shape. In practice it starts from a regular +grid of superpixels and moves the pixels or blocks of pixels at the boundaries to refine the +solution. The algorithm runs in real-time using a single CPU. + */ class CV_EXPORTS_W SuperpixelSEEDS : public Algorithm { public: - /*! get the actual number of superpixels */ + /** @brief Calculates the superpixel segmentation on a given image stored in SuperpixelSEEDS object. + + The function computes the superpixels segmentation of an image with the parameters initialized + with the function createSuperpixelSEEDS(). + */ CV_WRAP virtual int getNumberOfSuperpixels() = 0; - /*! - * calculate the segmentation on a given image. To get the result use getLabels() - * @param img input image. supported formats: CV_8U, CV_16U, CV_32F - * image size & number of channels must match with the - * initialized image size & channels. - * @param num_iterations number of pixel level iterations. higher number - * improves the result + /** @brief Calculates the superpixel segmentation on a given image with the initialized + parameters in the SuperpixelSEEDS object. + + This function can be called again for other images without the need of initializing the + algorithm with createSuperpixelSEEDS(). This save the computational cost of allocating memory + for all the structures of the algorithm. + + @param img Input image. Supported formats: CV_8U, CV_16U, CV_32F. Image size & number of + channels must match with the initialized image size & channels with the function + createSuperpixelSEEDS(). It should be in HSV or Lab color space. Lab is a bit better, but also + slower. + + @param num_iterations Number of pixel level iterations. Higher number improves the result. + + The function computes the superpixels segmentation of an image with the parameters initialized + with the function createSuperpixelSEEDS(). The algorithms starts from a grid of superpixels and + then refines the boundaries by proposing updates of blocks of pixels that lie at the boundaries + from large to smaller size, finalizing with proposing pixel updates. An illustrative example + can be seen below. + + ![image](pics/superpixels_blocks2.png) */ CV_WRAP virtual void iterate(InputArray img, int num_iterations=4) = 0; - /*! - * retrieve the segmentation results. - * @param labels_out Return: A CV_32UC1 integer array containing the labels - * labels are in the range [0, getNumberOfSuperpixels()] + /** @brief Returns the segmentation labeling of the image. + + Each label represents a superpixel, and each pixel is assigned to one superpixel label. + + @param labels_out Return: A CV_32UC1 integer array containing the labels of the superpixel + segmentation. The labels are in the range [0, getNumberOfSuperpixels()]. + + The function returns an image with ssthe labels of the superpixel segmentation. The labels are in + the range [0, getNumberOfSuperpixels()]. */ CV_WRAP virtual void getLabels(OutputArray labels_out) = 0; - /*! - * get an image mask with the contour of the superpixels. useful for test output. - * @param image Return: CV_8UC1 image mask where -1 is a superpixel border - * pixel and 0 an interior pixel. - * @param thick_line if false, border is only one pixel wide, otherwise - * all border pixels are masked + /** @brief Returns the mask of the superpixel segmentation stored in SuperpixelSEEDS object. 
+ + @param image Return: CV_8UC1 image mask where -1 indicates that the pixel is a superpixel border, + and 0 otherwise. + + @param thick_line If false, the border is only one pixel wide, otherwise all pixels at the border + are masked. + + The function return the boundaries of the superpixel segmentation. + + @note + - (Python) A demo on how to generate superpixels in images from the webcam can be found at + opencv_source_code/samples/python2/seeds.py + - (cpp) A demo on how to generate superpixels in images from the webcam can be found at + opencv_source_code/modules/ximgproc/samples/seeds.cpp. By adding a file image as a command + line argument, the static image will be used instead of the webcam. + - It will show a window with the video from the webcam with the superpixel boundaries marked + in red (see below). Use Space to switch between different output modes. At the top of the + window there are 4 sliders, from which the user can change on-the-fly the number of + superpixels, the number of block levels, the strength of the boundary prior term to modify + the shape, and the number of iterations at pixel level. This is useful to play with the + parameters and set them to the user convenience. In the console the frame-rate of the + algorithm is indicated. + + ![image](pics/superpixels_demo.png) */ CV_WRAP virtual void getLabelContourMask(OutputArray image, bool thick_line = false) = 0; virtual ~SuperpixelSEEDS() {} }; -/*! Creates a SuperpixelSEEDS object. - * @param image_width image width - * @param image_height image height - * @param image_channels number of channels the image has - * @param num_superpixels desired number of superpixels. Note that the actual - * number can be smaller due to further restrictions. - * use getNumberOfSuperpixels to get the actual number. - * @param num_levels number of block levels: the more levels, the more - * accurate is the segmentation, but needs more memory - * and CPU time. - * @param histogram_bins number of histogram bins. - * @param prior enable 3x3 shape smoothing term if >0. a larger value - * leads to smoother shapes. - * range: [0, 5] - * @param double_step if true, iterate each block level twice for higher - * accuracy. +/** @brief Initializes a SuperpixelSEEDS object. + +@param image_width Image width. +@param image_height Image height. +@param image_channels Number of channels of the image. +@param num_superpixels Desired number of superpixels. Note that the actual number may be smaller +due to restrictions (depending on the image size and num_levels). Use getNumberOfSuperpixels() to +get the actual number. +@param num_levels Number of block levels. The more levels, the more accurate is the segmentation, +but needs more memory and CPU time. +@param prior enable 3x3 shape smoothing term if \>0. A larger value leads to smoother shapes. prior +must be in the range [0, 5]. +@param histogram_bins Number of histogram bins. +@param double_step If true, iterate each block level twice for higher accuracy. + +The function initializes a SuperpixelSEEDS object for the input image. It stores the parameters of +the image: image_width, image_height and image_channels. It also sets the parameters of the SEEDS +superpixel algorithm, which are: num_superpixels, num_levels, use_prior, histogram_bins and +double_step. + +The number of levels in num_levels defines the amount of block levels that the algorithm use in the +optimization. 
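A usage sketch assembled from the members documented above (createSuperpixelSEEDS, iterate, getLabels and getLabelContourMask); the input file name and parameter values are placeholders. The block-level initialization details continue below.

    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/ximgproc/seeds.hpp>

    void seedsExample()
    {
        cv::Mat bgr = cv::imread("frame.png");
        cv::Mat hsv;
        cv::cvtColor(bgr, hsv, cv::COLOR_BGR2HSV);         // HSV or Lab is recommended above

        cv::Ptr<cv::ximgproc::SuperpixelSEEDS> seeds =
            cv::ximgproc::createSuperpixelSEEDS(hsv.cols, hsv.rows, hsv.channels(),
                                                400 /*num_superpixels*/, 4 /*num_levels*/);
        seeds->iterate(hsv, 4);                            // more iterations refine the boundaries

        cv::Mat labels, mask;
        seeds->getLabels(labels);                          // CV_32UC1 superpixel label per pixel
        seeds->getLabelContourMask(mask, false);           // thin superpixel boundaries
        bgr.setTo(cv::Scalar(0, 0, 255), mask);            // draw the boundaries in red
    }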
The initialization is a grid, in which the superpixels are equally distributed through +the width and the height of the image. The larger blocks correspond to the superpixel size, and the +levels with smaller blocks are formed by dividing the larger blocks into 2 x 2 blocks of pixels, +recursively until the smaller block level. An example of initialization of 4 block levels is +illustrated in the following figure. + +![image](pics/superpixels_blocks.png) */ CV_EXPORTS_W Ptr createSuperpixelSEEDS( int image_width, int image_height, int image_channels, int num_superpixels, int num_levels, int prior = 2, int histogram_bins=5, bool double_step = false); +//! @} } } diff --git a/modules/ximgproc/include/opencv2/ximgproc/structured_edge_detection.hpp b/modules/ximgproc/include/opencv2/ximgproc/structured_edge_detection.hpp index 3763b4472..db6e90696 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/structured_edge_detection.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/structured_edge_detection.hpp @@ -44,23 +44,22 @@ #define __OPENCV_STRUCTURED_EDGE_DETECTION_HPP__ #ifdef __cplusplus -/* - * structured_edge_detection.hpp - * - * Created on: Jun 17, 2014 - * Author: Yury Gitman +/** @file +@date Jun 17, 2014 +@author Yury Gitman */ #include -/*! \namespace cv - Namespace where all the C++ OpenCV functionality resides - */ namespace cv { namespace ximgproc { -/*! \class RFFeatureGetter + +//! @addtogroup ximgproc_edge +//! @{ + +/*! Helper class for training part of [P. Dollar and C. L. Zitnick. Structured Forests for Fast Edge Detection, 2013]. */ class CV_EXPORTS_W RFFeatureGetter : public Algorithm @@ -93,19 +92,19 @@ CV_EXPORTS_W Ptr createRFFeatureGetter(); -/*! \class StructuredEdgeDetection - Prediction part of [P. Dollar and C. L. Zitnick. Structured Forests for Fast Edge Detection, 2013]. +/** @brief Class implementing edge detection algorithm from @cite Dollar2013 : */ class CV_EXPORTS_W StructuredEdgeDetection : public Algorithm { public: - /*! - * The function detects edges in src and draw them to dst - * - * \param src : source image (RGB, float, in [0;1]) to detect edges - * \param dst : destination image (grayscale, float, in [0;1]) - * where edges are drawn + /** @brief The function detects edges in src and draw them to dst. + + The algorithm underlies this function is much more robust to texture presence, than common + approaches, e.g. Sobel + @param src source image (RGB, float, in [0;1]) to detect edges + @param dst destination image (grayscale, float, in [0;1]) where edges are drawn + @sa Sobel, Canny */ CV_WRAP virtual void detectEdges(const Mat &src, CV_OUT Mat &dst) const = 0; }; @@ -121,7 +120,9 @@ public: CV_EXPORTS_W Ptr createStructuredEdgeDetection(const String &model, Ptr howToGetFeatures = Ptr()); +//! 
@} + } } #endif -#endif /* __OPENCV_STRUCTURED_EDGE_DETECTION_HPP__ */ \ No newline at end of file +#endif /* __OPENCV_STRUCTURED_EDGE_DETECTION_HPP__ */ diff --git a/modules/xobjdetect/doc/xobjdetect.bib b/modules/xobjdetect/doc/xobjdetect.bib new file mode 100644 index 000000000..563a800d5 --- /dev/null +++ b/modules/xobjdetect/doc/xobjdetect.bib @@ -0,0 +1,19 @@ +@inproceedings{Dollar09, + title={Integral Channel Features.}, + author={Doll{\'a}r, Piotr and Tu, Zhuowen and Perona, Pietro and Belongie, Serge}, + booktitle={BMVC}, + volume={2}, + number={3}, + pages={5}, + year={2009} +} + +@inproceedings{Sochman05, + title={Waldboost-learning for time constrained sequential detection}, + author={Sochman, Jan and Matas, Jiri}, + booktitle={Computer Vision and Pattern Recognition, 2005. CVPR 2005. IEEE Computer Society Conference on}, + volume={2}, + pages={150--156}, + year={2005}, + organization={IEEE} +} diff --git a/modules/xobjdetect/include/opencv2/xobjdetect.hpp b/modules/xobjdetect/include/opencv2/xobjdetect.hpp index 00b5135b5..3086d52eb 100644 --- a/modules/xobjdetect/include/opencv2/xobjdetect.hpp +++ b/modules/xobjdetect/include/opencv2/xobjdetect.hpp @@ -47,58 +47,72 @@ the use of this software, even if advised of the possibility of such damage. #include #include +/** @defgroup xobjdetect Extended object detection +*/ + namespace cv { namespace xobjdetect { -/* Compute channel pyramid for acf features +//! @addtogroup xobjdetect +//! @{ - image — image, for which channels should be computed +/** @brief Compute channels for integral channel features evaluation - channels — output array for computed channels - -*/ +@param image image for which channels should be computed +@param channels output array for computed channels + */ CV_EXPORTS void computeChannels(InputArray image, std::vector& channels); +/** @brief Feature evaluation interface + */ class CV_EXPORTS FeatureEvaluator : public Algorithm { public: - /* Set channels for feature evaluation */ + /** @brief Set channels for feature evaluation + + @param channels array of channels to be set + */ virtual void setChannels(InputArrayOfArrays channels) = 0; - /* Set window position */ + /** @brief Set window position to sample features with shift. By default position is (0, 0). + + @param position position to be set + */ virtual void setPosition(Size position) = 0; - /* Evaluate feature with given index for current channels - and window position */ + /** @brief Evaluate feature value with given index for current channels and window position. + + @param feature_ind index of feature to be evaluated + */ virtual int evaluate(size_t feature_ind) const = 0; - /* Evaluate all features for current channels and window position + /** @brief Evaluate all features for current channels and window position. - Returns matrix-column of features - */ + @param feature_values matrix-column of evaluated feature values + */ virtual void evaluateAll(OutputArray feature_values) const = 0; virtual void assertChannels() = 0; }; -/* Construct feature evaluator, set features to evaluate - type can "icf" or "acf" */ +/** @brief Construct feature evaluator. + +@param features features for evaluation +@param type feature type. Can be "icf" or "acf" + */ CV_EXPORTS Ptr createFeatureEvaluator(const std::vector >& features, const std::string& type); -/* Generate acf features +/** @brief Generate integral features. Returns vector of features. 
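Stepping back to the structured edge detector documented above, a hedged sketch of the createStructuredEdgeDetection / detectEdges pair. The model file name is a placeholder for a user-supplied trained model; the conversion to a float RGB image in [0; 1] follows the input format stated for detectEdges.

    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/ximgproc/structured_edge_detection.hpp>

    void structuredEdges()
    {
        cv::Mat bgr = cv::imread("image.png");
        cv::cvtColor(bgr, bgr, cv::COLOR_BGR2RGB);         // detectEdges expects RGB
        cv::Mat srcF;
        bgr.convertTo(srcF, CV_32FC3, 1.0 / 255.0);        // float image in [0; 1]

        cv::Ptr<cv::ximgproc::StructuredEdgeDetection> sed =
            cv::ximgproc::createStructuredEdgeDetection("model.yml.gz"); // placeholder model file

        cv::Mat edges;                                     // grayscale float edge map in [0; 1]
        sed->detectEdges(srcF, edges);
    }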
- window_size — size of window in which features should be evaluated - - type — type of features, can be "icf" or "acf" - count — number of features to generate. - Max number of features is min(count, # possible distinct features) - -Returns vector of distinct acf features -*/ +@param window_size size of window in which features should be evaluated +@param type feature type. Can be "icf" or "acf" +@param count number of features to generate. +@param channel_count number of feature channels + */ std::vector > generateFeatures(Size window_size, const std::string& type, int count = INT_MAX, int channel_count = 10); @@ -106,6 +120,8 @@ generateFeatures(Size window_size, const std::string& type, //sort in-place of columns of the input matrix void sort_columns_without_copy(Mat& m, Mat indices = Mat()); +/** @brief Parameters for WaldBoost. weak_count — number of weak learners, alpha — cascade thresholding param. + */ struct CV_EXPORTS WaldBoostParams { int weak_count; @@ -115,44 +131,48 @@ struct CV_EXPORTS WaldBoostParams {} }; - - +/** @brief WaldBoost object detector from @cite Sochman05 +*/ class CV_EXPORTS WaldBoost : public Algorithm { public: - /* Train WaldBoost cascade for given data - - data — matrix of feature values, size M x N, one feature per row - - labels — matrix of sample class labels, size 1 x N. Labels can be from - {-1, +1} + /** @brief Train WaldBoost cascade for given data. - Returns feature indices chosen for cascade. - Feature enumeration starts from 0 - */ - virtual std::vector train(Mat& /*data*/, - const Mat& /*labels*/, bool use_fast_log=false) = 0; + Returns feature indices chosen for cascade. Feature enumeration starts from 0. + @param data matrix of feature values, size M x N, one feature per row + @param labels matrix of samples class labels, size 1 x N. Labels can be from {-1, +1} + @param use_fast_log + */ + virtual std::vector train(Mat& data, + const Mat& labels, bool use_fast_log=false) = 0; - /* Predict object class given object that can compute object features + /** @brief Predict objects class given object that can compute object features. - feature_evaluator — object that can compute features by demand - - Returns confidence_value — measure of confidense that object - is from class +1 - */ + Returns unnormed confidence value — measure of confidence that object is from class +1. + @param feature_evaluator object that can compute features by demand + */ virtual float predict( - const Ptr& /*feature_evaluator*/) const = 0; + const Ptr& feature_evaluator) const = 0; + + /** @brief Write WaldBoost to FileStorage + @param fs FileStorage for output + */ + virtual void write(FileStorage& fs) const = 0; - /* Write WaldBoost to FileStorage */ - virtual void write(FileStorage& /*fs*/) const = 0; + /** @brief Write WaldBoost to FileNode - /* Read WaldBoost */ - virtual void read(const FileNode& /*node*/) = 0; + @param node FileNode for reading + */ + virtual void read(const FileNode& node) = 0; }; +/** @brief Construct WaldBoost object. + */ CV_EXPORTS Ptr createWaldBoost(const WaldBoostParams& params = WaldBoostParams()); +/** @brief Params for ICFDetector training. + */ struct CV_EXPORTS ICFDetectorParams { int feature_count; @@ -170,69 +190,57 @@ struct CV_EXPORTS ICFDetectorParams {} }; +/** @brief Integral Channel Features from @cite Dollar09 +*/ class CV_EXPORTS ICFDetector { public: ICFDetector(): waldboost_(), features_(), ftype_() {} - /* Train detector - - pos_filenames — paths to objects images + /** @brief Train detector. 
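As a rough illustration of the channel and feature utilities documented earlier in this header, the sketch below computes integral channels and enumerates candidate features. The std::vector<std::vector<int> > feature representation is an assumption read off the declarations above (whose template arguments were stripped in this view), not a documented guarantee.

    #include <vector>
    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/xobjdetect.hpp>

    void icfChannels()
    {
        cv::Mat image = cv::imread("sample.png");

        std::vector<cv::Mat> channels;
        cv::xobjdetect::computeChannels(image, channels);  // integral channel images

        // Enumerate candidate "icf" features for a 24x24 detection window (assumed return type).
        std::vector<std::vector<int> > features =
            cv::xobjdetect::generateFeatures(cv::Size(24, 24), "icf", 5000);
    }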
- bg_filenames — path backgrounds images - - params — parameters for detector training - */ + @param pos_filenames path to folder with images of objects (wildcards like /my/path/\*.png are allowed) + @param bg_filenames path to folder with background images + @param params parameters for detector training + */ void train(const std::vector& pos_filenames, const std::vector& bg_filenames, ICFDetectorParams params = ICFDetectorParams()); - /* Detect object on image - - image — image for detection - - object — output array of bounding boxes - - scaleFactor — scale between layers in detection pyramid - - minSize — min size of objects in pixels - - maxSize — max size of objects in pixels - - slidingStep — sliding window step - - values — output vector with values of positive samples - - */ - + /** @brief Detect objects on image. + @param image image for detection + @param objects output array of bounding boxes + @param scaleFactor scale between layers in detection pyramid + @param minSize min size of objects in pixels + @param maxSize max size of objects in pixels + @param threshold + @param slidingStep sliding window step + @param values output vector with values of positive samples + */ void detect(const Mat& image, std::vector& objects, float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector& values); - /* Detect object on image - - image — image for detection - - object — output array of bounding boxes - - minScaleFactor — min factor image will be resized - - maxScaleFactor — max factor image will be resized - - factorStep — scaling factor is incremented according to factorStep - - slidingStep — sliding window step - - values — output vector with values of positive samples - - - */ + /** @brief Detect objects on image. + @param img image for detection + @param objects output array of bounding boxes + @param minScaleFactor min factor by which the image will be resized + @param maxScaleFactor max factor by which the image will be resized + @param factorStep scaling factor is incremented each pyramid layer according to this parameter + @param threshold + @param slidingStep sliding window step + @param values output vector with values of positive samples + */ void detect(const Mat& img, std::vector& objects, float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector& values); - /* Write detector to FileStorage */ + /** @brief Write detector to FileStorage. + @param fs FileStorage for output + */ void write(FileStorage &fs) const; - /* Read detector */ + /** @brief Write ICFDetector to FileNode + @param node FileNode for reading + */ void read(const FileNode &node); private: @@ -248,6 +256,8 @@ CV_EXPORTS void write(FileStorage& fs, String&, const ICFDetector& detector); CV_EXPORTS void read(const FileNode& node, ICFDetector& d, const ICFDetector& default_value = ICFDetector()); +//! 
@} + } /* namespace xobjdetect */ } /* namespace cv */ diff --git a/modules/xphoto/doc/denoising/denoising.rst b/modules/xphoto/doc/denoising.rst similarity index 100% rename from modules/xphoto/doc/denoising/denoising.rst rename to modules/xphoto/doc/denoising.rst diff --git a/modules/xphoto/doc/inpainting/inpainting.rst b/modules/xphoto/doc/inpainting.rst similarity index 100% rename from modules/xphoto/doc/inpainting/inpainting.rst rename to modules/xphoto/doc/inpainting.rst diff --git a/modules/xphoto/doc/colorbalance/whitebalance.rst b/modules/xphoto/doc/whitebalance.rst similarity index 100% rename from modules/xphoto/doc/colorbalance/whitebalance.rst rename to modules/xphoto/doc/whitebalance.rst diff --git a/modules/xphoto/doc/xphoto.bib b/modules/xphoto/doc/xphoto.bib new file mode 100644 index 000000000..b3db180ca --- /dev/null +++ b/modules/xphoto/doc/xphoto.bib @@ -0,0 +1,8 @@ +@incollection{He2012, + title={Statistics of patch offsets for image completion}, + author={He, Kaiming and Sun, Jian}, + booktitle={Computer Vision--ECCV 2012}, + pages={16--29}, + year={2012}, + publisher={Springer} +} diff --git a/modules/xphoto/doc/xphoto.rst b/modules/xphoto/doc/xphoto.rst index 6c55be67c..028362fd2 100644 --- a/modules/xphoto/doc/xphoto.rst +++ b/modules/xphoto/doc/xphoto.rst @@ -5,6 +5,6 @@ xphoto. Additional photo processing algorithms .. toctree:: :maxdepth: 2 - Color balance - Denoising - Inpainting + Color balance + Denoising + Inpainting diff --git a/modules/xphoto/include/opencv2/xphoto.hpp b/modules/xphoto/include/opencv2/xphoto.hpp index a0d434642..347e8d1e6 100644 --- a/modules/xphoto/include/opencv2/xphoto.hpp +++ b/modules/xphoto/include/opencv2/xphoto.hpp @@ -43,6 +43,9 @@ #ifndef __OPENCV_XPHOTO_HPP__ #define __OPENCV_XPHOTO_HPP__ +/** @defgroup xphoto Additional photo processing algorithms +*/ + #include "xphoto/inpainting.hpp" #include "xphoto/simple_color_balance.hpp" #include "xphoto/dct_image_denoising.hpp" diff --git a/modules/xphoto/include/opencv2/xphoto/dct_image_denoising.hpp b/modules/xphoto/include/opencv2/xphoto/dct_image_denoising.hpp index 3980f92af..bfb77fecc 100644 --- a/modules/xphoto/include/opencv2/xphoto/dct_image_denoising.hpp +++ b/modules/xphoto/include/opencv2/xphoto/dct_image_denoising.hpp @@ -43,32 +43,37 @@ #ifndef __OPENCV_DCT_IMAGE_DENOISING_HPP__ #define __OPENCV_DCT_IMAGE_DENOISING_HPP__ -/* -* dct_image_denoising.hpp -* -* Created on: Jun 26, 2014 -* Author: Yury Gitman +/** @file +@date Jun 26, 2014 +@author Yury Gitman */ #include -/*! \namespace cv -Namespace where all the C++ OpenCV functionality resides -*/ namespace cv { namespace xphoto { - /*! This function implements simple dct-based image denoising, - * link: http://www.ipol.im/pub/art/2011/ys-dct/ - * - * \param src : source image - * \param dst : destination image - * \param sigma : expected noise standard deviation - * \param psize : size of block side where dct is computed + +//! @addtogroup xphoto +//! @{ + + /** @brief The function implements simple dct-based denoising + + . + @param src source image + @param dst destination image + @param sigma expected noise standard deviation + @param psize size of block side where dct is computed + + @sa + fastNlMeansDenoising */ CV_EXPORTS_W void dctDenoising(const Mat &src, Mat &dst, const double sigma, const int psize = 16); + +//! 
@} + } } -#endif // __OPENCV_DCT_IMAGE_DENOISING_HPP__ \ No newline at end of file +#endif // __OPENCV_DCT_IMAGE_DENOISING_HPP__ diff --git a/modules/xphoto/include/opencv2/xphoto/inpainting.hpp b/modules/xphoto/include/opencv2/xphoto/inpainting.hpp index 3f66394af..9c40e8c9c 100644 --- a/modules/xphoto/include/opencv2/xphoto/inpainting.hpp +++ b/modules/xphoto/include/opencv2/xphoto/inpainting.hpp @@ -43,36 +43,48 @@ #ifndef __OPENCV_INPAINTING_HPP__ #define __OPENCV_INPAINTING_HPP__ -/* -* inpainting.hpp -* -* Created on: Jul 22, 2014 -* Author: Yury Gitman +/** @file +@date Jul 22, 2014 +@author Yury Gitman */ #include -/*! \namespace cv -Namespace where all the C++ OpenCV functionality resides -*/ namespace cv { namespace xphoto { + +//! @addtogroup xphoto +//! @{ + //! various inpainting algorithms - enum + enum InpaintTypes { + /** This algorithm searches for dominant correspondences (transformations) of + image patches and tries to seamlessly fill-in the area to be inpainted using this + transformations */ INPAINT_SHIFTMAP = 0 }; - /*! The function reconstructs the selected image area from known area. - * \param src : source image. - * \param mask : inpainting mask, 8-bit 1-channel image. Zero pixels indicate the area that needs to be inpainted. - * \param dst : destination image. - * \param algorithmType : inpainting method. + /** @brief The function implements different single-image inpainting algorithms. + + See the original paper @cite He2012 for details. + + @param src source image, it could be of any type and any number of channels from 1 to 4. In case of + 3- and 4-channels images the function expect them in CIELab colorspace or similar one, where first + color component shows intensity, while second and third shows colors. Nonetheless you can try any + colorspaces. + @param mask mask (CV_8UC1), where non-zero pixels indicate valid image area, while zero pixels + indicate area to be inpainted + @param dst destination image + @param algorithmType see xphoto::InpaintTypes */ CV_EXPORTS_W void inpaint(const Mat &src, const Mat &mask, Mat &dst, const int algorithmType); + +//! @} + } } -#endif // __OPENCV_INPAINTING_HPP__ \ No newline at end of file +#endif // __OPENCV_INPAINTING_HPP__ diff --git a/modules/xphoto/include/opencv2/xphoto/simple_color_balance.hpp b/modules/xphoto/include/opencv2/xphoto/simple_color_balance.hpp index 42d376487..416d76e21 100644 --- a/modules/xphoto/include/opencv2/xphoto/simple_color_balance.hpp +++ b/modules/xphoto/include/opencv2/xphoto/simple_color_balance.hpp @@ -43,42 +43,51 @@ #ifndef __OPENCV_SIMPLE_COLOR_BALANCE_HPP__ #define __OPENCV_SIMPLE_COLOR_BALANCE_HPP__ -/* -* simple_color_balance.hpp -* -* Created on: Jun 26, 2014 -* Author: Yury Gitman +/** @file +@date Jun 26, 2014 +@author Yury Gitman */ #include -/*! \namespace cv -Namespace where all the C++ OpenCV functionality resides -*/ namespace cv { namespace xphoto { + +//! @addtogroup xphoto +//! @{ + //! various white balance algorithms - enum + enum WhitebalanceTypes { + /** perform smart histogram adjustments (ignoring 4% pixels with minimal and maximal + values) for each channel */ WHITE_BALANCE_SIMPLE = 0, WHITE_BALANCE_GRAYWORLD = 1 }; - /*! 
This function implements different white balance algorithms
-    * \param src : source image
-    * \param dst : destination image
-    * \param algorithmType : type of the algorithm to use
-    * \param inputMin : minimum input value
-    * \param inputMax : maximum output value
-    * \param outputMin : minimum input value
-    * \param outputMax : maximum output value
-    */
+    /** @brief The function implements different algorithms of automatic white balance,
+
+    i.e. it tries to map the image's white color to perceptual white (this can be violated due to
+    specific illumination or camera settings).
+
+    @param src source image
+    @param dst destination image
+    @param algorithmType see xphoto::WhitebalanceTypes
+    @param inputMin minimum value in the input image
+    @param inputMax maximum value in the input image
+    @param outputMin minimum value in the output image
+    @param outputMax maximum value in the output image
+    @sa cvtColor, equalizeHist
+    */
    CV_EXPORTS_W void balanceWhite(const Mat &src, Mat &dst, const int algorithmType, const float inputMin = 0.0f, const float inputMax = 255.0f, const float outputMin = 0.0f, const float outputMax = 255.0f);
+
+//! @}
+
}
}
-#endif // __OPENCV_SIMPLE_COLOR_BALANCE_HPP__
\ No newline at end of file
+#endif // __OPENCV_SIMPLE_COLOR_BALANCE_HPP__
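Finally, a sketch that exercises the three xphoto entry points documented in this patch (balanceWhite, dctDenoising and inpaint). File names and the noise sigma are placeholders; the mask convention follows the inpaint documentation above (non-zero marks valid pixels, zero marks the area to be filled).

    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/xphoto.hpp>

    void xphotoExamples()
    {
        cv::Mat src = cv::imread("photo.png");
        cv::Mat dst;

        // Simple white balance, clipping the extreme 4% of each channel's histogram.
        cv::xphoto::balanceWhite(src, dst, cv::xphoto::WHITE_BALANCE_SIMPLE);

        // DCT-based denoising with an assumed noise standard deviation of 15.
        cv::xphoto::dctDenoising(src, dst, 15.0);

        // Shift-map inpainting: non-zero mask pixels mark valid image area, zero pixels are filled.
        cv::Mat mask = cv::imread("mask.png", cv::IMREAD_GRAYSCALE);
        cv::xphoto::inpaint(src, mask, dst, cv::xphoto::INPAINT_SHIFTMAP);
    }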