diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 81e18973f7..42e8650ad4 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -146,46 +146,76 @@ if(BUILD_DOCS AND HAVE_SPHINX) endif() # ========= Doxygen docs ========= + +macro(make_reference result modules_list black_list) + set(_res) + foreach(m ${${modules_list}}) + list(FIND ${black_list} ${m} _pos) + if(${_pos} EQUAL -1) + set(_res "${_res} @ref ${m} | ${m} \n") + endif() + endforeach() + set(${result} ${_res}) +endmacro() + if(BUILD_DOCS AND HAVE_DOXYGEN) - # documented modules list - set(candidates) - list(APPEND candidates ${BASE_MODULES} ${EXTRA_MODULES}) - # blacklisted modules - ocv_list_filterout(candidates "^ts$") + # not documented modules list + list(APPEND blacklist "ts" "java" "python2" "python3" "world") # gathering headers - set(all_headers) # files and dirs to process - set(all_images) # image search paths - set(reflist) # modules reference - foreach(m ${candidates}) - set(reflist "${reflist} \n- @subpage ${m}") - set(all_headers ${all_headers} "${OPENCV_MODULE_opencv_${m}_HEADERS}") - set(docs_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/doc") - if(EXISTS ${docs_dir}) - set(all_images ${all_images} ${docs_dir}) - set(all_headers ${all_headers} ${docs_dir}) + set(paths_include) + set(paths_doc) + set(paths_bib) + set(deps) + foreach(m ${BASE_MODULES} ${EXTRA_MODULES}) + list(FIND blacklist ${m} _pos) + if(${_pos} EQUAL -1) + # include folder + set(header_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/include") + if(EXISTS "${header_dir}") + list(APPEND paths_include "${header_dir}") + list(APPEND deps ${header_dir}) + endif() + # doc folder + set(docs_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/doc") + if(EXISTS "${docs_dir}") + list(APPEND paths_doc "${docs_dir}") + list(APPEND deps ${docs_dir}) + endif() + # BiBTeX file + set(bib_file "${docs_dir}/${m}.bib") + if(EXISTS "${bib_file}") + set(paths_bib "${paths_bib} ${bib_file}") + list(APPEND deps ${bib_file}) + endif() endif() 
endforeach() + # additional config set(doxyfile "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile") set(rootfile "${CMAKE_CURRENT_BINARY_DIR}/root.markdown") - set(all_headers ${all_headers} ${rootfile}) - string(REGEX REPLACE ";" " \\\\\\n" CMAKE_DOXYGEN_INPUT_LIST "${all_headers}") - string(REGEX REPLACE ";" " \\\\\\n" CMAKE_DOXYGEN_IMAGE_PATH "${all_images}") + set(bibfile "${CMAKE_CURRENT_SOURCE_DIR}/opencv.bib") + string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${paths_include} ; ${paths_doc}") + string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc}") + string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${CMAKE_SOURCE_DIR}/samples/cpp ; ${paths_doc}") set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_SOURCE_DIR}/DoxygenLayout.xml") set(CMAKE_DOXYGEN_OUTPUT_PATH "doxygen") - set(CMAKE_DOXYGEN_MODULES_REFERENCE "${reflist}") - set(CMAKE_DOXYGEN_EXAMPLE_PATH "${CMAKE_SOURCE_DIR}/samples/cpp") + set(CMAKE_EXTRA_BIB_FILES "${bibfile} ${paths_bib}") + + # generate references + make_reference(CMAKE_DOXYGEN_MAIN_REFERENCE BASE_MODULES blacklist) + make_reference(CMAKE_DOXYGEN_EXTRA_REFERENCE EXTRA_MODULES blacklist) # writing file configure_file(Doxyfile.in ${doxyfile} @ONLY) configure_file(root.markdown.in ${rootfile} @ONLY) configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/html/mymath.sty" @ONLY) + configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/latex/mymath.sty" @ONLY) add_custom_target(doxygen COMMAND ${DOXYGEN_BUILD} ${doxyfile} - DEPENDS ${doxyfile} ${all_headers} ${all_images}) +DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps}) endif() if(HAVE_DOC_GENERATOR) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index c8222c77b6..624e83bae9 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -85,7 +85,7 @@ SHOW_FILES = YES SHOW_NAMESPACES = YES FILE_VERSION_FILTER = LAYOUT_FILE = @CMAKE_DOXYGEN_LAYOUT@ -CITE_BIB_FILES = @CMAKE_CURRENT_SOURCE_DIR@/opencv.bib +CITE_BIB_FILES = @CMAKE_EXTRA_BIB_FILES@ QUIET = YES WARNINGS = YES 
WARN_IF_UNDOCUMENTED = YES @@ -99,7 +99,7 @@ FILE_PATTERNS = RECURSIVE = YES EXCLUDE = EXCLUDE_SYMLINKS = NO -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = *.inl.hpp *.impl.hpp *_detail.hpp */cudev/**/detail/*.hpp EXCLUDE_SYMBOLS = cv::DataType<*> int EXAMPLE_PATH = @CMAKE_DOXYGEN_EXAMPLE_PATH@ EXAMPLE_PATTERNS = * @@ -119,7 +119,7 @@ REFERENCES_LINK_SOURCE = YES SOURCE_TOOLTIPS = YES USE_HTAGS = NO VERBATIM_HEADERS = NO -ALPHABETICAL_INDEX = NO +ALPHABETICAL_INDEX = YES COLS_IN_ALPHA_INDEX = 5 IGNORE_PREFIX = GENERATE_HTML = YES @@ -222,6 +222,7 @@ INCLUDE_FILE_PATTERNS = PREDEFINED = __cplusplus=1 \ HAVE_IPP_A=1 \ CVAPI(x)=x \ + CV_DOXYGEN= \ CV_EXPORTS= \ CV_EXPORTS_W= \ CV_EXPORTS_W_SIMPLE= \ @@ -241,7 +242,8 @@ PREDEFINED = __cplusplus=1 \ CV_INLINE= \ CV_NORETURN= \ CV_DEFAULT(x)=" = x" \ - CV_NEON=1 + CV_NEON=1 \ + FLANN_DEPRECATED= EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES TAGFILES = diff --git a/doc/disabled_doc_warnings.txt b/doc/disabled_doc_warnings.txt new file mode 100644 index 0000000000..8c81b8dd65 --- /dev/null +++ b/doc/disabled_doc_warnings.txt @@ -0,0 +1,2 @@ +# doxygen citelist build workaround +citelist : .*Unexpected new line character.* diff --git a/doc/mymath.js b/doc/mymath.js index 13ee86a533..d9af0350fe 100644 --- a/doc/mymath.js +++ b/doc/mymath.js @@ -8,7 +8,8 @@ MathJax.Hub.Config( forkthree: ["\\left\\{ \\begin{array}{l l} #1 & \\mbox{#2}\\\\ #3 & \\mbox{#4}\\\\ #5 & \\mbox{#6}\\\\ \\end{array} \\right.", 6], vecthree: ["\\begin{bmatrix} #1\\\\ #2\\\\ #3 \\end{bmatrix}", 3], vecthreethree: ["\\begin{bmatrix} #1 & #2 & #3\\\\ #4 & #5 & #6\\\\ #7 & #8 & #9 \\end{bmatrix}", 9], - hdotsfor: ["\\dots", 1] + hdotsfor: ["\\dots", 1], + mathbbm: ["\\mathbb{#1}", 1] } } } diff --git a/doc/mymath.sty b/doc/mymath.sty index 24dae263a5..08ab50d2b4 100644 --- a/doc/mymath.sty +++ b/doc/mymath.sty @@ -3,6 +3,7 @@ \usepackage{euler} \usepackage{amssymb} \usepackage{amsmath} +\usepackage{bbm} \newcommand{\matTT}[9]{ \[ diff --git a/doc/opencv.bib 
b/doc/opencv.bib index ad993b07ab..09206587a2 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -1,427 +1,826 @@ -@inproceedings{Agrawal08, - author = {Agrawal, M. and Konolige, K. and Blas, M.R.}, - title = {CenSurE: Center Surround Extremas for Realtime Feature Detection and Matching}, - booktitle = {ECCV08}, - year = {2008}, - pages = {IV: 102-115}, - bibsource = {http://www.visionbib.com/bibliography/twod276.html#TT22337} +@comment{Bib-it, + This file was created by Bib-it 1.4 + 97 entries written +} + +@INCOLLECTION{ABD12, + author = {Alcantarilla, Pablo Fern{\'a}ndez and Bartoli, Adrien and Davison, Andrew J}, + title = {KAZE features}, + booktitle = {Computer Vision--ECCV 2012}, + year = {2012}, + pages = {214--227}, + publisher = {Springer} +} +@ARTICLE{ANB13, + author = {Alcantarilla, Pablo F and Nuevo, Jes{\'u}s and Bartoli, Adrien}, + title = {Fast Explicit Diffusion for Accelerated Features in Nonlinear Scale Spaces}, + year = {2011}, + pages = {1281--1298}, + journal = {Trans. Pattern Anal. 
Machine Intell}, + volume = {34}, + number = {7} +} +@ARTICLE{BA83, + author = {Burt, Peter J and Adelson, Edward H}, + title = {A multiresolution spline with application to image mosaics}, + year = {1983}, + pages = {217--236}, + journal = {ACM Transactions on Graphics (TOG)}, + volume = {2}, + number = {4}, + publisher = {ACM} +} +@ARTICLE{BL07, + author = {Brown, Matthew and Lowe, David G}, + title = {Automatic panoramic image stitching using invariant features}, + year = {2007}, + pages = {59--73}, + journal = {International journal of computer vision}, + volume = {74}, + number = {1}, + publisher = {Springer} +} +@ARTICLE{BT96, + author = {Birchfield, Stan and Tomasi, Carlo}, + title = {Depth discontinuities by pixel-to-pixel stereo}, + year = {1999}, + pages = {269--293}, + journal = {International Journal of Computer Vision}, + volume = {35}, + number = {3}, + publisher = {Springer} +} +@ARTICLE{BT98, + author = {Birchfield, Stan and Tomasi, Carlo}, + title = {A pixel dissimilarity measure that is insensitive to image sampling}, + year = {1998}, + pages = {401--406}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {20}, + number = {4}, + publisher = {IEEE} +} +@ARTICLE{Ballard1981, + author = {Ballard, Dana H}, + title = {Generalizing the Hough transform to detect arbitrary shapes}, + year = {1981}, + pages = {111--122}, + journal = {Pattern recognition}, + volume = {13}, + number = {2}, + publisher = {Elsevier} +} +@ARTICLE{Borgefors86, + author = {Borgefors, Gunilla}, + title = {Distance transformations in digital images}, + year = {1986}, + pages = {344--371}, + journal = {Computer vision, graphics, and image processing}, + volume = {34}, + number = {3}, + publisher = {Elsevier} +} +@ARTICLE{Bouguet00, + author = {Bouguet, Jean-Yves}, + title = {Pyramidal implementation of the affine lucas kanade feature tracker description of the algorithm}, + year = {2001}, + journal = {Intel Corporation}, + volume = {5} +} 
+@MISC{BouguetMCT, + author = {Bouguet, Jean-Yves}, + title = {Camera Calibration Tool box for Matlab [EB/OL]}, + year = {2004} +} +@INPROCEEDINGS{Bradski00, + author = {Bradski, GR and Davis, J}, + title = {Motion segmentation and pose recognition with motion history gradients}, + booktitle = {Applications of Computer Vision, 2000, Fifth IEEE Workshop on.}, + year = {2000}, + pages = {238--244}, + organization = {IEEE} +} +@ARTICLE{Bradski98, + author = {Bradski, Gary R}, + title = {Computer vision face tracking for use in a perceptual user interface}, + year = {1998}, + publisher = {Citeseer} +} +@ARTICLE{Breiman84, + author = {Breiman, Leo and Friedman, Jerome H and Olshen, Richard A and Stone, Charles J}, + title = {Classification and regression trees}, + year = {1984}, + journal = {Wadsworth International Group} +} +@INCOLLECTION{Brox2004, + author = {Brox, Thomas and Bruhn, Andres and Papenberg, Nils and Weickert, Joachim}, + title = {High accuracy optical flow estimation based on a theory for warping}, + booktitle = {Computer Vision-ECCV 2004}, + year = {2004}, + pages = {25--36}, + publisher = {Springer} +} +@ARTICLE{Burges98, + author = {Burges, Christopher JC}, + title = {A tutorial on support vector machines for pattern recognition}, + year = {1998}, + pages = {121--167}, + journal = {Data mining and knowledge discovery}, + volume = {2}, + number = {2}, + publisher = {Springer} +} +@INPROCEEDINGS{CL12, + author = {Lu, Cewu and Xu, Li and Jia, Jiaya}, + title = {Contrast preserving decolorization}, + booktitle = {Computational Photography (ICCP), 2012 IEEE International Conference on}, + year = {2012}, + pages = {1--7}, + organization = {IEEE} +} +@ARTICLE{Canny86, + author = {Canny, John}, + title = {A computational approach to edge detection}, + year = {1986}, + pages = {679--698}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + number = {6}, + publisher = {IEEE} +} +@ARTICLE{ChambolleEtAl, + author = {Chambolle, Antonin and Caselles, Vicent and Cremers, Daniel 
and Novaga, Matteo and Pock, Thomas}, + title = {An introduction to total variation for image analysis}, + year = {2010}, + pages = {263--340}, + journal = {Theoretical foundations and numerical methods for sparse recovery}, + volume = {9}, + publisher = {Walter de Gruyter} +} +@INPROCEEDINGS{DD02, + author = {Durand, Fr{\'e}do and Dorsey, Julie}, + title = {Fast bilateral filtering for the display of high-dynamic-range images}, + booktitle = {ACM Transactions on Graphics (TOG)}, + year = {2002}, + pages = {257--266}, + volume = {21}, + number = {3}, + organization = {ACM} +} +@INPROCEEDINGS{DM03, + author = {Drago, Fr{\'e}d{\'e}ric and Myszkowski, Karol and Annen, Thomas and Chiba, Norishige}, + title = {Adaptive logarithmic mapping for displaying high contrast scenes}, + booktitle = {Computer Graphics Forum}, + year = {2003}, + pages = {419--426}, + volume = {22}, + number = {3}, + organization = {Wiley Online Library} +} +@INPROCEEDINGS{DM97, + author = {Debevec, Paul E and Malik, Jitendra}, + title = {Recovering high dynamic range radiance maps from photographs}, + booktitle = {ACM SIGGRAPH 2008 classes}, + year = {2008}, + pages = {31}, + organization = {ACM} +} +@INPROCEEDINGS{Dalal2005, + author = {Dalal, Navneet and Triggs, Bill}, + title = {Histograms of oriented gradients for human detection}, + booktitle = {Computer Vision and Pattern Recognition, 2005. CVPR 2005. IEEE Computer Society Conference on}, + year = {2005}, + pages = {886--893}, + volume = {1}, + organization = {IEEE} +} +@INPROCEEDINGS{Davis97, + author = {Davis, James W and Bobick, Aaron F}, + title = {The representation and recognition of human movement using temporal templates}, + booktitle = {Computer Vision and Pattern Recognition, 1997. 
Proceedings., 1997 IEEE Computer Society Conference on}, + year = {1997}, + pages = {928--934}, + organization = {IEEE} +} +@INPROCEEDINGS{EM11, + author = {Gastal, Eduardo SL and Oliveira, Manuel M}, + title = {Domain transform for edge-aware image and video processing}, + booktitle = {ACM Transactions on Graphics (TOG)}, + year = {2011}, + pages = {69}, + volume = {30}, + number = {4}, + organization = {ACM} +} +@ARTICLE{EP08, + author = {Evangelidis, Georgios D and Psarakis, Emmanouil Z}, + title = {Parametric image alignment using enhanced correlation coefficient maximization}, + year = {2008}, + pages = {1858--1865}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {30}, + number = {10}, + publisher = {IEEE} +} +@INPROCEEDINGS{FGD2003, + author = {Li, Liyuan and Huang, Weimin and Gu, Irene YH and Tian, Qi}, + title = {Foreground object detection from videos containing complex background}, + booktitle = {Proceedings of the eleventh ACM international conference on Multimedia}, + year = {2003}, + pages = {2--10}, + organization = {ACM} +} +@ARTICLE{FHT98, + author = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert}, + title = {Additive Logistic Regression: a Statistical View of Boosting}, + year = {1998} +} +@INPROCEEDINGS{FL02, + author = {Fattal, Raanan and Lischinski, Dani and Werman, Michael}, + title = {Gradient domain high dynamic range compression}, + booktitle = {ACM Transactions on Graphics (TOG)}, + year = {2002}, + pages = {249--256}, + volume = {21}, + number = {3}, + organization = {ACM} +} +@INCOLLECTION{Farneback2003, + author = {Farneb{\"a}ck, Gunnar}, + title = {Two-frame motion estimation based on polynomial expansion}, + booktitle = {Image Analysis}, + year = {2003}, + pages = {363--370}, + publisher = {Springer} +} +@INPROCEEDINGS{Farsiu03, + author = {Farsiu, Sina and Robinson, Dirk and Elad, Michael and Milanfar, Peyman}, + title = {Fast and robust super-resolution}, + booktitle = {Image 
Processing, 2003. ICIP 2003. Proceedings. 2003 International Conference on}, + year = {2003}, + pages = {II--291}, + volume = {2}, + organization = {IEEE} +} +@TECHREPORT{Felzenszwalb04, + author = {Felzenszwalb, Pedro and Huttenlocher, Daniel}, + title = {Distance transforms of sampled functions}, + year = {2004}, + institution = {Cornell University} +} +@ARTICLE{Felzenszwalb10, + author = {Felzenszwalb, Pedro F and Girshick, Ross B and McAllester, David and Ramanan, Deva}, + title = {Object detection with discriminatively trained part-based models}, + year = {2010}, + pages = {1627--1645}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {32}, + number = {9}, + publisher = {IEEE} +} +@ARTICLE{Felzenszwalb2006, + author = {Felzenszwalb, Pedro F and Huttenlocher, Daniel P}, + title = {Efficient belief propagation for early vision}, + year = {2006}, + pages = {41--54}, + journal = {International journal of computer vision}, + volume = {70}, + number = {1}, + publisher = {Springer} +} +@INPROCEEDINGS{Fitzgibbon95, + author = {Fitzgibbon, Andrew W and Fisher, Robert B}, + title = {A buyer's guide to conic fitting}, + booktitle = {Proceedings of the 6th British conference on Machine vision (Vol. 
2)}, + year = {1995}, + pages = {513--522}, + organization = {BMVA Press} +} +@INPROCEEDINGS{G11, + author = {Grundmann, Matthias and Kwatra, Vivek and Essa, Irfan}, + title = {Auto-directed video stabilization with robust l1 optimal camera paths}, + booktitle = {Computer Vision and Pattern Recognition (CVPR), 2011 IEEE Conference on}, + year = {2011}, + pages = {225--232}, + organization = {IEEE} +} +@ARTICLE{GW03, + author = {Ward, Greg}, + title = {Fast, robust image registration for compositing high dynamic range photographs from hand-held exposures}, + year = {2003}, + pages = {17--30}, + journal = {Journal of graphics tools}, + volume = {8}, + number = {2}, + publisher = {Taylor \& Francis} +} +@INPROCEEDINGS{Gold2012, + author = {Godbehere, Andrew B and Matsukawa, Akihiro and Goldberg, Ken}, + title = {Visual tracking of human visitors under variable-lighting conditions for a responsive audio art installation}, + booktitle = {American Control Conference (ACC), 2012}, + year = {2012}, + pages = {4305--4312}, + organization = {IEEE} +} +@ARTICLE{Guil1999, + author = {Guil, N and Gonzalez-Linares, Jos{\'e} Mar{\'\i}a and Zapata, Emilio L}, + title = {Bidimensional shape detection using an invariant approach}, + year = {1999}, + pages = {1025--1038}, + journal = {Pattern Recognition}, + volume = {32}, + number = {6}, + publisher = {Elsevier} +} +@ARTICLE{HH08, + author = {Hirschmuller, Heiko}, + title = {Stereo processing by semiglobal matching and mutual information}, + year = {2008}, + pages = {328--341}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {30}, + number = {2}, + publisher = {IEEE} +} +@ARTICLE{HTF01, + author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome}, + title = {The elements of statistical learning: data mining, inference and prediction}, + year = {2001}, + pages = {371--406}, + journal = {New York: Springer-Verlag}, + volume = {1}, + number = {8} +} +@ARTICLE{Hartley99, + author = 
{Hartley, Richard I}, + title = {Theory and practice of projective rectification}, + year = {1999}, + pages = {115--127}, + journal = {International Journal of Computer Vision}, + volume = {35}, + number = {2}, + publisher = {Springer} +} +@BOOK{HartleyZ00, + author = {Hartley, Richard and Zisserman, Andrew}, + title = {Multiple view geometry in computer vision}, + year = {2003}, + publisher = {Cambridge university press} +} +@ARTICLE{Horn81, + author = {Horn, Berthold KP and Schunck, Brian G}, + title = {Determining Optical Flow}, + year = {1981}, + pages = {185--203}, + journal = {Artificial Intelligence}, + volume = {17} +} +@ARTICLE{Hu62, + author = {Hu, Ming-Kuei}, + title = {Visual pattern recognition by moment invariants}, + year = {1962}, + pages = {179--187}, + journal = {Information Theory, IRE Transactions on}, + volume = {8}, + number = {2}, + publisher = {IEEE} +} +@ARTICLE{Javier2012, + author = {S{\'a}nchez P{\'e}rez, Javier and Meinhardt-Llopis, Enric and Facciolo, Gabriele}, + title = {TV-L1 optical flow estimation}, + year = {2012} +} +@ARTICLE{KleeLaskowski85, + author = {Klee, Victor and Laskowski, Michael C}, + title = {Finding the smallest triangles containing a given convex polygon}, + year = {1985}, + pages = {359--375}, + journal = {Journal of Algorithms}, + volume = {6}, + number = {3}, + publisher = {Elsevier} +} +@INPROCEEDINGS{Kolmogorov03, + author = {Kim, Junhwan and Kolmogorov, Vladimir and Zabih, Ramin}, + title = {Visual correspondence using energy minimization and mutual information}, + booktitle = {Computer Vision, 2003. Proceedings. 
Ninth IEEE International Conference on}, + year = {2003}, + pages = {1033--1040}, + organization = {IEEE} +} +@INPROCEEDINGS{LCS11, + author = {Leutenegger, Stefan and Chli, Margarita and Siegwart, Roland Yves}, + title = {BRISK: Binary robust invariant scalable keypoints}, + booktitle = {Computer Vision (ICCV), 2011 IEEE International Conference on}, + year = {2011}, + pages = {2548--2555}, + organization = {IEEE} +} +@ARTICLE{LibSVM, + author = {Chang, Chih-Chung and Lin, Chih-Jen}, + title = {LIBSVM: a library for support vector machines}, + year = {2011}, + pages = {27}, + journal = {ACM Transactions on Intelligent Systems and Technology (TIST)}, + volume = {2}, + number = {3}, + publisher = {ACM} +} +@INPROCEEDINGS{Lienhart02, + author = {Lienhart, Rainer and Maydt, Jochen}, + title = {An extended set of haar-like features for rapid object detection}, + booktitle = {Image Processing. 2002. Proceedings. 2002 International Conference on}, + year = {2002}, + pages = {I--900}, + volume = {1}, + organization = {IEEE} +} +@INPROCEEDINGS{Lucas81, + author = {Lucas, Bruce D and Kanade, Takeo and others}, + title = {An iterative image registration technique with an application to stereo vision.}, + booktitle = {IJCAI}, + year = {1981}, + pages = {674--679}, + volume = {81} +} +@MISC{MA13, + author = {Mordvintsev, Alexander}, + title = {ROF and TV-L1 denoising with Primal-Dual algorithm}, + url = {http://znah.net/rof-and-tv-l1-denoising-with-primal-dual-algorithm.html} +} +@ARTICLE{MHT2011, + author = {Getreuer, Pascal}, + title = {Malvar-He-Cutler Linear Image Demosaicking}, + year = {2011}, + journal = {Image Processing on Line} +} +@INPROCEEDINGS{MK07, + author = {Mertens, Tom and Kautz, Jan and Van Reeth, Frank}, + title = {Exposure fusion}, + booktitle = {Computer Graphics and Applications, 2007. PG'07. 
15th Pacific Conference on}, + year = {2007}, + pages = {382--390}, + organization = {IEEE} +} +@ARTICLE{MM06, + author = {Mantiuk, Rafal and Myszkowski, Karol and Seidel, Hans-Peter}, + title = {A perceptual framework for contrast processing of high dynamic range images}, + year = {2006}, + pages = {286--308}, + journal = {ACM Transactions on Applied Perception (TAP)}, + volume = {3}, + number = {3}, + publisher = {ACM} +} +@INCOLLECTION{MOG2001, + author = {KaewTraKulPong, Pakorn and Bowden, Richard}, + title = {An improved adaptive background mixture model for real-time tracking with shadow detection}, + booktitle = {Video-Based Surveillance Systems}, + year = {2002}, + pages = {135--144}, + publisher = {Springer} +} +@ARTICLE{Malis, + author = {Malis, Ezio and Vargas, Manuel and others}, + title = {Deeper understanding of the homography decomposition for vision-based control}, + year = {2007} +} +@ARTICLE{Matas00, + author = {Matas, Jiri and Galambos, Charles and Kittler, Josef}, + title = {Robust detection of lines using the progressive probabilistic hough transform}, + year = {2000}, + pages = {119--137}, + journal = {Computer Vision and Image Understanding}, + volume = {78}, + number = {1}, + publisher = {Elsevier} +} +@INPROCEEDINGS{Meyer92, + author = {Meyer, Fernand}, + title = {Color image segmentation}, + booktitle = {Image Processing and its Applications, 1992., International Conference on}, + year = {1992}, + pages = {303--306}, + organization = {IET} +} +@INCOLLECTION{Mitzel09, + author = {Mitzel, Dennis and Pock, Thomas and Schoenemann, Thomas and Cremers, Daniel}, + title = {Video super resolution using duality based tv-l 1 optical flow}, + booktitle = {Pattern Recognition}, + year = {2009}, + pages = {432--441}, + publisher = {Springer} +} +@INPROCEEDINGS{Muja2009, + author = {Muja, Marius and Lowe, David G}, + title = {Fast Approximate Nearest Neighbors with Automatic Algorithm Configuration}, + booktitle = {VISAPP (1)}, + year = {2009}, + pages 
= {331--340} +} +@ARTICLE{Nister03, + author = {Nist{\'e}r, David}, + title = {An efficient solution to the five-point relative pose problem}, + year = {2004}, + pages = {756--770}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {26}, + number = {6}, + publisher = {IEEE} +} +@ARTICLE{OF06, + author = {Matsushita, Yasuyuki and Ofek, Eyal and Ge, Weina and Tang, Xiaoou and Shum, Heung-Yeung}, + title = {Full-frame video stabilization with motion inpainting}, + year = {2006}, + pages = {1150--1163}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {28}, + number = {7}, + publisher = {IEEE} +} +@ARTICLE{ORourke86, + author = {O'Rourke, Joseph and Aggarwal, Alok and Maddila, Sanjeev and Baldwin, Michael}, + title = {An optimal algorithm for finding minimal enclosing triangles}, + year = {1986}, + pages = {258--269}, + journal = {Journal of Algorithms}, + volume = {7}, + number = {2}, + publisher = {Elsevier} +} +@INPROCEEDINGS{PM03, + author = {P{\'e}rez, Patrick and Gangnet, Michel and Blake, Andrew}, + title = {Poisson image editing}, + booktitle = {ACM Transactions on Graphics (TOG)}, + year = {2003}, + pages = {313--318}, + volume = {22}, + number = {3}, + organization = {ACM} +} +@INPROCEEDINGS{Puzicha1997, + author = {Puzicha, Jan and Hofmann, Thomas and Buhmann, Joachim M}, + title = {Non-parametric similarity measures for unsupervised texture segmentation and image retrieval}, + booktitle = {Computer Vision and Pattern Recognition, 1997. Proceedings., 1997 IEEE Computer Society Conference on}, + year = {1997}, + pages = {267--272}, + organization = {IEEE} +} +@INPROCEEDINGS{RB99, + author = {Robertson, Mark A and Borman, Sean and Stevenson, Robert L}, + title = {Dynamic range improvement through multiple exposures}, + booktitle = {Image Processing, 1999. ICIP 99. Proceedings. 
1999 International Conference on}, + year = {1999}, + pages = {159--163}, + volume = {3}, + organization = {IEEE} +} +@ARTICLE{RD05, + author = {Reinhard, Erik and Devlin, Kate}, + title = {Dynamic range reduction inspired by photoreceptor physiology}, + year = {2005}, + pages = {13--24}, + journal = {Visualization and Computer Graphics, IEEE Transactions on}, + volume = {11}, + number = {1}, + publisher = {IEEE} +} +@INPROCEEDINGS{RPROP93, + author = {Riedmiller, Martin and Braun, Heinrich}, + title = {A direct adaptive method for faster backpropagation learning: The RPROP algorithm}, + booktitle = {Neural Networks, 1993., IEEE International Conference on}, + year = {1993}, + pages = {586--591}, + organization = {IEEE} +} +@INPROCEEDINGS{RRKB11, + author = {Rublee, Ethan and Rabaud, Vincent and Konolige, Kurt and Bradski, Gary}, + title = {ORB: an efficient alternative to SIFT or SURF}, + booktitle = {Computer Vision (ICCV), 2011 IEEE International Conference on}, + year = {2011}, + pages = {2564--2571}, + organization = {IEEE} +} +@TECHREPORT{RS04, + author = {Szeliski, R}, + title = {Image alignment and stitching: a tutorial, Microsoft Corporation, Redmond, WA}, + year = {2004}, + institution = {MSR-TR-2004-92} +} +@BOOK{RS10, + author = {Szeliski, Richard}, + title = {Computer vision: algorithms and applications}, + year = {2010}, + publisher = {Springer} +} +@ARTICLE{Rafael12, + author = {von Gioi, Rafael Grompone and Jakubowicz, J{\'e}r{\'e}mie and Morel, Jean-Michel and Randall, Gregory}, + title = {LSD: a line segment detector}, + year = {2012} +} +@INCOLLECTION{Rosten06, + author = {Rosten, Edward and Drummond, Tom}, + title = {Machine learning for high-speed corner detection}, + booktitle = {Computer Vision--ECCV 2006}, + year = {2006}, + pages = {430--443}, + publisher = {Springer} +} +@ARTICLE{Rubner2000, + author = {Rubner, Yossi and Tomasi, Carlo and Guibas, Leonidas J}, + title = {The earth mover's distance as a metric for image retrieval}, + year = 
{2000}, + pages = {99--121}, + journal = {International Journal of Computer Vision}, + volume = {40}, + number = {2}, + publisher = {Springer} +} +@ARTICLE{RubnerSept98, + author = {Rubner, Yossi and Tomasi, Carlo and Guibas, Leonidas J}, + title = {The Earth Mover's Distance as a Metric for Image Retrieval}, + year = {1998}, + publisher = {Stanford University} +} +@ARTICLE{SS00, + author = {Shum, Heung-Yeung and Szeliski, Richard}, + title = {Systems and experiment paper: Construction of panoramic image mosaics with global and local alignment}, + year = {2000}, + pages = {101--130}, + journal = {International Journal of Computer Vision}, + volume = {36}, + number = {2}, + publisher = {Springer} +} +@INPROCEEDINGS{Shi94, + author = {Shi, Jianbo and Tomasi, Carlo}, + title = {Good features to track}, + booktitle = {Computer Vision and Pattern Recognition, 1994. Proceedings CVPR'94., 1994 IEEE Computer Society Conference on}, + year = {1994}, + pages = {593--600}, + organization = {IEEE} +} +@ARTICLE{Sklansky82, + author = {Sklansky, Jack}, + title = {Finding the convex hull of a simple polygon}, + year = {1982}, + pages = {79--83}, + journal = {Pattern Recognition Letters}, + volume = {1}, + number = {2}, + publisher = {Elsevier} +} +@ARTICLE{Slabaugh, + author = {Slabaugh, Gregory G}, + title = {Computing Euler angles from a rotation matrix}, + year = {1999}, + pages = {2000}, + journal = {Retrieved on August}, + volume = {6} +} +@MISC{SteweniusCFS, + author = {Stewenius, Henrik}, + title = {Calibrated Fivepoint solver}, + url = {http://www.vis.uky.edu/~stewe/FIVEPOINT/} +} +@ARTICLE{Suzuki85, + author = {Suzuki, Satoshi and others}, + title = {Topological structural analysis of digitized binary images by border following}, + year = {1985}, + pages = {32--46}, + journal = {Computer Vision, Graphics, and Image Processing}, + volume = {30}, + number = {1}, + publisher = {Elsevier} +} +@ARTICLE{TehChin89, + author = {Teh, C-H and Chin, Roland T.}, + title = {On the 
detection of dominant points on digital curves}, + year = {1989}, + pages = {859--872}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {11}, + number = {8}, + publisher = {IEEE} +} +@ARTICLE{Telea04, + author = {Telea, Alexandru}, + title = {An image inpainting technique based on the fast marching method}, + year = {2004}, + pages = {23--34}, + journal = {Journal of graphics tools}, + volume = {9}, + number = {1}, + publisher = {Taylor \& Francis} +} +@INPROCEEDINGS{UES01, + author = {Uyttendaele, Matthew and Eden, Ashley and Szeliski, R}, + title = {Eliminating ghosting and exposure artifacts in image mosaics}, + booktitle = {Computer Vision and Pattern Recognition, 2001. CVPR 2001. Proceedings of the 2001 IEEE Computer Society Conference on}, + year = {2001}, + pages = {II--509}, + volume = {2}, + organization = {IEEE} +} +@INPROCEEDINGS{V03, + author = {Kwatra, Vivek and Sch{\"o}dl, Arno and Essa, Irfan and Turk, Greg and Bobick, Aaron}, + title = {Graphcut textures: image and video synthesis using graph cuts}, + booktitle = {ACM Transactions on Graphics (ToG)}, + year = {2003}, + pages = {277--286}, + volume = {22}, + number = {3}, + organization = {ACM} +} +@INPROCEEDINGS{Viola01, + author = {Viola, Paul and Jones, Michael}, + title = {Rapid object detection using a boosted cascade of simple features}, + booktitle = {Computer Vision and Pattern Recognition, 2001. CVPR 2001. 
Proceedings of the 2001 IEEE Computer Society Conference on}, + year = {2001}, + pages = {I--511}, + volume = {1}, + organization = {IEEE} +} +@INPROCEEDINGS{WJ10, + author = {Xu, Wei and Mulligan, Jane}, + title = {Performance evaluation of color correction approaches for automatic multi-view image and video stitching}, + booktitle = {Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on}, + year = {2010}, + pages = {263--270}, + organization = {IEEE} +} +@MISC{Welch95, + author = {Welch, Greg and Bishop, Gary}, + title = {An introduction to the Kalman filter}, + year = {1995} +} +@INPROCEEDINGS{Yang2010, + author = {Yang, Qingxiong and Wang, Liang and Ahuja, Narendra}, + title = {A constant-space belief propagation algorithm for stereo matching}, + booktitle = {Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on}, + year = {2010}, + pages = {1458--1465}, + organization = {IEEE} +} +@ARTICLE{Yuen90, + author = {Yuen, HK and Princen, John and Illingworth, John and Kittler, Josef}, + title = {Comparative study of Hough transform methods for circle finding}, + year = {1990}, + pages = {71--77}, + journal = {Image and Vision Computing}, + volume = {8}, + number = {1}, + publisher = {Elsevier} +} +@INCOLLECTION{Zach2007, + author = {Zach, Christopher and Pock, Thomas and Bischof, Horst}, + title = {A duality based approach for realtime TV-L 1 optical flow}, + booktitle = {Pattern Recognition}, + year = {2007}, + pages = {214--223}, + publisher = {Springer} +} +@ARTICLE{Zhang2000, + author = {Zhang, Zhengyou}, + title = {A flexible new technique for camera calibration}, + year = {2000}, + pages = {1330--1334}, + journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, + volume = {22}, + number = {11}, + publisher = {IEEE} +} +@INPROCEEDINGS{Zivkovic2004, + author = {Zivkovic, Zoran}, + title = {Improved adaptive Gaussian mixture model for background subtraction}, + booktitle = {Pattern Recognition, 2004. 
ICPR 2004. Proceedings of the 17th International Conference on}, + year = {2004}, + pages = {28--31}, + volume = {2}, + organization = {IEEE} +} +@ARTICLE{Zivkovic2006, + author = {Zivkovic, Zoran and van der Heijden, Ferdinand}, + title = {Efficient adaptive density estimation per image pixel for the task of background subtraction}, + year = {2006}, + pages = {773--780}, + journal = {Pattern recognition letters}, + volume = {27}, + number = {7}, + publisher = {Elsevier} +} +@INPROCEEDINGS{arthur_kmeanspp_2007, + author = {Arthur, David and Vassilvitskii, Sergei}, + title = {k-means++: The advantages of careful seeding}, + booktitle = {Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms}, + year = {2007}, + pages = {1027--1035}, + organization = {Society for Industrial and Applied Mathematics} +} +@ARTICLE{mitchell2005logistic, + author = {Mitchell, Tom M}, + title = {Logistic Regression}, + year = {2005}, + pages = {701}, + journal = {Machine learning}, + volume = {10} } - -@inproceedings{Bay06, - address = {Graz Austria}, - author = {Bay, H. and Tuytelaars, T. and Van Gool, L.}, - booktitle = {9th European Conference on Computer Vision}, - keywords = {local-feature, sift}, - month = {May}, - title = {SURF: Speeded Up Robust Features}, - year = {2006} -} - -@inproceedings{BT96, - author = {Tomasi, C. and Birchfield, S.T.}, - title = {Depth Discontinuities by Pixel-to-Pixel Stereo}, - booktitle = {STAN-CS}, - year = {1996}, - bibsource = {http://www.visionbib.com/bibliography/stereo413.html#TT35577} -} - -@article{Borgefors86, - author = {Borgefors, Gunilla}, - title = {Distance transformations in digital images}, - journal = {Comput. Vision Graph. 
Image Process.}, - volume = {34}, - number = {3}, - year = {1986}, - issn = {0734-189X}, - pages = {344--371}, - doi = {http://dx.doi.org/10.1016/S0734-189X(86)80047-0}, - publisher = {Academic Press Professional, Inc.}, - address = {San Diego, CA, USA}, -} - -@MISC{Bouguet00, - author = {Jean-Yves Bouguet}, - title = {Pyramidal Implementation of the Lucas-Kanade Feature Tracker}, - year = {2000}, - abstract = {}, - keywords = {Optical Flow, Lucas Kanade, Pyramidal Method}, -} - - -@inproceedings{Bradski00, - author = {Davis, J.W. and Bradski, G.R.}, - title = {Motion Segmentation and Pose Recognition with Motion History Gradients}, - booktitle = {WACV00}, - year = {2000}, - pages = {238-244} -} - -@inproceedings{Bradski98, - author = {Bradski, G.R.}, - title = {Computer Vision Face Tracking for Use in a Perceptual User Interface}, - booktitle = {Intel}, - year = {1998}, - bibsource = {http://www.visionbib.com/bibliography/people911.html#TT90944} -} - -@inproceedings{Davis97, - author = {Davis, J.W. and Bobick, A.F.}, - title = {The Representation and Recognition of Action Using Temporal Templates}, - booktitle = {CVPR97}, - year = {1997}, - pages = {928-934} -} - -@techreport{Felzenszwalb04, - author = {Felzenszwalb, Pedro F. and Huttenlocher, Daniel P.}, - edition = {TR2004-1963}, - institution = {Cornell Computing and Information Science}, - keywords = {Distance Transform, Hausdorff}, - month = {September}, - title = {Distance Transforms of Sampled Functions}, - year = {2004} -} - -@article{Felzenszwalb10, - author = {Felzenszwalb, P.F. and Girshick, R.B. and McAllester, D. 
and Ramanan, D.}, - title = {Object Detection with Discriminatively Trained Part Based Models}, - journal = {PAMI}, - volume = {32}, - year = {2010}, - number = {9}, - month = {September}, - pages = {1627-1645}, - bibsource = {http://www.visionbib.com/bibliography/bib/457.html#BB45794} -} - -@article{Hartley99, - author = {Hartley, R.I.}, - title = {Theory and Practice of Projective Rectification}, - journal = {IJCV}, - volume = {35}, - year = {1999}, - number = {2}, - month = {November}, - pages = {115-127}, - bibsource = {http://www.visionbib.com/bibliography/image-proc118.html#TT9097} -} - -@article{HH08, - author = {Hirschmuller, H.}, - title = "Stereo Processing by Semiglobal Matching and Mutual Information", - journal = {PAMI}, - volume = {30}, - year = {2008}, - number = {2}, - month = {February}, - pages = {328-341}, - bibsource = {http://www.visionbib.com/bibliography/stereo422.html#TT36174} -} - -@article{Horn81, - author = {Horn, B.K.P. and Schunck, B.G.}, - title = {Determining Optical Flow}, - journal = {AI}, - volume = {17}, - year = {1981}, - number = {1-3}, - month = {August}, - pages = {185-203}, - bibsource = {http://www.visionbib.com/bibliography/optic-f733.html#TT69126} -} - -@inproceedings{Kolmogorov03, - author = {Kim, Junhwan and Kolmogorov, Vladimir and Zabih, Ramin}, - title = {Visual Correspondence Using Energy Minimization and Mutual Information}, - booktitle = {ICCV '03: Proceedings of the Ninth IEEE International Conference on Computer Vision}, - year = {2003}, - isbn = {0-7695-1950-4}, - pages = {1033}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@inproceedings{Lucas81, - author = {Lucas, B. D. 
and Kanade, T.}, - title = {An Iterative Image Registration Technique with an Application to Stereo Vision (IJCAI)}, - booktitle = {Proceedings of the 7th International Joint Conference on Artificial Intelligence (IJCAI '81)}, - pages = {674-679}, - month = {April}, - year = {1981}, - Notes = {A more complete version is available as Proceedings DARPA Image Understanding Workshop, April 1981, pp.121-130. When you refer to this work, please refer to the IJCAI paper.} -} - - -@article{Matas00, - author = {Matas, J. and Galambos, C. and Kittler, J.V.}, - title = {Robust Detection of Lines Using the Progressive Probabilistic Hough Transform}, - journal = {CVIU}, - volume = {78}, - year = {2000}, - number = {1}, - month = {April}, - pages = {119-137}, - bibsource = {http://www.visionbib.com/bibliography/edge264.html#TT21167} -} - - -@inproceedings{Meyer92, - author = {Meyer, F.}, - title = {Color image segmentation}, - booktitle = {ICIP92}, - year = {1992}, - pages = {303–306} -} - - -@inproceedings{Shi94, - author = {Tomasi, C. and Shi, J.}, - title = {Good Features to Track}, - booktitle = {CVPR94}, - year = {1994}, - pages = {593-600}, - bibsource = {http://www.visionbib.com/bibliography/motion-f716.html#TT61248} -} - - -@article{Sklansky82, - author = {Sklansky, J.}, - title = {Finding the Convex Hull of a Simple Polygon}, - journal = {PRL}, - volume = {1}, - year = {1982}, - pages = {79-83}, - bibsource = {http://www.visionbib.com/bibliography/twod283.html#TT22999} -} - - -@article{Suzuki85, - author = {Suzuki, S. and Abe, K.}, - title = {Topological Structural Analysis of Digitized Binary Images by Border Following}, - journal = {CVGIP}, - volume = {30}, - year = {1985}, - number = {1}, - month = {April}, - pages = {32-46}, - bibsource = {http://www.visionbib.com/bibliography/twod289.html#TT23296} -} - - -@article{TehChin89, - author = {Teh, C.H. 
and Chin, R.T.}, - title = {On the Detection of Dominant Points on Digital Curve}, - journal = {PAMI}, - volume = {11}, - year = {1989}, - number = {8}, - month = {August}, - pages = {859-872}, - bibsource = {http://www.visionbib.com/bibliography/edge257.html#TT20546} -} - -@article{Telea04, - author = {Alexandru Telea}, - title = {An Image Inpainting Technique Based on the Fast Marching Method}, - journal = {Journal of Graphics, GPU, and Game Tools}, - volume = {9}, - number = {1}, - pages = {23-34}, - year = {2004}, -} - -@misc{Welch95, - author = {Greg Welch and Gary Bishop}, - title = {An Introduction to the Kalman Filter}, - year = {1995} -} - -@article{Yuen90, - author = {Yuen, H. K. and Princen, J. and Illingworth, J. and Kittler, J.}, - title = {Comparative study of Hough transform methods for circle finding}, - journal = {Image Vision Comput.}, - volume = {8}, - number = {1}, - year = {1990}, - issn = {0262-8856}, - pages = {71--77}, - doi = {http://dx.doi.org/10.1016/0262-8856(90)90059-E}, - publisher = {Butterworth-Heinemann}, - address = {Newton, MA, USA}, -} - -@inproceedings{arthur_kmeanspp_2007, - title = {k-means++: the advantages of careful seeding}, - booktitle = {Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms}, - publisher = {Society for Industrial and Applied Mathematics Philadelphia, PA, USA}, - author = {D. Arthur and S. Vassilvitskii}, - year = {2007}, - pages = {1027--1035} -} - -@inproceedings{muja_flann_2009, - author = {Marius Muja and David G. Lowe}, - title = {Fast Approximate Nearest Neighbors with Automatic Algorithm Configuration}, - booktitle = {International Conference on Computer Vision Theory and Applications (VISSAPP'09)}, - year = {2009}, - pages = {331-340}, -} - -@inproceedings{qx_csbp, - author = {Q. Yang and L. Wang and N. 
Ahuja}, - title = {A Constant-Space Belief Propagation Algorithm for Stereo Matching}, - booktitle = {CVPR}, - year = {2010} -} - -@article{felzenszwalb_bp, - author = {Pedro F. Felzenszwalb and Daniel P. Huttenlocher}, - title = {Efficient Belief Propagation for Early Vision}, - journal = {International Journal of Computer Vision}, - volume = {70}, - number = {1}, - year = {2006}, - month = {October} -} - -@article{dalal_hog, - author = {Navneet Dalal and Bill Triggs}, - title = {Histogram of Oriented Gradients for Human Detection}, - booktitle = {CVPR}, - year = {2005} -} - -@inproceedings{Puzicha1997, - author = {Puzicha, Jan and Hofmann, Thomas and Buhmann, Joachim M.}, - title = {Non-parametric Similarity Measures for Unsupervised Texture Segmentation and Image Retrieval}, - booktitle = {Proceedings of the 1997 Conference on Computer Vision and Pattern Recognition (CVPR '97)}, - series = {CVPR '97}, - year = {1997}, - isbn = {0-8186-7822-4}, - pages = {267--}, - url = {http://dl.acm.org/citation.cfm?id=794189.794386}, - acmid = {794386}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA}, -} - -@techreport{RubnerSept98, - author = {Rubner, Yossi and Tomasi, Carlo and Guibas, Leonidas J.}, - title = {The Earth Mover's Distance As a Metric for Image Retrieval}, - year = {1998}, - source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Astan%3ASTAN%2F%2FCS-TN-98-86}, - publisher = {Stanford University}, - address = {Stanford, CA, USA}, -} - -@article{Rubner2000, - author = {Rubner, Yossi and Tomasi, Carlo and Guibas, Leonidas J.}, - title = {The Earth Mover's Distance As a Metric for Image Retrieval}, - journal = {Int. J. Comput. Vision}, - issue_date = {Nov. 
2000}, - volume = {40}, - number = {2}, - month = nov, - year = {2000}, - issn = {0920-5691}, - pages = {99--121}, - numpages = {23}, - url = {http://dx.doi.org/10.1023/A:1026543900054}, - doi = {10.1023/A:1026543900054}, - acmid = {365881}, - publisher = {Kluwer Academic Publishers}, - address = {Hingham, MA, USA}, -} - -@article{Hu62, - author={Ming-Kuei Hu}, - journal={Information Theory, IRE Transactions on}, - title={Visual pattern recognition by moment invariants}, - year={1962}, - month={February}, - volume={8}, - number={2}, - pages={179-187}, - doi={10.1109/TIT.1962.1057692}, - ISSN={0096-1000}, -} - -@inproceedings{Fitzgibbon95, - author = {Fitzgibbon, Andrew W. and Fisher, Robert B.}, - title = {A Buyer's Guide to Conic Fitting}, - booktitle = {Proceedings of the 6th British Conference on Machine Vision (Vol. 2)}, - series = {BMVC '95}, - year = {1995}, - isbn = {0-9521898-2-8}, - location = {Birmingham, United Kingdom}, - pages = {513--522}, - numpages = {10}, - url = {http://dl.acm.org/citation.cfm?id=243124.243148}, - acmid = {243148}, - publisher = {BMVA Press}, - address = {Surrey, UK, UK}, -} - -@article{KleeLaskowski85, - author = {Klee, Victor and Laskowski, Michael C.}, - ee = {http://dx.doi.org/10.1016/0196-6774(85)90005-7}, - journal = {J. Algorithms}, - number = 3, - pages = {359-375}, - title = {Finding the Smallest Triangles Containing a Given Convex Polygon.}, - url = {http://dblp.uni-trier.de/db/journals/jal/jal6.html#KleeL85}, - volume = 6, - year = 1985 -} - -@article{Canny86, - author = {Canny, J}, - title = {A Computational Approach to Edge Detection}, - journal = {IEEE Trans. Pattern Anal. Mach. 
Intell.}, - issue_date = {June 1986}, - volume = {8}, - number = {6}, - month = jun, - year = {1986}, - issn = {0162-8828}, - pages = {679--698}, - numpages = {20}, - url = {http://dx.doi.org/10.1109/TPAMI.1986.4767851}, - doi = {10.1109/TPAMI.1986.4767851}, - acmid = {11275}, - publisher = {IEEE Computer Society}, - address = {Washington, DC, USA} -} - -# '''[Bradski98]''' G.R. Bradski. Computer vision face tracking as a component of a perceptual user interface. In Workshop on Applications of Computer Vision, pages 214?219, Princeton, NJ, Oct. 1998.<
> Updated version can be found at http://www.intel.com/technology/itj/q21998/articles/art\_2.htm.<
> Also, it is included into OpenCV distribution ([[attachment:camshift.pdf]]) -# '''[Burt81]''' P. J. Burt, T. H. Hong, A. Rosenfeld. Segmentation and Estimation of Image Region Properties Through Cooperative Hierarchical Computation. IEEE Tran. On SMC, Vol. 11, N.12, 1981, pp. 802-809. -# '''[Canny86]''' J. Canny. A Computational Approach to Edge Detection, IEEE Trans. on Pattern Analysis and Machine Intelligence, 8(6), pp. 679-698 (1986). -# '''[Davis97]''' J. Davis and Bobick. The Representation and Recognition of Action Using Temporal Templates. MIT Media Lab Technical Report 402, 1997. -# '''[DeMenthon92]''' Daniel F. DeMenthon and Larry S. Davis. Model-Based Object Pose in 25 Lines of Code. In Proceedings of ECCV '92, pp. 335-343, 1992. -# '''[Fitzgibbon95]''' Andrew W. Fitzgibbon, R.B.Fisher. A Buyer?s Guide to Conic Fitting. Proc.5th British Machine Vision Conference, Birmingham, pp. 513-522, 1995. -# '''[Ford98]''' Adrian Ford, Alan Roberts. Colour Space Conversions. http://www.poynton.com/PDFs/coloureq.pdf -# '''[Horn81]''' Berthold K.P. Horn and Brian G. Schunck. Determining Optical Flow. Artificial Intelligence, 17, pp. 185-203, 1981. -# '''[Hu62]''' M. Hu. Visual Pattern Recognition by Moment Invariants, IRE Transactions on Information Theory, 8:2, pp. 179-187, 1962. -# '''[Iivarinen97]''' Jukka Iivarinen, Markus Peura, Jaakko Srel, and Ari Visa. Comparison of Combined Shape Descriptors for Irregular Objects, 8th British Machine Vision Conference, BMVC'97.<
>http://www.cis.hut.fi/research/IA/paper/publications/bmvc97/bmvc97.html -# '''[Jahne97]''' B. Jahne. Digital Image Processing. Springer, New York, 1997. -# '''[Lucas81]''' Lucas, B., and Kanade, T. An Iterative Image Registration Technique with an Application to Stereo Vision, Proc. of 7th International Joint Conference on Artificial Intelligence (IJCAI), pp. 674-679. -# '''[Kass88]''' M. Kass, A. Witkin, and D. Terzopoulos. Snakes: Active Contour Models, International Journal of Computer Vision, pp. 321-331, 1988. -# '''[Lienhart02]''' Rainer Lienhart and Jochen Maydt. An Extended Set of Haar-like Features for Rapid Object Detection. IEEE ICIP 2002, Vol. 1, pp. 900-903, Sep. 2002.<
> This paper, as well as the extended technical report, can be retrieved at http://www.lienhart.de/Publications/publications.html -# '''[Matas98]''' J.Matas, C.Galambos, J.Kittler. Progressive Probabilistic Hough Transform. British Machine Vision Conference, 1998. -# '''[Rosenfeld73]''' A. Rosenfeld and E. Johnston. Angle Detection on Digital Curves. IEEE Trans. Computers, 22:875-878, 1973. -# '''[RubnerJan98]''' Y. Rubner. C. Tomasi, L.J. Guibas. Metrics for Distributions with Applications to Image Databases. Proceedings of the 1998 IEEE International Conference on Computer Vision, Bombay, India, January 1998, pp. 59-66. -# '''[RubnerSept98]''' Y. Rubner. C. Tomasi, L.J. Guibas. The Earth Mover?s Distance as a Metric for Image Retrieval. Technical Report STAN-CS-TN-98-86, Department of Computer Science, Stanford University, September 1998. -# '''[RubnerOct98]''' Y. Rubner. C. Tomasi. Texture Metrics. Proceeding of the IEEE International Conference on Systems, Man, and Cybernetics, San-Diego, CA, October 1998, pp. 4601-4607. http://robotics.stanford.edu/~rubner/publications.html -# '''[Serra82]''' J. Serra. Image Analysis and Mathematical Morphology. Academic Press, 1982. -# '''[Schiele00]''' Bernt Schiele and James L. Crowley. Recognition without Correspondence Using Multidimensional Receptive Field Histograms. In International Journal of Computer Vision 36 (1), pp. 31-50, January 2000. -# '''[Suzuki85]''' S. Suzuki, K. Abe. Topological Structural Analysis of Digital Binary Images by Border Following. CVGIP, v.30, n.1. 1985, pp. 32-46. -# '''[Teh89]''' C.H. Teh, R.T. Chin. On the Detection of Dominant Points on Digital Curves. - IEEE Tr. PAMI, 1989, v.11, No.8, p. 859-872. -# '''[Trucco98]''' Emanuele Trucco, Alessandro Verri. Introductory Techniques for 3-D Computer Vision. Prentice Hall, Inc., 1998. -# '''[Viola01]''' Paul Viola and Michael J. Jones. Rapid Object Detection using a Boosted Cascade of Simple Features. IEEE CVPR, 2001.<
> The paper is available online at http://www.ai.mit.edu/people/viola/ -# '''[Welch95]''' Greg Welch, Gary Bishop. An Introduction To the Kalman Filter. Technical Report TR95-041, University of North Carolina at Chapel Hill, 1995.<
> Online version is available at http://www.cs.unc.edu/~welch/kalman/kalmanIntro.html -# '''[Williams92]''' D. J. Williams and M. Shah. A Fast Algorithm for Active Contours and Curvature Estimation. CVGIP: Image Understanding, Vol. 55, No. 1, pp. 14-26, Jan., 1992. http://www.cs.ucf.edu/~vision/papers/shah/92/WIS92A.pdf. -# '''[Yuen03]''' H.K. Yuen, J. Princen, J. Illingworth and J. Kittler. Comparative study of Hough Transform methods for circle finding.<
>http://www.sciencedirect.com/science/article/B6V09-48TCV4N-5Y/2/91f551d124777f7a4cf7b18325235673 -# '''[Yuille89]''' A.Y.Yuille, D.S.Cohen, and P.W.Hallinan. Feature Extraction from Faces Using Deformable Templates in CVPR, pp. 104-109, 1989. -# '''[Zhang96]''' Z. Zhang. Parameter Estimation Techniques: A Tutorial with Application to Conic Fitting, Image and Vision Computing Journal, 1996. -# '''[Zhang99]''' Z. Zhang. Flexible Camera Calibration By Viewing a Plane From Unknown Orientations. International Conference on Computer Vision (ICCV'99), Corfu, Greece, pages 666-673, September 1999. -# '''[Zhang00]''' Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000. diff --git a/doc/root.markdown.in b/doc/root.markdown.in index c98bb35317..72095780cf 100644 --- a/doc/root.markdown.in +++ b/doc/root.markdown.in @@ -3,9 +3,14 @@ OpenCV modules {#mainpage} @subpage intro -Module name | Folder -------------- | ------------- -@ref core | core -@ref imgproc | imgproc +### Main modules - + Module name | Folder +-------------- | ------------- +@CMAKE_DOXYGEN_MAIN_REFERENCE@ + +### Extra modules + + Module name | Folder +-------------- | ------------- +@CMAKE_DOXYGEN_EXTRA_REFERENCE@ diff --git a/modules/androidcamera/include/camera_activity.hpp b/modules/androidcamera/include/camera_activity.hpp index 2af7befe30..7e79aafd53 100644 --- a/modules/androidcamera/include/camera_activity.hpp +++ b/modules/androidcamera/include/camera_activity.hpp @@ -3,6 +3,12 @@ #include +/** @defgroup androidcamera Android Camera Support +*/ + +//! @addtogroup androidcamera +//! @{ + class CameraActivity { public: @@ -44,4 +50,6 @@ private: int frameHeight; }; +//! 
@} + #endif diff --git a/modules/androidcamera/include/camera_properties.h b/modules/androidcamera/include/camera_properties.h index 65499be2d6..5078401951 100644 --- a/modules/androidcamera/include/camera_properties.h +++ b/modules/androidcamera/include/camera_properties.h @@ -1,6 +1,9 @@ #ifndef CAMERA_PROPERTIES_H #define CAMERA_PROPERTIES_H +//! @addtogroup androidcamera +//! @{ + enum { ANDROID_CAMERA_PROPERTY_FRAMEWIDTH = 0, ANDROID_CAMERA_PROPERTY_FRAMEHEIGHT = 1, @@ -67,4 +70,6 @@ enum { ANDROID_CAMERA_FOCUS_DISTANCE_FAR_INDEX }; +//! @} + #endif // CAMERA_PROPERTIES_H diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 4f405afc66..396b666ee1 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -48,9 +48,140 @@ #include "opencv2/features2d.hpp" #include "opencv2/core/affine.hpp" +/** + @defgroup calib3d Camera Calibration and 3D Reconstruction + +The functions in this section use a so-called pinhole camera model. In this model, a scene view is +formed by projecting 3D points into the image plane using a perspective transformation. + +\f[s \; m' = A [R|t] M'\f] + +or + +\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} +\begin{bmatrix} +r_{11} & r_{12} & r_{13} & t_1 \\ +r_{21} & r_{22} & r_{23} & t_2 \\ +r_{31} & r_{32} & r_{33} & t_3 +\end{bmatrix} +\begin{bmatrix} +X \\ +Y \\ +Z \\ +1 +\end{bmatrix}\f] + +where: + +- \f$(X, Y, Z)\f$ are the coordinates of a 3D point in the world coordinate space +- \f$(u, v)\f$ are the coordinates of the projection point in pixels +- \f$A\f$ is a camera matrix, or a matrix of intrinsic parameters +- \f$(cx, cy)\f$ is a principal point that is usually at the image center +- \f$fx, fy\f$ are the focal lengths expressed in pixel units. 
+ +Thus, if an image from the camera is scaled by a factor, all of these parameters should be scaled +(multiplied/divided, respectively) by the same factor. The matrix of intrinsic parameters does not +depend on the scene viewed. So, once estimated, it can be re-used as long as the focal length is +fixed (in case of zoom lens). The joint rotation-translation matrix \f$[R|t]\f$ is called a matrix of +extrinsic parameters. It is used to describe the camera motion around a static scene, or vice versa, +rigid motion of an object in front of a still camera. That is, \f$[R|t]\f$ translates coordinates of a +point \f$(X, Y, Z)\f$ to a coordinate system, fixed with respect to the camera. The transformation above +is equivalent to the following (when \f$z \ne 0\f$ ): + +\f[\begin{array}{l} +\vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ +x' = x/z \\ +y' = y/z \\ +u = f_x*x' + c_x \\ +v = f_y*y' + c_y +\end{array}\f] + +Real lenses usually have some distortion, mostly radial distortion and slight tangential distortion. +So, the above model is extended as: + +\f[\begin{array}{l} \vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ x' = x/z \\ y' = y/z \\ x'' = x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\ y'' = y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\ \text{where} \quad r^2 = x'^2 + y'^2 \\ u = f_x*x'' + c_x \\ v = f_y*y'' + c_y \end{array}\f] + +\f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ are radial distortion coefficients. \f$p_1\f$ and \f$p_2\f$ are +tangential distortion coefficients. \f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$ are the thin prism distortion +coefficients. Higher-order coefficients are not considered in OpenCV. 
In the functions below the +coefficients are passed or returned as + +\f[(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f] + +vector. That is, if the vector contains four elements, it means that \f$k_3=0\f$ . The distortion +coefficients do not depend on the scene viewed. Thus, they also belong to the intrinsic camera +parameters. And they remain the same regardless of the captured image resolution. If, for example, a +camera has been calibrated on images of 320 x 240 resolution, absolutely the same distortion +coefficients can be used for 640 x 480 images from the same camera while \f$f_x\f$, \f$f_y\f$, \f$c_x\f$, and +\f$c_y\f$ need to be scaled appropriately. + +The functions below use the above model to do the following: + +- Project 3D points to the image plane given intrinsic and extrinsic parameters. +- Compute extrinsic parameters given intrinsic parameters, a few 3D points, and their +projections. +- Estimate intrinsic and extrinsic camera parameters from several views of a known calibration +pattern (every view is described by several 3D-2D point correspondences). +- Estimate the relative position and orientation of the stereo camera "heads" and compute the +*rectification* transformation that makes the camera optical axes parallel. 
+ +@note + - A calibration sample for 3 cameras in horizontal position can be found at + opencv_source_code/samples/cpp/3calibration.cpp + - A calibration sample based on a sequence of images can be found at + opencv_source_code/samples/cpp/calibration.cpp + - A calibration sample in order to do 3D reconstruction can be found at + opencv_source_code/samples/cpp/build3dmodel.cpp + - A calibration sample of an artificially generated camera and chessboard patterns can be + found at opencv_source_code/samples/cpp/calibration_artificial.cpp + - A calibration example on stereo calibration can be found at + opencv_source_code/samples/cpp/stereo_calib.cpp + - A calibration example on stereo matching can be found at + opencv_source_code/samples/cpp/stereo_match.cpp + - (Python) A camera calibration sample can be found at + opencv_source_code/samples/python2/calibrate.py + + @{ + @defgroup calib3d_fisheye Fisheye camera model + + Definitions: Let P be a point in 3D of coordinates X in the world reference frame (stored in the + matrix X). The coordinate vector of P in the camera reference frame is: + + \f[Xc = R X + T\f] + + where R is the rotation matrix corresponding to the rotation vector om: R = rodrigues(om); call x, y + and z the 3 coordinates of Xc: + + \f[x = Xc_1 \\ y = Xc_2 \\ z = Xc_3\f] + + The pinhole projection coordinates of P are [a; b] where + + \f[a = x / z \ and \ b = y / z \\ r^2 = a^2 + b^2 \\ \theta = atan(r)\f] + + Fisheye distortion: + + \f[\theta_d = \theta (1 + k_1 \theta^2 + k_2 \theta^4 + k_3 \theta^6 + k_4 \theta^8)\f] + + The distorted point coordinates are [x'; y'] where + + \f[x' = (\theta_d / r) a \\ y' = (\theta_d / r) b \f] + + Finally, conversion into pixel coordinates: the final pixel coordinates vector is [u; v], where: + + \f[u = f_x (x' + \alpha y') + c_x \\ + v = f_y y' + c_y\f] + + @defgroup calib3d_c C API + + @} + */ + namespace cv { +//! @addtogroup calib3d +//! @{ + //! 
type of the robust estimation algorithm enum { LMEDS = 4, //!< least-median algorithm RANSAC = 8 //!< RANSAC algorithm @@ -105,26 +236,143 @@ enum { FM_7POINT = 1, //!< 7-point algorithm -//! converts rotation vector to rotation matrix or vice versa using Rodrigues transformation +/** @brief Converts a rotation matrix to a rotation vector or vice versa. + +@param src Input rotation vector (3x1 or 1x3) or rotation matrix (3x3). +@param dst Output rotation matrix (3x3) or rotation vector (3x1 or 1x3), respectively. +@param jacobian Optional output Jacobian matrix, 3x9 or 9x3, which is a matrix of partial +derivatives of the output array components with respect to the input array components. + +\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos{\theta} I + (1- \cos{\theta} ) r r^T + \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f] + +Inverse transformation can be also done easily, since + +\f[\sin ( \theta ) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} = \frac{R - R^T}{2}\f] + +A rotation vector is a convenient and most compact representation of a rotation matrix (since any +rotation matrix has just 3 degrees of freedom). The representation is used in the global 3D geometry +optimization procedures like calibrateCamera, stereoCalibrate, or solvePnP . + */ CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() ); -//! computes the best-fit perspective transformation mapping srcPoints to dstPoints. +/** @brief Finds a perspective transformation between two planes. + +@param srcPoints Coordinates of the points in the original plane, a matrix of the type CV_32FC2 +or vector\<Point2f\> . +@param dstPoints Coordinates of the points in the target plane, a matrix of the type CV_32FC2 or +a vector\<Point2f\> . +@param method Method used to compute a homography matrix. 
The following methods are possible: +- **0** - a regular method using all the points +- **RANSAC** - RANSAC-based robust method +- **LMEDS** - Least-Median robust method +@param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier +(used in the RANSAC method only). That is, if +\f[\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \| > \texttt{ransacReprojThreshold}\f] +then the point \f$i\f$ is considered an outlier. If srcPoints and dstPoints are measured in pixels, +it usually makes sense to set this parameter somewhere in the range of 1 to 10. +@param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input +mask values are ignored. +@param maxIters The maximum number of RANSAC iterations, 2000 is the maximum it can be. +@param confidence Confidence level, between 0 and 1. + +The functions find and return the perspective transformation \f$H\f$ between the source and the +destination planes: + +\f[s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}\f] + +so that the back-projection error + +\f[\sum _i \left ( x'_i- \frac{h_{11} x_i + h_{12} y_i + h_{13}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2+ \left ( y'_i- \frac{h_{21} x_i + h_{22} y_i + h_{23}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2\f] + +is minimized. If the parameter method is set to the default value 0, the function uses all the point +pairs to compute an initial homography estimate with a simple least-squares scheme. + +However, if not all of the point pairs ( \f$srcPoints_i\f$, \f$dstPoints_i\f$ ) fit the rigid perspective +transformation (that is, there are some outliers), this initial estimate will be poor. In this case, +you can use one of the two robust methods. 
Both methods, RANSAC and LMeDS , try many different +random subsets of the corresponding point pairs (of four pairs each), estimate the homography matrix +using this subset and a simple least-square algorithm, and then compute the quality/goodness of the +computed homography (which is the number of inliers for RANSAC or the median re-projection error for +LMeDs). The best subset is then used to produce the initial estimate of the homography matrix and +the mask of inliers/outliers. + +Regardless of the method, robust or not, the computed homography matrix is refined further (using +inliers only in case of a robust method) with the Levenberg-Marquardt method to reduce the +re-projection error even more. + +The method RANSAC can handle practically any ratio of outliers but it needs a threshold to +distinguish inliers from outliers. The method LMeDS does not need any threshold but it works +correctly only when there are more than 50% of inliers. Finally, if there are no outliers and the +noise is rather small, use the default method (method=0). + +The function is used to find initial intrinsic and extrinsic matrices. Homography matrix is +determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an H matrix +cannot be estimated, an empty one will be returned. + +@sa + getAffineTransform, getPerspectiveTransform, estimateRigidTransform, warpPerspective, + perspectiveTransform + +@note + - A example on calculating a homography for image matching can be found at + opencv_source_code/samples/cpp/video_homography.cpp + + */ CV_EXPORTS_W Mat findHomography( InputArray srcPoints, InputArray dstPoints, int method = 0, double ransacReprojThreshold = 3, OutputArray mask=noArray(), const int maxIters = 2000, const double confidence = 0.995); -//! 
variant of findHomography for backward compatibility +/** @overload */ CV_EXPORTS Mat findHomography( InputArray srcPoints, InputArray dstPoints, OutputArray mask, int method = 0, double ransacReprojThreshold = 3 ); -//! Computes RQ decomposition of 3x3 matrix +/** @brief Computes an RQ decomposition of 3x3 matrices. + +@param src 3x3 input matrix. +@param mtxR Output 3x3 upper-triangular matrix. +@param mtxQ Output 3x3 orthogonal matrix. +@param Qx Optional output 3x3 rotation matrix around x-axis. +@param Qy Optional output 3x3 rotation matrix around y-axis. +@param Qz Optional output 3x3 rotation matrix around z-axis. + +The function computes an RQ decomposition using the given rotations. This function is used in +decomposeProjectionMatrix to decompose the left 3x3 submatrix of a projection matrix into a camera +and a rotation matrix. + +It optionally returns three rotation matrices, one for each axis, and the three Euler angles in +degrees (as the return value) that could be used in OpenGL. Note, there is always more than one +sequence of rotations about the three principal axes that results in the same orientation of an +object, e.g. see @cite Slabaugh . The returned three rotation matrices and corresponding three Euler angles +are only one of the possible solutions. + */ CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mtxQ, OutputArray Qx = noArray(), OutputArray Qy = noArray(), OutputArray Qz = noArray()); -//! Decomposes the projection matrix into camera matrix and the rotation martix and the translation vector +/** @brief Decomposes a projection matrix into a rotation matrix and a camera matrix. + +@param projMatrix 3x4 input projection matrix P. +@param cameraMatrix Output 3x3 camera matrix K. +@param rotMatrix Output 3x3 external rotation matrix R. +@param transVect Output 4x1 translation vector T. +@param rotMatrixX Optional 3x3 rotation matrix around x-axis. +@param rotMatrixY Optional 3x3 rotation matrix around y-axis. 
+@param rotMatrixZ Optional 3x3 rotation matrix around z-axis. +@param eulerAngles Optional three-element vector containing three Euler angles of rotation in +degrees. + +The function computes a decomposition of a projection matrix into a calibration and a rotation +matrix and the position of a camera. + +It optionally returns three rotation matrices, one for each axis, and three Euler angles that could +be used in OpenGL. Note, there is always more than one sequence of rotations about the three +principal axes that results in the same orientation of an object, e.g. see @cite Slabaugh . Returned +three rotation matrices and corresponding three Euler angles are only one of the possible solutions. + +The function is based on RQDecomp3x3 . + */ CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray cameraMatrix, OutputArray rotMatrix, OutputArray transVect, OutputArray rotMatrixX = noArray(), @@ -132,10 +380,51 @@ CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray OutputArray rotMatrixZ = noArray(), OutputArray eulerAngles =noArray() ); -//! computes derivatives of the matrix product w.r.t each of the multiplied matrix coefficients +/** @brief Computes partial derivatives of the matrix product for each multiplied matrix. + +@param A First multiplied matrix. +@param B Second multiplied matrix. +@param dABdA First output derivative matrix d(A\*B)/dA of size +\f$\texttt{A.rows*B.cols} \times \texttt{A.rows*A.cols}\f$ . +@param dABdB Second output derivative matrix d(A\*B)/dB of size +\f$\texttt{A.rows*B.cols} \times \texttt{B.rows*B.cols}\f$ . + +The function computes partial derivatives of the elements of the matrix product \f$A*B\f$ with regard to +the elements of each of the two input matrices. The function is used to compute the Jacobian +matrices in stereoCalibrate but can also be used in any other similar optimization function.
+ */ CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB ); -//! composes 2 [R|t] transformations together. Also computes the derivatives of the result w.r.t the arguments +/** @brief Combines two rotation-and-shift transformations. + +@param rvec1 First rotation vector. +@param tvec1 First translation vector. +@param rvec2 Second rotation vector. +@param tvec2 Second translation vector. +@param rvec3 Output rotation vector of the superposition. +@param tvec3 Output translation vector of the superposition. +@param dr3dr1 +@param dr3dt1 +@param dr3dr2 +@param dr3dt2 +@param dt3dr1 +@param dt3dt1 +@param dt3dr2 +@param dt3dt2 Optional output derivatives of rvec3 or tvec3 with regard to rvec1, rvec2, tvec1 and +tvec2, respectively. + +The functions compute: + +\f[\begin{array}{l} \texttt{rvec3} = \mathrm{rodrigues} ^{-1} \left ( \mathrm{rodrigues} ( \texttt{rvec2} ) \cdot \mathrm{rodrigues} ( \texttt{rvec1} ) \right ) \\ \texttt{tvec3} = \mathrm{rodrigues} ( \texttt{rvec2} ) \cdot \texttt{tvec1} + \texttt{tvec2} \end{array} ,\f] + +where \f$\mathrm{rodrigues}\f$ denotes a rotation vector to a rotation matrix transformation, and +\f$\mathrm{rodrigues}^{-1}\f$ denotes the inverse transformation. See Rodrigues for details. + +Also, the functions can compute the derivatives of the output vectors with regards to the input +vectors (see matMulDeriv ). The functions are used inside stereoCalibrate but can also be used in +your own code where Levenberg-Marquardt or another gradient-based solver is used to optimize a +function that contains a matrix multiplication. + */ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1, InputArray rvec2, InputArray tvec2, OutputArray rvec3, OutputArray tvec3, @@ -144,7 +433,38 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1, OutputArray dt3dr1 = noArray(), OutputArray dt3dt1 = noArray(), OutputArray dt3dr2 = noArray(), OutputArray dt3dt2 = noArray() ); -//! 
projects points from the model coordinate space to the image coordinates. Also computes derivatives of the image coordinates w.r.t the intrinsic and extrinsic camera parameters +/** @brief Projects 3D points to an image plane. + +@param objectPoints Array of object points, 3xN/Nx3 1-channel or 1xN/Nx1 3-channel (or +vector\<Point3f\> ), where N is the number of points in the view. +@param rvec Rotation vector. See Rodrigues for details. +@param tvec Translation vector. +@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or +vector\<Point2f\> . +@param jacobian Optional output 2Nx(10+\<numDistCoeffs\>) jacobian matrix of derivatives of image +points with respect to components of the rotation vector, translation vector, focal lengths, +coordinates of the principal point and the distortion coefficients. In the old interface different +components of the jacobian are returned via different output parameters. +@param aspectRatio Optional "fixed aspect ratio" parameter. If the parameter is not 0, the +function assumes that the aspect ratio (*fx/fy*) is fixed and correspondingly adjusts the jacobian +matrix. + +The function computes projections of 3D points to the image plane given intrinsic and extrinsic +camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of +image points coordinates (as functions of all the input parameters) with respect to the particular +parameters, intrinsic and/or extrinsic. The Jacobians are used during the global optimization in +calibrateCamera, solvePnP, and stereoCalibrate .
The function itself can also be used to compute a +re-projection error given the current intrinsic and extrinsic parameters. + +@note By setting rvec=tvec=(0,0,0) or by setting cameraMatrix to a 3x3 identity matrix, or by +passing zero distortion coefficients, you can get various useful special cases of the function. This +means that you can compute the distorted coordinates for a sparse set of points or apply a +perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup. + */ CV_EXPORTS_W void projectPoints( InputArray objectPoints, InputArray rvec, InputArray tvec, InputArray cameraMatrix, InputArray distCoeffs, @@ -152,13 +472,86 @@ CV_EXPORTS_W void projectPoints( InputArray objectPoints, OutputArray jacobian = noArray(), double aspectRatio = 0 ); -//! computes the camera pose from a few 3D points and the corresponding projections. The outliers are not handled. +/** @brief Finds an object pose from 3D-2D point correspondences. + +@param objectPoints Array of object points in the object coordinate space, 3xN/Nx3 1-channel or +1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can also be passed here. +@param imagePoints Array of corresponding image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\<Point2f\> can also be passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +the model coordinate system to the camera coordinate system. +@param tvec Output translation vector. +@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE.
If true (1), the function uses +the provided rvec and tvec values as initial approximations of the rotation and translation +vectors, respectively, and further optimizes them. +@param flags Method for solving a PnP problem: +- **SOLVEPNP_ITERATIVE** Iterative method is based on Levenberg-Marquardt optimization. In +this case the function finds such a pose that minimizes reprojection error, that is the sum +of squared distances between the observed projections imagePoints and the projected (using +projectPoints ) objectPoints . +- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem". In this case the +function requires exactly four object and image points. +- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation". +- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP". +- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, +F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation". In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +assuming that both have the same value. Then the cameraMatrix is updated with the estimated +focal length. + +The function estimates the object pose given a set of object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients. 
+ +@note + - An example of how to use solvePnP for planar augmented reality can be found at + opencv_source_code/samples/python2/plane_ar.py + */ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE ); -//! computes the camera pose from a few 3D points and the corresponding projections. The outliers are possible. +/** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme. + +@param objectPoints Array of object points in the object coordinate space, 3xN/Nx3 1-channel or +1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can also be passed here. +@param imagePoints Array of corresponding image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\<Point2f\> can also be passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +the model coordinate system to the camera coordinate system. +@param tvec Output translation vector. +@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses +the provided rvec and tvec values as initial approximations of the rotation and translation +vectors, respectively, and further optimizes them. +@param iterationsCount Number of iterations. +@param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value +is the maximum allowed distance between the observed and computed point projections to consider it +an inlier.
+@param confidence The probability that the algorithm produces a useful result. +@param inliers Output vector that contains indices of inliers in objectPoints and imagePoints . +@param flags Method for solving a PnP problem (see solvePnP ). + +The function estimates an object pose given a set of object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients. This function finds such +a pose that minimizes reprojection error, that is, the sum of squared distances between the observed +projections imagePoints and the projected (using projectPoints ) objectPoints. The use of RANSAC +makes the function resistant to outliers. + +@note + - An example of how to use solvePNPRansac for object detection can be found at + opencv_source_code/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/ + */ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec, @@ -166,28 +559,227 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint float reprojectionError = 8.0, double confidence = 0.99, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE ); -//! initializes camera matrix from a few 3D points and the corresponding projections. +/** @brief Finds an initial camera matrix from 3D-2D point correspondences. + +@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern +coordinate space. In the old interface all the per-view vectors are concatenated. See +calibrateCamera for details. +@param imagePoints Vector of vectors of the projections of the calibration pattern points. In the +old interface all the per-view vectors are concatenated. +@param imageSize Image size in pixels used to initialize the principal point. +@param aspectRatio If it is zero or negative, both \f$f_x\f$ and \f$f_y\f$ are estimated independently. 
+Otherwise, \f$f_x = f_y * \texttt{aspectRatio}\f$ . + +The function estimates and returns an initial camera matrix for the camera calibration process. +Currently, the function only supports planar calibration patterns, which are patterns where each +object point has z-coordinate =0. + */ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size imageSize, double aspectRatio = 1.0 ); -//! finds checkerboard pattern of the specified size in the image +/** @brief Finds the positions of internal corners of the chessboard. + +@param image Source chessboard view. It must be an 8-bit grayscale or color image. +@param patternSize Number of inner corners per a chessboard row and column +( patternSize = cvSize(points_per_row,points_per_colum) = cvSize(columns,rows) ). +@param corners Output array of detected corners. +@param flags Various operation flags that can be zero or a combination of the following values: +- **CV_CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black +and white, rather than a fixed threshold level (computed from the average image brightness). +- **CV_CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before +applying fixed or adaptive thresholding. +- **CV_CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter, +square-like shape) to filter out false quads extracted at the contour retrieval stage. +- **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners, +and shortcut the call if none is found. This can drastically speed up the call in the +degenerate condition when no chessboard is observed. + +The function attempts to determine whether the input image is a view of the chessboard pattern and +locate the internal chessboard corners. The function returns a non-zero value if all of the corners +are found and they are placed in a certain order (row by row, left to right in every row). 
+Otherwise, if the function fails to find all the corners or reorder them, it returns 0. For example, +a regular chessboard has 8 x 8 squares and 7 x 7 internal corners, that is, points where the black +squares touch each other. The detected coordinates are approximate, and to determine their positions +more accurately, the function calls cornerSubPix. You also may use the function cornerSubPix with +different parameters if returned coordinates are not accurate enough. + +Sample usage of detecting and drawing chessboard corners: +@code + Size patternsize(8,6); //interior number of corners + Mat gray = ....; //source image + vector<Point2f> corners; //this will be filled by the detected corners + + //CALIB_CB_FAST_CHECK saves a lot of time on images + //that do not contain any chessboard corners + bool patternfound = findChessboardCorners(gray, patternsize, corners, + CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE + + CALIB_CB_FAST_CHECK); + + if(patternfound) + cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1), + TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1)); + + drawChessboardCorners(img, patternsize, Mat(corners), patternfound); +@endcode +@note The function requires white space (like a square-thick border, the wider the better) around +the board to make the detection more robust in various environments. Otherwise, if there is no +border and the background is dark, the outer black squares cannot be segmented properly and so the +square grouping and ordering algorithm fails. + */ CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, OutputArray corners, int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE ); //! finds subpixel-accurate positions of the chessboard corners CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size ); -//! draws the checkerboard pattern (found or partly found) in the image +/** @brief Renders the detected chessboard corners.
+ +@param image Destination image. It must be an 8-bit color image. +@param patternSize Number of inner corners per a chessboard row and column +(patternSize = cv::Size(points_per_row,points_per_column)). +@param corners Array of detected corners, the output of findChessboardCorners. +@param patternWasFound Parameter indicating whether the complete board was found or not. The +return value of findChessboardCorners should be passed here. + +The function draws individual chessboard corners detected either as red circles if the board was not +found, or as colored corners connected with lines if the board was found. + */ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSize, InputArray corners, bool patternWasFound ); -//! finds circles' grid pattern of the specified size in the image +/** @brief Finds centers in the grid of circles. + +@param image grid view of input circles; it must be an 8-bit grayscale or color image. +@param patternSize number of circles per row and column +( patternSize = Size(points_per_row, points_per_colum) ). +@param centers output array of detected centers. +@param flags various operation flags that can be one of the following values: +- **CALIB_CB_SYMMETRIC_GRID** uses symmetric pattern of circles. +- **CALIB_CB_ASYMMETRIC_GRID** uses asymmetric pattern of circles. +- **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to +perspective distortions but much more sensitive to background clutter. +@param blobDetector feature detector that finds blobs like dark circles on light background. + +The function attempts to determine whether the input image contains a grid of circles. If it is, the +function locates centers of the circles. The function returns a non-zero value if all of the centers +have been found and they have been placed in a certain order (row by row, left to right in every +row). Otherwise, if the function fails to find all the corners or reorder them, it returns 0. 
+ +Sample usage of detecting and drawing the centers of circles: : +@code + Size patternsize(7,7); //number of centers + Mat gray = ....; //source image + vector centers; //this will be filled by the detected centers + + bool patternfound = findCirclesGrid(gray, patternsize, centers); + + drawChessboardCorners(img, patternsize, Mat(centers), patternfound); +@endcode +@note The function requires white space (like a square-thick border, the wider the better) around +the board to make the detection more robust in various environments. + */ CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize, OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID, const Ptr &blobDetector = SimpleBlobDetector::create()); -//! finds intrinsic and extrinsic camera parameters from several fews of a known calibration pattern. +/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern. + +@param objectPoints In the new interface it is a vector of vectors of calibration pattern points +in the calibration pattern coordinate space. The outer vector contains as many elements as the +number of the pattern views. If the same calibration pattern is shown in each view and it is fully +visible, all the vectors will be the same. Although, it is possible to use partially occluded +patterns, or even different patterns in different views. Then, the vectors will be different. The +points are 3D, but since they are in a pattern coordinate system, then, if the rig is planar, it +may make sense to put the model to a XY coordinate plane so that Z-coordinate of each input object +point is 0. +In the old interface all the vectors of object points from different views are concatenated +together. +@param imagePoints In the new interface it is a vector of vectors of the projections of +calibration pattern points. imagePoints.size() and objectPoints.size() and imagePoints[i].size() +must be equal to objectPoints[i].size() for each i. 
+In the old interface all the vectors of object points from different views are concatenated +together. +@param imageSize Size of the image used only to initialize the intrinsic camera matrix. +@param cameraMatrix Output 3x3 floating-point camera matrix +\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS +and/or CV_CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be +initialized before calling the function. +@param distCoeffs Output vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. +@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view. +That is, each k-th rotation vector together with the corresponding k-th translation vector (see +the next output parameter description) brings the calibration pattern from the model coordinate +space (in which object points are specified) to the world coordinate space, that is, a real +position of the calibration pattern in the k-th pattern view (k=0.. *M* -1). +@param tvecs Output vector of translation vectors estimated for each pattern view. +@param flags Different flags that may be zero or a combination of the following values: +- **CV_CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of +fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image +center ( imageSize is used), and focal distances are computed in a least-squares fashion. +Note, that if intrinsic parameters are known, there is no need to use this function just to +estimate extrinsic parameters. Use solvePnP instead. +- **CV_CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global +optimization. It stays at the center or at a different location specified when +CV_CALIB_USE_INTRINSIC_GUESS is set too. +- **CV_CALIB_FIX_ASPECT_RATIO** The functions considers only fy as a free parameter. 
The +ratio fx/fy stays the same as in the input cameraMatrix . When +CV_CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are +ignored, only their ratio is computed and used further. +- **CV_CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set +to zeros and stay zero. +- **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** The corresponding radial distortion +coefficient is not changed during the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is +set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. +- **CV_CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the +backward compatibility, this extra flag should be explicitly specified to make the +calibration function use the rational model and return 8 coefficients. If the flag is not +set, the function computes and returns only 5 distortion coefficients. +- **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the +backward compatibility, this extra flag should be explicitly specified to make the +calibration function use the thin prism model and return 12 coefficients. If the flag is not +set, the function computes and returns only 5 distortion coefficients. +- **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during +the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +supplied distCoeffs matrix is used. Otherwise, it is set to 0. +@param criteria Termination criteria for the iterative optimization algorithm. + +The function estimates the intrinsic camera parameters and extrinsic parameters for each of the +views. The algorithm is based on @cite Zhang2000 and @cite BouguetMCT . The coordinates of 3D object +points and their corresponding 2D projections in each view must be specified. That may be achieved +by using an object with a known geometry and easily detectable feature points. 
Such an object is +called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as +a calibration rig (see findChessboardCorners ). Currently, initialization of intrinsic parameters +(when CV_CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration +patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also +be used as long as initial cameraMatrix is provided. + +The algorithm performs the following steps: + +- Compute the initial intrinsic parameters (the option only available for planar calibration + patterns) or read them from the input parameters. The distortion coefficients are all set to + zeros initially unless some of CV_CALIB_FIX_K? are specified. + +- Estimate the initial camera pose as if the intrinsic parameters have been already known. This is + done using solvePnP . + +- Run the global Levenberg-Marquardt optimization algorithm to minimize the reprojection error, + that is, the total sum of squared distances between the observed feature points imagePoints and + the projected (using the current estimates for camera parameters and the poses) object points + objectPoints. See projectPoints for details. + +The function returns the final re-projection error. + +@note + If you use a non-square (=non-NxN) grid and findChessboardCorners for calibration, and + calibrateCamera returns bad values (zero distortion coefficients, an image center very far from + (w/2-0.5,h/2-0.5), and/or large differences between \f$f_x\f$ and \f$f_y\f$ (ratios of 10:1 or more)), + then you have probably used patternSize=cvSize(rows,cols) instead of using + patternSize=cvSize(cols,rows) in findChessboardCorners . 
+ +@sa + findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort + */ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, @@ -195,14 +787,117 @@ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, int flags = 0, TermCriteria criteria = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); -//! computes several useful camera characteristics from the camera matrix, camera frame resolution and the physical sensor size. +/** @brief Computes useful camera characteristics from the camera matrix. + +@param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or +stereoCalibrate . +@param imageSize Input image size in pixels. +@param apertureWidth Physical width in mm of the sensor. +@param apertureHeight Physical height in mm of the sensor. +@param fovx Output field of view in degrees along the horizontal sensor axis. +@param fovy Output field of view in degrees along the vertical sensor axis. +@param focalLength Focal length of the lens in mm. +@param principalPoint Principal point in mm. +@param aspectRatio \f$f_y/f_x\f$ + +The function computes various useful camera characteristics from the previously estimated camera +matrix. + +@note + Do keep in mind that the unit of measure 'mm' stands for whatever unit of measure one chooses for + the chessboard pitch (it can thus be any value). + */ CV_EXPORTS_W void calibrationMatrixValues( InputArray cameraMatrix, Size imageSize, double apertureWidth, double apertureHeight, CV_OUT double& fovx, CV_OUT double& fovy, CV_OUT double& focalLength, CV_OUT Point2d& principalPoint, CV_OUT double& aspectRatio ); -//! finds intrinsic and extrinsic parameters of a stereo camera +/** @brief Calibrates the stereo camera. + +@param objectPoints Vector of vectors of the calibration pattern points.
+@param imagePoints1 Vector of vectors of the projections of the calibration pattern points, +observed by the first camera. +@param imagePoints2 Vector of vectors of the projections of the calibration pattern points, +observed by the second camera. +@param cameraMatrix1 Input/output first camera matrix: +\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If +any of CV_CALIB_USE_INTRINSIC_GUESS , CV_CALIB_FIX_ASPECT_RATIO , +CV_CALIB_FIX_INTRINSIC , or CV_CALIB_FIX_FOCAL_LENGTH are specified, some or all of the +matrix components must be initialized. See the flags description for details. +@param distCoeffs1 Input/output vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. The +output vector length depends on the flags. +@param cameraMatrix2 Input/output second camera matrix. The parameter is similar to cameraMatrix1 . +@param distCoeffs2 Input/output lens distortion coefficients for the second camera. The parameter +is similar to distCoeffs1 . +@param imageSize Size of the image used only to initialize intrinsic camera matrix. +@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. +@param T Output translation vector between the coordinate systems of the cameras. +@param E Output essential matrix. +@param F Output fundamental matrix. +@param flags Different flags that may be zero or a combination of the following values: +- **CV_CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E , and F +matrices are estimated. +- **CV_CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters +according to the specified flags. Initial values are provided by the user. +- **CV_CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization. +- **CV_CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
+- **CV_CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$ +. +- **CV_CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ . +- **CV_CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to +zeros and fix there. +- **CV_CALIB_FIX_K1,...,CV_CALIB_FIX_K6** Do not change the corresponding radial +distortion coefficient during the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, +the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. +- **CV_CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward +compatibility, this extra flag should be explicitly specified to make the calibration +function use the rational model and return 8 coefficients. If the flag is not set, the +function computes and returns only 5 distortion coefficients. +- **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the +backward compatibility, this extra flag should be explicitly specified to make the +calibration function use the thin prism model and return 12 coefficients. If the flag is not +set, the function computes and returns only 5 distortion coefficients. +- **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during +the optimization. If CV_CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +supplied distCoeffs matrix is used. Otherwise, it is set to 0. +@param criteria Termination criteria for the iterative optimization algorithm. + +The function estimates transformation between two cameras making a stereo pair. If you have a stereo +camera where the relative position and orientation of two cameras is fixed, and if you computed +poses of an object relative to the first camera and to the second camera, (R1, T1) and (R2, T2), +respectively (this can be done with solvePnP ), then those poses definitely relate to each other. 
+This means that, given ( \f$R_1\f$,\f$T_1\f$ ), it should be possible to compute ( \f$R_2\f$,\f$T_2\f$ ). You only +need to know the position and orientation of the second camera relative to the first camera. This is +what the described function does. It computes ( \f$R\f$,\f$T\f$ ) so that: + +\f[R_2=R*R_1\f] +\f[T_2=R*T_1 + T,\f] + +Optionally, it computes the essential matrix E: + +\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} *R\f] + +where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ . And the function +can also compute the fundamental matrix F: + +\f[F = cameraMatrix2^{-T} E cameraMatrix1^{-1}\f] + +Besides the stereo-related information, the function can also perform a full calibration of each of +two cameras. However, due to the high dimensionality of the parameter space and noise in the input +data, the function can diverge from the correct solution. If the intrinsic parameters can be +estimated with high accuracy for each of the cameras individually (for example, using +calibrateCamera ), it is recommended to do so and then pass the CV_CALIB_FIX_INTRINSIC flag to the +function along with the computed intrinsic parameters. Otherwise, if all the parameters are +estimated at once, it makes sense to restrict some parameters, for example, pass +CV_CALIB_SAME_FOCAL_LENGTH and CV_CALIB_ZERO_TANGENT_DIST flags, which is usually a +reasonable assumption. + +Similarly to calibrateCamera , the function minimizes the total re-projection error for all the +points in all the available views from both cameras. The function returns the final value of the +re-projection error.
+ */ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1, @@ -212,7 +907,85 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) ); -//! computes the rectification transformation for a stereo camera from its intrinsic and extrinsic parameters +/** @brief Computes rectification transforms for each head of a calibrated stereo camera. + +@param cameraMatrix1 First camera matrix. +@param cameraMatrix2 Second camera matrix. +@param distCoeffs1 First camera distortion parameters. +@param distCoeffs2 Second camera distortion parameters. +@param imageSize Size of the image used for stereo calibration. +@param R Rotation matrix between the coordinate systems of the first and the second cameras. +@param T Translation vector between coordinate systems of the cameras. +@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. +@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. +@param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first +camera. +@param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second +camera. +@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). +@param flags Operation flags that may be zero or CV_CALIB_ZERO_DISPARITY . If the flag is set, +the function makes the principal points of each camera have the same pixel coordinates in the +rectified views. And if the flag is not set, the function may still shift the images in the +horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the +useful image area. +@param alpha Free scaling parameter. 
If it is -1 or absent, the function performs the default +scaling. Otherwise, the parameter should be between 0 and 1. alpha=0 means that the rectified +images are zoomed and shifted so that only valid pixels are visible (no black areas after +rectification). alpha=1 means that the rectified image is decimated and shifted so that all the +pixels from the original images from the cameras are retained in the rectified images (no source +image pixels are lost). Obviously, any intermediate value yields an intermediate result between +those two extreme cases. +@param newImageSize New image resolution after rectification. The same size should be passed to +initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0) +is passed (default), it is set to the original imageSize . Setting it to larger value can help you +preserve details in the original image, especially when there is a big radial distortion. +@param validPixROI1 Optional output rectangles inside the rectified images where all the pixels +are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller +(see the picture below). +@param validPixROI2 Optional output rectangles inside the rectified images where all the pixels +are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller +(see the picture below). + +The function computes the rotation matrices for each camera that (virtually) make both camera image +planes the same plane. Consequently, this makes all the epipolar lines parallel and thus simplifies +the dense stereo correspondence problem. The function takes the matrices computed by stereoCalibrate +as input. As output, it provides two rotation matrices and also two projection matrices in the new +coordinates. 
The function distinguishes the following two cases: + +- **Horizontal stereo**: the first and the second camera views are shifted relative to each other + mainly along the x axis (with possible small vertical shift). In the rectified images, the + corresponding epipolar lines in the left and right cameras are horizontal and have the same + y-coordinate. P1 and P2 look like: + + \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx_1 & 0 \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f] + + \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx_2 & T_x*f \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f] + + where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if + CV_CALIB_ZERO_DISPARITY is set. + +- **Vertical stereo**: the first and the second camera views are shifted relative to each other + mainly in vertical direction (and probably a bit in the horizontal direction too). The epipolar + lines in the rectified images are vertical and have the same x-coordinate. P1 and P2 look like: + + \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_1 & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f] + + \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_2 & T_y*f \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f] + + where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if CALIB_ZERO_DISPARITY is + set. + +As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera +matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to +initialize the rectification map for each camera. + +See below the screenshot from the stereo_calib.cpp sample. Some red horizontal lines pass through +the corresponding image regions. This means that the images are well rectified, which is what most +stereo correspondence algorithms rely on. The green rectangles are roi1 and roi2 . You see that +their interiors are all valid pixels. 
+ +![image](pics/stereo_undistort.jpg) + */ CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs1, InputArray cameraMatrix2, InputArray distCoeffs2, Size imageSize, InputArray R, InputArray T, @@ -222,7 +995,35 @@ CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs double alpha = -1, Size newImageSize = Size(), CV_OUT Rect* validPixROI1 = 0, CV_OUT Rect* validPixROI2 = 0 ); -//! computes the rectification transformation for an uncalibrated stereo camera (zero distortion is assumed) +/** @brief Computes a rectification transform for an uncalibrated stereo camera. + +@param points1 Array of feature points in the first image. +@param points2 The corresponding points in the second image. The same formats as in +findFundamentalMat are supported. +@param F Input fundamental matrix. It can be computed from the same set of point pairs using +findFundamentalMat . +@param imgSize Size of the image. +@param H1 Output rectification homography matrix for the first image. +@param H2 Output rectification homography matrix for the second image. +@param threshold Optional threshold used to filter out the outliers. If the parameter is greater +than zero, all the point pairs that do not comply with the epipolar geometry (that is, the points +for which \f$|\texttt{points2[i]}^T*\texttt{F}*\texttt{points1[i]}|>\texttt{threshold}\f$ ) are +rejected prior to computing the homographies. Otherwise, all the points are considered inliers. + +The function computes the rectification transformations without knowing intrinsic parameters of the +cameras and their relative position in the space, which explains the suffix "uncalibrated". Another +related difference from stereoRectify is that the function outputs not the rectification +transformations in the object (3D) space, but the planar perspective transformations encoded by the +homography matrices H1 and H2 . The function implements the algorithm @cite Hartley99 .
+ +@note + While the algorithm does not need to know the intrinsic parameters of the cameras, it heavily + depends on the epipolar geometry. Therefore, if the camera lenses have a significant distortion, + it would be better to correct it before computing the fundamental matrix and calling this + function. For example, distortion coefficients can be estimated for each head of stereo camera + separately by using calibrateCamera . Then, the images can be corrected using undistort , or + just the point coordinates can be corrected with undistortPoints . + */ CV_EXPORTS_W bool stereoRectifyUncalibrated( InputArray points1, InputArray points2, InputArray F, Size imgSize, OutputArray H1, OutputArray H2, @@ -240,60 +1041,311 @@ CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distC OutputArray Q, double alpha, Size newImgSize, CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags ); -//! returns the optimal new camera matrix +/** @brief Returns the new camera matrix based on the free scaling parameter. + +@param cameraMatrix Input camera matrix. +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param imageSize Original image size. +@param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are +valid) and 1 (when all the source image pixels are retained in the undistorted image). See +stereoRectify for details. +@param newImgSize Image size after rectification. By default,it is set to imageSize . +@param validPixROI Optional output rectangle that outlines all-good-pixels region in the +undistorted image. See roi1, roi2 description in stereoRectify . +@param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the +principal point should be at the image center or not. 
By default, the principal point is chosen to +best fit a subset of the source image (determined by alpha) to the corrected image. +@return new_camera_matrix Output new camera matrix. + +The function computes and returns the optimal new camera matrix based on the free scaling parameter. +By varying this parameter, you may retrieve only sensible pixels (alpha=0), keep all the original +image pixels if there is valuable information in the corners (alpha=1), or get something in between. +When alpha\>0 , the undistortion result is likely to have some black pixels corresponding to +"virtual" pixels outside of the captured distorted image. The original camera matrix, distortion +coefficients, the computed new camera matrix, and newImgSize should be passed to +initUndistortRectifyMap to produce the maps for remap . + */ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs, Size imageSize, double alpha, Size newImgSize = Size(), CV_OUT Rect* validPixROI = 0, bool centerPrincipalPoint = false); -//! converts point coordinates from normal pixel coordinates to homogeneous coordinates ((x,y)->(x,y,1)) +/** @brief Converts points from Euclidean to homogeneous space. + +@param src Input vector of N-dimensional points. +@param dst Output vector of N+1-dimensional points. + +The function converts points from Euclidean to homogeneous space by appending 1's to the tuple of +point coordinates. That is, each point (x1, x2, ..., xn) is converted to (x1, x2, ..., xn, 1). + */ CV_EXPORTS_W void convertPointsToHomogeneous( InputArray src, OutputArray dst ); -//! converts point coordinates from homogeneous to normal pixel coordinates ((x,y,z)->(x/z, y/z)) +/** @brief Converts points from homogeneous to Euclidean space. + +@param src Input vector of N-dimensional points. +@param dst Output vector of N-1-dimensional points. + +The function converts points from homogeneous to Euclidean space using perspective projection. That is, +each point (x1, x2, ...
x(n-1), xn) is converted to (x1/xn, x2/xn, ..., x(n-1)/xn). When xn=0, the +output point coordinates will be (0,0,0,...). + */ CV_EXPORTS_W void convertPointsFromHomogeneous( InputArray src, OutputArray dst ); -//! for backward compatibility +/** @brief Converts points to/from homogeneous coordinates. + +@param src Input array or vector of 2D, 3D, or 4D points. +@param dst Output vector of 2D, 3D, or 4D points. + +The function converts 2D or 3D points from/to homogeneous coordinates by calling either +convertPointsToHomogeneous or convertPointsFromHomogeneous. + +@note The function is obsolete. Use one of the previous two functions instead. + */ CV_EXPORTS void convertPointsHomogeneous( InputArray src, OutputArray dst ); -//! finds fundamental matrix from a set of corresponding 2D points +/** @brief Calculates a fundamental matrix from the corresponding points in two images. + +@param points1 Array of N points from the first image. The point coordinates should be +floating-point (single or double precision). +@param points2 Array of the second image points of the same size and format as points1 . +@param method Method for computing a fundamental matrix. +- **CV_FM_7POINT** for a 7-point algorithm. \f$N = 7\f$ +- **CV_FM_8POINT** for an 8-point algorithm. \f$N \ge 8\f$ +- **CV_FM_RANSAC** for the RANSAC algorithm. \f$N \ge 8\f$ +- **CV_FM_LMEDS** for the LMedS algorithm. \f$N \ge 8\f$ +@param param1 Parameter used for RANSAC. It is the maximum distance from a point to an epipolar +line in pixels, beyond which the point is considered an outlier and is not used for computing the +final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the +point localization, image resolution, and the image noise. +@param param2 Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level +of confidence (probability) that the estimated matrix is correct. 
+@param mask Optional output array of N elements, every element of which is set to 0 for outliers and to 1 for the other points. The array is computed only in the RANSAC and LMedS methods. + +The epipolar geometry is described by the following equation: + +\f[[p_2; 1]^T F [p_1; 1] = 0\f] + +where \f$F\f$ is a fundamental matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the +second images, respectively. + +The function calculates the fundamental matrix using one of four methods listed above and returns +the found fundamental matrix. Normally just one matrix is found. But in case of the 7-point +algorithm, the function may return up to 3 solutions ( \f$9 \times 3\f$ matrix that stores all 3 +matrices sequentially). + +The calculated fundamental matrix may be passed further to computeCorrespondEpilines that finds the +epipolar lines corresponding to the specified points. It can also be passed to +stereoRectifyUncalibrated to compute the rectification transformation. +@code +    // Example. Estimation of fundamental matrix using the RANSAC algorithm +    int point_count = 100; +    vector<Point2f> points1(point_count); +    vector<Point2f> points2(point_count); + +    // initialize the points here ... +    for( int i = 0; i < point_count; i++ ) +    { +        points1[i] = ...; +        points2[i] = ...; +    } + +    Mat fundamental_matrix = +     findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99); +@endcode + */ CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2, int method = FM_RANSAC, double param1 = 3., double param2 = 0.99, OutputArray mask = noArray() ); -//! variant of findFundamentalMat for backward compatibility +/** @overload */ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, OutputArray mask, int method = FM_RANSAC, double param1 = 3., double param2 = 0.99 ); -//! finds essential matrix from a set of corresponding 2D points using five-point algorithm +/** @brief Calculates an essential matrix from the corresponding points in two images. + +@param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should +be floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 . +@param focal focal length of the camera. Note that this function assumes that points1 and points2 +are feature points from cameras with same focal length and principal point. +@param pp principal point of the camera. +@param method Method for computing an essential matrix. +- **RANSAC** for the RANSAC algorithm. +- **LMEDS** for the LMedS algorithm. +@param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar +line in pixels, beyond which the point is considered an outlier and is not used for computing the +final essential matrix. It can be set to something like 1-3, depending on the accuracy of the +point localization, image resolution, and the image noise. +@param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of +confidence (probability) that the estimated matrix is correct. +@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1 +for the other points. The array is computed only in the RANSAC and LMedS methods. + +This function estimates essential matrix based on the five-point algorithm solver in @cite Nister03 . +@cite SteweniusCFS is also related. The epipolar geometry is described by the following equation: + +\f[[p_2; 1]^T K^{-T} E K^{-1} [p_1; 1] = 0\f]\f[K = +\begin{bmatrix} +f & 0 & x_{pp} \\ +0 & f & y_{pp} \\ +0 & 0 & 1 +\end{bmatrix}\f] + +where \f$E\f$ is an essential matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the +second images, respectively. The result of this function may be passed further to +decomposeEssentialMat or recoverPose to recover the relative pose between cameras. + */ CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2, double focal = 1.0, Point2d pp = Point2d(0, 0), int method = RANSAC, double prob = 0.999, double threshold = 1.0, OutputArray mask = noArray() ); -//!
decompose essential matrix to possible rotation matrix and one translation vector +/** @brief Decompose an essential matrix to possible rotations and translation. + +@param E The input essential matrix. +@param R1 One possible rotation matrix. +@param R2 Another possible rotation matrix. +@param t One possible translation. + +This function decomposes an essential matrix E using svd decomposition @cite HartleyZ00 . Generally 4 +possible poses exist for a given E. They are \f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$. By +decomposing E, you can only get the direction of the translation, so the function returns unit t. + */ CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t ); -//! recover relative camera pose from a set of corresponding 2D points +/** @brief Recover relative camera rotation and translation from an estimated essential matrix and the +corresponding points in two images, using cheirality check. Returns the number of inliers which pass +the check. + +@param E The input essential matrix. +@param points1 Array of N 2D points from the first image. The point coordinates should be +floating-point (single or double precision). +@param points2 Array of the second image points of the same size and format as points1 . +@param R Recovered relative rotation. +@param t Recovered relative translation. +@param focal Focal length of the camera. Note that this function assumes that points1 and points2 +are feature points from cameras with same focal length and principal point. +@param pp Principal point of the camera. +@param mask Input/output mask for inliers in points1 and points2. +If it is not empty, then it marks inliers in points1 and points2 for the given essential +matrix E. Only these inliers will be used to recover pose. In the output mask, only inliers +which pass the cheirality check are marked.
+This function decomposes an essential matrix using decomposeEssentialMat and then verifies possible +pose hypotheses by doing cheirality check. The cheirality check basically means that the +triangulated 3D points should have positive depth. Some details can be found in @cite Nister03 . + +This function can be used to process output E and mask from findEssentialMat. In this scenario, +points1 and points2 are the same input for findEssentialMat: +@code +    // Example. Estimation of essential matrix and relative pose using the RANSAC algorithm +    int point_count = 100; +    vector<Point2f> points1(point_count); +    vector<Point2f> points2(point_count); + +    // initialize the points here ... +    for( int i = 0; i < point_count; i++ ) +    { +        points1[i] = ...; +        points2[i] = ...; +    } + +    double focal = 1.0; +    cv::Point2d pp(0.0, 0.0); +    Mat E, R, t, mask; + +    E = findEssentialMat(points1, points2, focal, pp, RANSAC, 0.999, 1.0, mask); +    recoverPose(E, points1, points2, R, t, focal, pp, mask); +@endcode + */ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2, OutputArray R, OutputArray t, double focal = 1.0, Point2d pp = Point2d(0, 0), InputOutputArray mask = noArray() ); -//! finds coordinates of epipolar lines corresponding the specified points +/** @brief For points in an image of a stereo pair, computes the corresponding epilines in the other image. + +@param points Input points. \f$N \times 1\f$ or \f$1 \times N\f$ matrix of type CV_32FC2 or +vector\<Point2f\> . +@param whichImage Index of the image (1 or 2) that contains the points . +@param F Fundamental matrix that can be estimated using findFundamentalMat or stereoRectify . +@param lines Output vector of the epipolar lines corresponding to the points in the other image. +Each line \f$ax + by + c=0\f$ is encoded by 3 numbers \f$(a, b, c)\f$ . + +For every point in one of the two images of a stereo pair, the function finds the equation of the +corresponding epipolar line in the other image.
+ +From the fundamental matrix definition (see findFundamentalMat ), line \f$l^{(2)}_i\f$ in the second +image for the point \f$p^{(1)}_i\f$ in the first image (when whichImage=1 ) is computed as: + +\f[l^{(2)}_i = F p^{(1)}_i\f] + +And vice versa, when whichImage=2, \f$l^{(1)}_i\f$ is computed from \f$p^{(2)}_i\f$ as: + +\f[l^{(1)}_i = F^T p^{(2)}_i\f] + +Line coefficients are defined up to a scale. They are normalized so that \f$a_i^2+b_i^2=1\f$ . + */ CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage, InputArray F, OutputArray lines ); +/** @brief Reconstructs points by triangulation. + +@param projMatr1 3x4 projection matrix of the first camera. +@param projMatr2 3x4 projection matrix of the second camera. +@param projPoints1 2xN array of feature points in the first image. In case of c++ version it can +be also a vector of feature points or two-channel matrix of size 1xN or Nx1. +@param projPoints2 2xN array of corresponding points in the second image. In case of c++ version +it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1. +@param points4D 4xN array of reconstructed points in homogeneous coordinates. + +The function reconstructs 3-dimensional points (in homogeneous coordinates) by using their +observations with a stereo camera. Projections matrices can be obtained from stereoRectify. + +@note + Keep in mind that all input data should be of float type in order for this function to work. + +@sa + reprojectImageTo3D + */ CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2, InputArray projPoints1, InputArray projPoints2, OutputArray points4D ); +/** @brief Refines coordinates of corresponding points. + +@param F 3x3 fundamental matrix. +@param points1 1xN array containing the first set of points. +@param points2 1xN array containing the second set of points. +@param newPoints1 The optimized points1. +@param newPoints2 The optimized points2. 
+ +The function implements the Optimal Triangulation Method (see Multiple View Geometry for details). +For each given point correspondence points1[i] \<-\> points2[i], and a fundamental matrix F, it +computes the corrected correspondences newPoints1[i] \<-\> newPoints2[i] that minimize the geometric +error \f$d(points1[i], newPoints1[i])^2 + d(points2[i],newPoints2[i])^2\f$ (where \f$d(a,b)\f$ is the +geometric distance between points \f$a\f$ and \f$b\f$ ) subject to the epipolar constraint +\f$newPoints2^T * F * newPoints1 = 0\f$ . + */ CV_EXPORTS_W void correctMatches( InputArray F, InputArray points1, InputArray points2, OutputArray newPoints1, OutputArray newPoints2 ); -//! filters off speckles (small regions of incorrectly computed disparity) +/** @brief Filters off small noise blobs (speckles) in the disparity map + +@param img The input 16-bit signed disparity image +@param newVal The disparity value used to paint-off the speckles +@param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not +affected by the algorithm +@param maxDiff Maximum difference between neighbor disparity pixels to put them into the same +blob. Note that since StereoBM, StereoSGBM and may be other algorithms return a fixed-point +disparity map, where disparity values are multiplied by 16, this scale factor should be taken into +account when specifying this parameter value. +@param buf The optional temporary buffer to avoid memory allocation within the function. + */ CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal, int maxSpeckleSize, double maxDiff, InputOutputArray buf = noArray() ); @@ -308,23 +1360,77 @@ CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost int minDisparity, int numberOfDisparities, int disp12MaxDisp = 1 ); -//! reprojects disparity image to 3D: (x,y,d)->(X,Y,Z) using the matrix Q returned by cv::stereoRectify +/** @brief Reprojects a disparity image to 3D space. 
+ +@param disparity Input single-channel 8-bit unsigned, 16-bit signed, 32-bit signed or 32-bit +floating-point disparity image. +@param _3dImage Output 3-channel floating-point image of the same size as disparity . Each +element of _3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity +map. +@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with stereoRectify. +@param handleMissingValues Indicates, whether the function should handle missing values (i.e. +points where the disparity was not computed). If handleMissingValues=true, then pixels with the +minimal disparity that corresponds to the outliers (see StereoMatcher::compute ) are transformed +to 3D points with a very large Z value (currently set to 10000). +@param ddepth The optional output array depth. If it is -1, the output image will have CV_32F +depth. ddepth can also be set to CV_16S, CV_32S or CV_32F. + +The function transforms a single-channel disparity map to a 3-channel image representing a 3D +surface. That is, for each pixel (x,y) and the corresponding disparity d=disparity(x,y) , it +computes: + +\f[\begin{array}{l} [X \; Y \; Z \; W]^T = \texttt{Q} *[x \; y \; \texttt{disparity} (x,y) \; 1]^T \\ \texttt{\_3dImage} (x,y) = (X/W, \; Y/W, \; Z/W) \end{array}\f] + +The matrix Q can be an arbitrary \f$4 \times 4\f$ matrix (for example, the one computed by +stereoRectify). To reproject a sparse set of points {(x,y,d),...} to 3D space, use +perspectiveTransform . + */ CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity, OutputArray _3dImage, InputArray Q, bool handleMissingValues = false, int ddepth = -1 ); +/** @brief Computes an optimal affine transformation between two 3D point sets. + +@param src First input 3D point set. +@param dst Second input 3D point set. +@param out Output 3D affine transformation matrix \f$3 \times 4\f$ . +@param inliers Output vector indicating which points are inliers.
+@param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as +an inlier. +@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything +between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation +significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation. + +The function estimates an optimal 3D affine transformation between two 3D point sets using the +RANSAC algorithm. + */ CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst, OutputArray out, OutputArray inliers, double ransacThreshold = 3, double confidence = 0.99); +/** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s). + +@param H The input homography matrix between two images. +@param K The input intrinsic camera calibration matrix. +@param rotations Array of rotation matrices. +@param translations Array of translation matrices. +@param normals Array of plane normal matrices. +This function extracts relative camera motion between two views observing a planar object from the +homography H induced by the plane. The intrinsic camera matrix K must also be provided. The function +may return up to four mathematical solution sets. At least two of the solutions may further be +invalidated if point correspondences are available by applying positive depth constraint (all points +must be in front of the camera). The decomposition method is described in detail in @cite Malis . + */ CV_EXPORTS_W int decomposeHomographyMat(InputArray H, InputArray K, OutputArrayOfArrays rotations, OutputArrayOfArrays translations, OutputArrayOfArrays normals); +/** @brief The base class for stereo correspondence algorithms. 
+ */ class CV_EXPORTS_W StereoMatcher : public Algorithm { public: @@ -332,6 +1438,14 @@ public: DISP_SCALE = (1 << DISP_SHIFT) }; + /** @brief Computes disparity map for the specified stereo pair + + @param left Left 8-bit single-channel image. + @param right Right image of the same size and the same type as the left one. + @param disparity Output disparity map. It has the same size as the input images. Some algorithms, + like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value + has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map. + */ CV_WRAP virtual void compute( InputArray left, InputArray right, OutputArray disparity ) = 0; @@ -355,7 +1469,9 @@ public: }; - +/** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and +contributed to OpenCV by K. Konolige. + */ class CV_EXPORTS_W StereoBM : public StereoMatcher { public: @@ -387,10 +1503,40 @@ public: CV_WRAP virtual Rect getROI2() const = 0; CV_WRAP virtual void setROI2(Rect roi2) = 0; + /** @brief Creates StereoBM object + + @param numDisparities the disparity search range. For each pixel algorithm will find the best + disparity from 0 (default minimum disparity) to numDisparities. The search range can then be + shifted by changing the minimum disparity. + @param blockSize the linear size of the blocks compared by the algorithm. The size should be odd + (as the block is centered at the current pixel). Larger block size implies smoother, though less + accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher + chance for algorithm to find a wrong correspondence. + + The function create StereoBM object. You can then call StereoBM::compute() to compute disparity for + a specific stereo pair. + */ CV_WRAP static Ptr create(int numDisparities = 0, int blockSize = 21); }; - +/** @brief The class implements the modified H. 
Hirschmuller algorithm @cite HH08 that differs from the original +one as follows: + +- By default, the algorithm is single-pass, which means that you consider only 5 directions +instead of 8. Set mode=StereoSGBM::MODE_HH in createStereoSGBM to run the full variant of the +algorithm but beware that it may consume a lot of memory. +- The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the +blocks to single pixels. +- Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi +sub-pixel metric from @cite BT98 is used. Though, the color images are supported as well. +- Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for +example: pre-filtering (StereoBM::PREFILTER_XSOBEL type) and post-filtering (uniqueness +check, quadratic interpolation and speckle filtering). + +@note + - (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found + at opencv_source_code/samples/python2/stereo_match.py + */ class CV_EXPORTS_W StereoSGBM : public StereoMatcher { public: @@ -415,6 +1561,43 @@ public: CV_WRAP virtual int getMode() const = 0; CV_WRAP virtual void setMode(int mode) = 0; + /** @brief Creates StereoSGBM object + + @param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes + rectification algorithms can shift images, so this parameter needs to be adjusted accordingly. + @param numDisparities Maximum disparity minus minimum disparity. The value is always greater than + zero. In the current implementation, this parameter must be divisible by 16. + @param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be + somewhere in the 3..11 range. + @param P1 The first parameter controlling the disparity smoothness. See below. + @param P2 The second parameter controlling the disparity smoothness. The larger the values are, + the smoother the disparity is. 
P1 is the penalty on the disparity change by plus or minus 1 + between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor + pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good + P1 and P2 values are shown (like 8\*number_of_image_channels\*blockSize\*blockSize and + 32\*number_of_image_channels\*blockSize\*blockSize , respectively). + @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right + disparity check. Set it to a non-positive value to disable the check. + @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first + computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval. + The result values are passed to the Birchfield-Tomasi pixel cost function. + @param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function + value should "win" the second best value to consider the found match correct. Normally, a value + within the 5-15 range is good enough. + @param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles + and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the + 50-200 range. + @param speckleRange Maximum disparity variation within each connected component. If you do speckle + filtering, set the parameter to a positive value, it will be implicitly multiplied by 16. + Normally, 1 or 2 is good enough. + @param mode Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming + algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and + huge for HD-size pictures. By default, it is set to StereoSGBM::MODE_SGBM . + + The first constructor initializes StereoSGBM with all the default parameters. So, you only have to + set StereoSGBM::numDisparities at minimum.
The second constructor enables you to set each parameter + to a custom value. + */ CV_WRAP static Ptr create(int minDisparity, int numDisparities, int blockSize, int P1 = 0, int P2 = 0, int disp12MaxDiff = 0, int preFilterCap = 0, int uniquenessRatio = 0, @@ -422,8 +1605,16 @@ public: int mode = StereoSGBM::MODE_SGBM); }; +//! @} calib3d + +/** @brief The methods in this namespace use a so-called fisheye camera model. + @ingroup calib3d_fisheye +*/ namespace fisheye { +//! @addtogroup calib3d_fisheye +//! @{ + enum{ CALIB_USE_INTRINSIC_GUESS = 1, CALIB_RECOMPUTE_EXTRINSIC = 2, @@ -436,50 +1627,229 @@ namespace fisheye CALIB_FIX_INTRINSIC = 256 }; - //! projects 3D points using fisheye model + /** @brief Projects points using fisheye model + + @param objectPoints Array of object points, 1xN/Nx1 3-channel (or vector\<Point3f\> ), where N is + the number of points in the view. + @param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or + vector\<Point2f\>. + @param affine + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param alpha The skew coefficient. + @param jacobian Optional output 2Nx15 jacobian matrix of derivatives of image points with respect + to components of the focal lengths, coordinates of the principal point, distortion coefficients, + rotation vector, translation vector, and the skew. In the old interface different components of + the jacobian are returned via different output parameters. + + The function computes projections of 3D points to the image plane given intrinsic and extrinsic + camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of + image points coordinates (as functions of all the input parameters) with respect to the particular + parameters, intrinsic and/or extrinsic.
+ */ CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, const Affine3d& affine, InputArray K, InputArray D, double alpha = 0, OutputArray jacobian = noArray()); - //! projects points using fisheye model + /** @overload */ CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, InputArray rvec, InputArray tvec, InputArray K, InputArray D, double alpha = 0, OutputArray jacobian = noArray()); - //! distorts 2D points using fisheye model + /** @brief Distorts 2D points using fisheye model. + + @param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is + the number of points in the view. + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param alpha The skew coefficient. + @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> . + */ CV_EXPORTS void distortPoints(InputArray undistorted, OutputArray distorted, InputArray K, InputArray D, double alpha = 0); - //! undistorts 2D points using fisheye model + /** @brief Undistorts 2D points using fisheye model + + @param distorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is the + number of points in the view. + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 + 1-channel or 1x1 3-channel + @param P New camera matrix (3x3) or new projection matrix (3x4) + @param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> . + */ CV_EXPORTS void undistortPoints(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray()); - //!
computing undistortion and rectification maps for image transform by cv::remap() - //! If D is empty zero distortion is used, if R or P is empty identity matrixes are used + /** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty, zero + distortion is used; if R or P is empty, identity matrices are used. + + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 + 1-channel or 1x1 3-channel + @param P New camera matrix (3x3) or new projection matrix (3x4) + @param size Undistorted image size. + @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps() + for details. + @param map1 The first output map. + @param map2 The second output map. + */ CV_EXPORTS void initUndistortRectifyMap(InputArray K, InputArray D, InputArray R, InputArray P, const cv::Size& size, int m1type, OutputArray map1, OutputArray map2); - //! undistorts image, optionally changes resolution and camera matrix. If Knew zero identity matrix is used + /** @brief Transforms an image to compensate for fisheye lens distortion. + + @param distorted image with fisheye lens distortion. + @param undistorted Output image with compensated fisheye lens distortion. + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you + may additionally scale and shift the result by using a different matrix. + @param new_size + + The function transforms an image to compensate radial and tangential lens distortion.
+ + The function is simply a combination of fisheye::initUndistortRectifyMap (with unity R ) and remap + (with bilinear interpolation). See the former function for details of the transformation being + performed. + + See below the results of undistortImage. + - a\) result of undistort of perspective camera model (all possible coefficients (k_1, k_2, k_3, + k_4, k_5, k_6) of distortion were optimized under calibration) + - b\) result of fisheye::undistortImage of fisheye camera model (all possible coefficients (k_1, k_2, + k_3, k_4) of fisheye distortion were optimized under calibration) + - c\) original image was captured with fisheye lens + + Pictures a) and b) are almost the same. But if we consider points of the image located far from the center + of the image, we can notice that on image a) these points are distorted. + + ![image](pics/fisheye_undistorted.jpg) + */ CV_EXPORTS void undistortImage(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray Knew = cv::noArray(), const Size& new_size = Size()); - //! estimates new camera matrix for undistortion or rectification + /** @brief Estimates new camera matrix for undistortion or rectification. + + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param image_size + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 + 1-channel or 1x1 3-channel + @param P New camera matrix (3x3) or new projection matrix (3x4) + @param balance Sets the new focal length in range between the min focal length and the max focal + length. Balance is in range of [0, 1]. + @param new_size + @param fov_scale Divisor for new focal length. + */ CV_EXPORTS void estimateNewCameraMatrixForUndistortRectify(InputArray K, InputArray D, const Size &image_size, InputArray R, OutputArray P, double balance = 0.0, const Size& new_size = Size(), double fov_scale = 1.0); - //!
performs camera calibaration + /** @brief Performs camera calibration + + @param objectPoints vector of vectors of calibration pattern points in the calibration pattern + coordinate space. + @param imagePoints vector of vectors of the projections of calibration pattern points. + imagePoints.size() must be equal to objectPoints.size(), and imagePoints[i].size() must be equal to + objectPoints[i].size() for each i. + @param image_size Size of the image used only to initialize the intrinsic camera matrix. + @param K Output 3x3 floating-point camera matrix + \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If + fisheye::CALIB_USE_INTRINSIC_GUESS is specified, some or all of fx, fy, cx, cy must be + initialized before calling the function. + @param D Output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view. + That is, each k-th rotation vector together with the corresponding k-th translation vector (see + the next output parameter description) brings the calibration pattern from the model coordinate + space (in which object points are specified) to the world coordinate space, that is, a real + position of the calibration pattern in the k-th pattern view (k=0.. *M* -1). + @param tvecs Output vector of translation vectors estimated for each pattern view. + @param flags Different flags that may be zero or a combination of the following values: + - **fisheye::CALIB_USE_INTRINSIC_GUESS** K contains valid initial values of + fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image + center ( image_size is used), and focal distances are computed in a least-squares fashion. + - **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration + of intrinsic optimization. + - **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number.
+ - **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero. + - **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay + zero. + @param criteria Termination criteria for the iterative optimization algorithm. + */ CV_EXPORTS double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size, InputOutputArray K, InputOutputArray D, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags = 0, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON)); - //! stereo rectification estimation + /** @brief Stereo rectification for fisheye camera model + + @param K1 First camera matrix. + @param D1 First camera distortion parameters. + @param K2 Second camera matrix. + @param D2 Second camera distortion parameters. + @param imageSize Size of the image used for stereo calibration. + @param R Rotation matrix between the coordinate systems of the first and the second + cameras. + @param tvec Translation vector between coordinate systems of the cameras. + @param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. + @param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. + @param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first + camera. + @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second + camera. + @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). + @param flags Operation flags that may be zero or CV_CALIB_ZERO_DISPARITY . If the flag is set, + the function makes the principal points of each camera have the same pixel coordinates in the + rectified views. And if the flag is not set, the function may still shift the images in the + horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the + useful image area. 
+ @param newImageSize New image resolution after rectification. The same size should be passed to + initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0) + is passed (default), it is set to the original imageSize . Setting it to larger value can help you + preserve details in the original image, especially when there is a big radial distortion. + @param balance Sets the new focal length in range between the min focal length and the max focal + length. Balance is in range of [0, 1]. + @param fov_scale Divisor for new focal length. + */ CV_EXPORTS void stereoRectify(InputArray K1, InputArray D1, InputArray K2, InputArray D2, const Size &imageSize, InputArray R, InputArray tvec, OutputArray R1, OutputArray R2, OutputArray P1, OutputArray P2, OutputArray Q, int flags, const Size &newImageSize = Size(), double balance = 0.0, double fov_scale = 1.0); - //! performs stereo calibaration + /** @brief Performs stereo calibration + + @param objectPoints Vector of vectors of the calibration pattern points. + @param imagePoints1 Vector of vectors of the projections of the calibration pattern points, + observed by the first camera. + @param imagePoints2 Vector of vectors of the projections of the calibration pattern points, + observed by the second camera. + @param K1 Input/output first camera matrix: + \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If + any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CV_CALIB_FIX_INTRINSIC are specified, + some or all of the matrix components must be initialized. + @param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements. + @param K2 Input/output second camera matrix. The parameter is similar to K1 . + @param D2 Input/output lens distortion coefficients for the second camera. The parameter is + similar to D1 . + @param imageSize Size of the image used only to initialize intrinsic camera matrix. 
+ @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. + @param T Output translation vector between the coordinate systems of the cameras. + @param flags Different flags that may be zero or a combination of the following values: + - **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2 and D1, D2 so that only R, T matrices + are estimated. + - **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contain valid initial values of + fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image + center (imageSize is used), and focal distances are computed in a least-squares fashion. + - **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration + of intrinsic optimization. + - **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number. + - **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stays zero. + - **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay + zero. + @param criteria Termination criteria for the iterative optimization algorithm. + */ CV_EXPORTS double stereoCalibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputOutputArray K1, InputOutputArray D1, InputOutputArray K2, InputOutputArray D2, Size imageSize, OutputArray R, OutputArray T, int flags = CALIB_FIX_INTRINSIC, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON)); +//!
@} calib3d_fisheye } } // cv diff --git a/modules/calib3d/include/opencv2/calib3d/calib3d_c.h b/modules/calib3d/include/opencv2/calib3d/calib3d_c.h index c99c25a4d9..2392692389 100644 --- a/modules/calib3d/include/opencv2/calib3d/calib3d_c.h +++ b/modules/calib3d/include/opencv2/calib3d/calib3d_c.h @@ -50,6 +50,10 @@ extern "C" { #endif +/** @addtogroup calib3d_c + @{ + */ + /****************************************************************************************\ * Camera Calibration, Pose Estimation and Stereo * \****************************************************************************************/ @@ -371,6 +375,8 @@ CVAPI(void) cvReprojectImageTo3D( const CvArr* disparityImage, CvArr* _3dImage, const CvMat* Q, int handleMissingValues CV_DEFAULT(0) ); +/** @} calib3d_c */ + #ifdef __cplusplus } // extern "C" diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 2b5ad7ffe3..a9011d0b33 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -75,6 +75,9 @@ @defgroup core_opengl OpenGL interoperability @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters @defgroup core_optim Optimization Algorithms + @defgroup core_directx DirectX interoperability + @defgroup core_eigen Eigen support + @defgroup core_opencl OpenCL support @} */ diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 612b5dbd1d..15d526e802 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -51,13 +51,6 @@ #include "opencv2/core.hpp" #include "opencv2/core/cuda_types.hpp" -/** -@defgroup cuda CUDA-accelerated Computer Vision -@{ - @defgroup cuda_struct Data structures -@} - */ - namespace cv { namespace cuda { //! @addtogroup cuda_struct @@ -65,8 +58,28 @@ namespace cv { namespace cuda { //////////////////////////////// GpuMat /////////////////////////////// -//! 
Smart pointer for GPU memory with reference counting. -//! Its interface is mostly similar with cv::Mat. +/** @brief Base storage class for GPU memory with reference counting. + +Its interface matches the Mat interface with the following limitations: + +- no arbitrary dimensions support (only 2D) +- no functions that return references to their data (because references on GPU are not valid for + CPU) +- no expression templates technique support + +Beware that the latter limitation may lead to overloaded matrix operators that cause memory +allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be +passed directly to the kernel. + +@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are +aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix. + +@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely +on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory +release function returns error if the CUDA context has been destroyed before. + +@sa Mat + */ class CV_EXPORTS GpuMat { public: @@ -277,11 +290,28 @@ public: Allocator* allocator; }; -//! creates continuous matrix +/** @brief Creates a continuous matrix. + +@param rows Row count. +@param cols Column count. +@param type Type of the matrix. +@param arr Destination matrix. This parameter changes only if it has a proper type and area ( +\f$\texttt{rows} \times \texttt{cols}\f$ ). + +Matrix is called continuous if its elements are stored continuously, that is, without gaps at the +end of each row. + */ CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr); -//! ensures that size of the given matrix is not less than (rows, cols) size -//! and matrix type is match specified one too +/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type. 
+ +@param rows Minimum desired number of rows. +@param cols Minimum desired number of columns. +@param type Desired matrix type. +@param arr Destination matrix. + +The function does not reallocate memory if the matrix has proper attributes already. + */ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr); CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat); @@ -292,10 +322,21 @@ CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCou //////////////////////////////// CudaMem //////////////////////////////// -//! CudaMem is limited cv::Mat with page locked memory allocation. -//! Page locked memory is only needed for async and faster coping to GPU. -//! It is convertable to cv::Mat header without reference counting -//! so you can use it with other opencv functions. +/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA. + +Its interface is also Mat-like but with additional memory type parameters. + +- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous + uploading/downloading data from/to GPU. +- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU + address space, if supported. +- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are + used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache + utilization. + +@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2 +Pinned Memory APIs* document or *CUDA C Programming Guide*. + */ class CV_EXPORTS CudaMem { public: @@ -335,7 +376,13 @@ public: //! returns matrix header with disabled reference counting for CudaMem data. Mat createMatHeader() const; - //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware. 
+ /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting + for it. + + This can be done only if memory was allocated with the SHARED flag and if it is supported by the + hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which + eliminates an extra copy. + */ GpuMat createGpuMatHeader() const; // Please see cv::Mat for descriptions @@ -363,17 +410,28 @@ public: AllocType alloc_type; }; -//! page-locks the matrix m memory and maps it for the device(s) +/** @brief Page-locks the memory of matrix and maps it for the device(s). + +@param m Input matrix. + */ CV_EXPORTS void registerPageLocked(Mat& m); -//! unmaps the memory of matrix m, and makes it pageable again +/** @brief Unmaps the memory of matrix and makes it pageable again. + +@param m Input matrix. + */ CV_EXPORTS void unregisterPageLocked(Mat& m); ///////////////////////////////// Stream ////////////////////////////////// -//! Encapculates Cuda Stream. Provides interface for async coping. -//! Passed to each function that supports async kernel execution. -//! Reference counting is enabled. +/** @brief This class encapsulates a queue of asynchronous calls. + +@note Currently, you may face problems if an operation is enqueued twice with different data. Some +functions use the constant GPU memory, and next call may update the memory before the previous one +has been finished. But calling different operations asynchronously is safe because each operation +has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are +also safe. : + */ class CV_EXPORTS Stream { typedef void (Stream::*bool_type)() const; @@ -385,16 +443,26 @@ public: //! creates a new asynchronous stream Stream(); - //! queries an asynchronous stream for completion status + /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false. + */ bool queryIfComplete() const; - //! 
waits for stream tasks to complete + /** @brief Blocks the current CPU thread until all operations in the stream are complete. + */ void waitForCompletion(); - //! makes a compute stream wait on an event + /** @brief Makes a compute stream wait on an event. + */ void waitEvent(const Event& event); - //! adds a callback to be called on the host after all currently enqueued items in the stream have completed + /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have + completed. + + @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization + that may depend on outstanding device work or other callbacks that are not mandated to run earlier. + Callbacks without a mandated order (in independent streams) execute in undefined order and may be + serialized. + */ void enqueueHostCallback(StreamCallback callback, void* userData); //! return Stream object for default CUDA stream @@ -446,21 +514,41 @@ private: friend struct EventAccessor; }; +//! @} cuda_struct + //////////////////////////////// Initialization & Info //////////////////////// -//! this is the only function that do not throw exceptions if the library is compiled without CUDA +//! @addtogroup cuda_init +//! @{ + +/** @brief Returns the number of installed CUDA-enabled devices. + +Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support, +this function returns 0. + */ CV_EXPORTS int getCudaEnabledDeviceCount(); -//! set device to be used for GPU executions for the calling host thread +/** @brief Sets a device and initializes it for the current thread. + +@param device System index of a CUDA device starting with 0. + +If the call of this function is omitted, a default device is initialized at the first CUDA usage. + */ CV_EXPORTS void setDevice(int device); -//!
returns which device is currently being used for the calling host thread +/** @brief Returns the current device index set by cuda::setDevice or initialized by default. + */ CV_EXPORTS int getDevice(); -//! explicitly destroys and cleans up all resources associated with the current device in the current process -//! any subsequent API call to this device will reinitialize the device +/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current +process. + +Any subsequent API call to this device will reinitialize the device. + */ CV_EXPORTS void resetDevice(); +/** @brief Enumeration providing CUDA computing features. + */ enum FeatureSet { FEATURE_SET_COMPUTE_10 = 10, @@ -482,12 +570,27 @@ enum FeatureSet //! checks whether current device supports the given feature CV_EXPORTS bool deviceSupports(FeatureSet feature_set); -//! information about what GPU archs this OpenCV CUDA module was compiled for +/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was +built for. + +According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute +capability can always be compiled to binary code of greater or equal compute capability". + */ class CV_EXPORTS TargetArchs { public: + /** @brief Checks whether the module was built with the support of the given feature. + + @param feature_set Features to be checked. See cuda::FeatureSet. + */ static bool builtWith(FeatureSet feature_set); + /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA + code for the given architecture(s): + + @param major Major compute capability version. + @param minor Minor compute capability version.
+ */ static bool has(int major, int minor); static bool hasPtx(int major, int minor); static bool hasBin(int major, int minor); @@ -498,17 +601,25 @@ public: static bool hasEqualOrGreaterBin(int major, int minor); }; -//! information about the given GPU. +/** @brief Class providing functionality for querying the specified GPU properties. + */ class CV_EXPORTS DeviceInfo { public: //! creates DeviceInfo object for the current GPU DeviceInfo(); - //! creates DeviceInfo object for the given GPU + /** @brief The constructors. + + @param device_id System index of the CUDA device starting with 0. + + Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it + constructs an object for the current device. + */ DeviceInfo(int device_id); - //! device number. + /** @brief Returns system index of the CUDA device starting with 0. + */ int deviceID() const; //! ASCII string identifying device @@ -680,10 +791,19 @@ public: size_t freeMemory() const; size_t totalMemory() const; - //! checks whether device supports the given feature + /** @brief Provides information on CUDA feature support. + + @param feature_set Features to be checked. See cuda::FeatureSet. + + This function returns true if the device has the specified CUDA feature. Otherwise, it returns false . + */ bool supports(FeatureSet feature_set) const; - //! checks whether the CUDA module can be run on the given device + /** @brief Checks the CUDA module and device compatibility. + + This function returns true if the CUDA module can be run on the specified device. Otherwise, it + returns false . + */ bool isCompatible() const; private: @@ -693,7 +813,7 @@ private: CV_EXPORTS void printCudaDeviceInfo(int device); CV_EXPORTS void printShortCudaDeviceInfo(int device); -//! @} +//!
@} cuda_init }} // namespace cv { namespace cuda { diff --git a/modules/core/include/opencv2/core/cuda_stream_accessor.hpp b/modules/core/include/opencv2/core/cuda_stream_accessor.hpp index 4eb4ba61ad..66aaf56c52 100644 --- a/modules/core/include/opencv2/core/cuda_stream_accessor.hpp +++ b/modules/core/include/opencv2/core/cuda_stream_accessor.hpp @@ -66,6 +66,11 @@ namespace cv class Stream; class Event; + /** @brief Class that enables getting cudaStream_t from cuda::Stream. + + It is declared in cuda_stream_accessor.hpp because it is the only public header that depends on the CUDA Runtime API. Including it + brings a dependency to your code. + */ struct StreamAccessor { CV_EXPORTS static cudaStream_t getStream(const Stream& stream); diff --git a/modules/core/include/opencv2/core/cuda_types.hpp b/modules/core/include/opencv2/core/cuda_types.hpp index ec67ae08ba..490086fb0a 100644 --- a/modules/core/include/opencv2/core/cuda_types.hpp +++ b/modules/core/include/opencv2/core/cuda_types.hpp @@ -89,6 +89,11 @@ namespace cv size_t size; }; + /** @brief Structure similar to cuda::PtrStepSz but containing only a pointer and row step. + + Width and height fields are excluded for performance reasons. The structure is intended + for internal use or for users who write device code. + */ template struct PtrStep : public DevPtr { __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {} @@ -104,6 +109,12 @@ namespace cv __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } }; + /** @brief Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA + kernels). + + Typically, it is used internally by OpenCV and by users who write device code. You can call + its members from both host and device code.
+ */ template struct PtrStepSz : public PtrStep { __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {} diff --git a/modules/cuda/doc/introduction.markdown b/modules/cuda/doc/introduction.markdown new file mode 100644 index 0000000000..ebe8c21af3 --- /dev/null +++ b/modules/cuda/doc/introduction.markdown @@ -0,0 +1,85 @@ +CUDA Module Introduction {#cuda_intro} +======================== + +General Information +------------------- + +The OpenCV CUDA module is a set of classes and functions to utilize CUDA computational capabilities. +It is implemented using NVIDIA\* CUDA\* Runtime API and supports only NVIDIA GPUs. The OpenCV CUDA +module includes utility functions, low-level vision primitives, and high-level algorithms. The +utility functions and low-level primitives provide a powerful infrastructure for developing fast +vision algorithms taking advantage of CUDA whereas the high-level functionality includes some +state-of-the-art algorithms (such as stereo correspondence, face and people detectors, and others) +ready to be used by the application developers. + +The CUDA module is designed as a host-level API. This means that if you have pre-compiled OpenCV +CUDA binaries, you are not required to have the CUDA Toolkit installed or write any extra code to +make use of the CUDA. + +The OpenCV CUDA module is designed for ease of use and does not require any knowledge of CUDA. +Though, such a knowledge will certainly be useful to handle non-trivial cases or achieve the highest +performance. It is helpful to understand the cost of various operations, what the GPU does, what the +preferred data formats are, and so on. The CUDA module is an effective instrument for quick +implementation of CUDA-accelerated computer vision algorithms. However, if your algorithm involves +many simple operations, then, for the best possible performance, you may still need to write your +own kernels to avoid extra write and read operations on the intermediate results. 
+ +To enable CUDA support, configure OpenCV using CMake with WITH\_CUDA=ON . When the flag is set and +if CUDA is installed, the full-featured OpenCV CUDA module is built. Otherwise, the module is still +built but at runtime all functions from the module throw Exception with CV\_GpuNotSupported error +code, except for cuda::getCudaEnabledDeviceCount(). The latter function returns zero GPU count in +this case. Building OpenCV without CUDA support does not perform device code compilation, so it does +not require the CUDA Toolkit installed. Therefore, using the cuda::getCudaEnabledDeviceCount() +function, you can implement a high-level algorithm that will detect GPU presence at runtime and +choose an appropriate implementation (CPU or GPU) accordingly. + +Compilation for Different NVIDIA\* Platforms +-------------------------------------------- + +NVIDIA\* compiler enables generating binary code (cubin and fatbin) and intermediate code (PTX). +Binary code often implies a specific GPU architecture and generation, so the compatibility with +other GPUs is not guaranteed. PTX is targeted for a virtual platform that is defined entirely by the +set of capabilities or features. Depending on the selected virtual platform, some of the +instructions are emulated or disabled, even if the real hardware supports all the features. + +At the first call, the PTX code is compiled to binary code for the particular GPU using a JIT +compiler. When the target GPU has a compute capability (CC) lower than the PTX code, JIT fails. By +default, the OpenCV CUDA module includes: + +\* + Binaries for compute capabilities 1.3 and 2.0 (controlled by CUDA\_ARCH\_BIN in CMake) + +\* + PTX code for compute capabilities 1.1 and 1.3 (controlled by CUDA\_ARCH\_PTX in CMake) + +This means that for devices with CC 1.3 and 2.0 binary images are ready to run. For all newer +platforms, the PTX code for 1.3 is JIT'ed to a binary image. For devices with CC 1.1 and 1.2, the +PTX for 1.1 is JIT'ed. 
For devices with CC 1.0, no code is available and the functions throw +Exception. For platforms where JIT compilation is performed first, the run is slow. + +On a GPU with CC 1.0, you can still compile the CUDA module and most of the functions will run +flawlessly. To achieve this, add "1.0" to the list of binaries, for example, +CUDA\_ARCH\_BIN="1.0 1.3 2.0" . The functions that cannot be run on CC 1.0 GPUs throw an exception. + +You can always determine at runtime whether the OpenCV GPU-built binaries (or PTX code) are +compatible with your GPU. The function cuda::DeviceInfo::isCompatible returns the compatibility +status (true/false). + +Utilizing Multiple GPUs +----------------------- + +In the current version, each of the OpenCV CUDA algorithms can use only a single GPU. So, to utilize +multiple GPUs, you have to manually distribute the work between GPUs. Switching the active device can be +done using the cuda::setDevice() function. For more details, please read the CUDA C Programming Guide. + +While developing algorithms for multiple GPUs, note the data passing overhead. For primitive functions +and small images, it can be significant, which may eliminate all the advantages of having multiple +GPUs. But for high-level algorithms, consider using multi-GPU acceleration. For example, the Stereo +Block Matching algorithm has been successfully parallelized using the following algorithm: + +1. Split each image of the stereo pair into two horizontal overlapping stripes. +2. Process each pair of stripes (from the left and right images) on a separate Fermi\* GPU. +3. Merge the results into a single disparity map. + +With this algorithm, a dual GPU gave a 180% performance increase compared to the single Fermi GPU. +For a source code example, see the samples in opencv_source_code/samples/gpu/.
diff --git a/modules/cuda/include/opencv2/cuda.hpp b/modules/cuda/include/opencv2/cuda.hpp index a42bfb7d83..ac51b87dde 100644 --- a/modules/cuda/include/opencv2/cuda.hpp +++ b/modules/cuda/include/opencv2/cuda.hpp @@ -49,10 +49,25 @@ #include "opencv2/core/cuda.hpp" +/** +@defgroup cuda CUDA-accelerated Computer Vision + @ref cuda_intro "Introduction page" + @{ + @defgroup cuda_init Initialization and Information + @defgroup cuda_struct Data Structures + @defgroup cuda_calib3d Camera Calibration and 3D Reconstruction + @defgroup cuda_objdetect Object Detection + @} + + */ + namespace cv { namespace cuda { //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// +//! @addtogroup cuda_objdetect +//! @{ + struct CV_EXPORTS HOGConfidence { double scale; @@ -61,31 +76,92 @@ struct CV_EXPORTS HOGConfidence std::vector part_scores[4]; }; +/** @brief The class implements the Histogram of Oriented Gradients (@cite Dalal2005) object detector. + +Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much +as possible. + +@note + - An example applying the HOG descriptor for people detection can be found at + opencv_source_code/samples/cpp/peopledetect.cpp + - A CUDA example applying the HOG descriptor for people detection can be found at + opencv_source_code/samples/gpu/hog.cpp + - (Python) An example applying the HOG descriptor for people detection can be found at + opencv_source_code/samples/python2/peopledetect.py + */ struct CV_EXPORTS HOGDescriptor { enum { DEFAULT_WIN_SIGMA = -1 }; enum { DEFAULT_NLEVELS = 64 }; enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; + /** @brief Creates the HOG descriptor and detector. + + @param win_size Detection window size. Align to block size and block stride. + @param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now. + @param block_stride Block stride. It must be a multiple of cell size.
+ @param cell_size Cell size. Only (8, 8) is supported for now. + @param nbins Number of bins. Only 9 bins per cell are supported for now. + @param win_sigma Gaussian smoothing window parameter. + @param threshold_L2hys L2-Hys normalization method shrinkage. + @param gamma_correction Flag to specify whether the gamma correction preprocessing is required or + not. + @param nlevels Maximum number of detection window increases. + */ HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, double threshold_L2hys=0.2, bool gamma_correction=true, int nlevels=DEFAULT_NLEVELS); + /** @brief Returns the number of coefficients required for the classification. + */ size_t getDescriptorSize() const; + /** @brief Returns the block histogram size. + */ size_t getBlockHistogramSize() const; + /** @brief Sets coefficients for the linear SVM classifier. + */ void setSVMDetector(const std::vector& detector); + /** @brief Returns coefficients of the classifier trained for people detection (for default window size). + */ static std::vector getDefaultPeopleDetector(); + /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows). + */ static std::vector getPeopleDetector48x96(); + /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows). + */ static std::vector getPeopleDetector64x128(); + /** @brief Performs object detection without a multi-scale window. + + @param img Source image. CV_8UC1 and CV_8UC4 types are supported for now. + @param found_locations Left-top corner points of detected objects boundaries. + @param hit_threshold Threshold for the distance between features and SVM classifying plane. + Usually it is 0 and should be specified in the detector coefficients (as the last free + coefficient).
But if the free coefficient is omitted (which is allowed), you can specify it + manually here. + @param win_stride Window stride. It must be a multiple of block stride. + @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0). + */ void detect(const GpuMat& img, std::vector& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size()); + /** @brief Performs object detection with a multi-scale window. + + @param img Source image. See cuda::HOGDescriptor::detect for type limitations. + @param found_locations Detected objects boundaries. + @param hit_threshold Threshold for the distance between features and SVM classifying plane. See + cuda::HOGDescriptor::detect for details. + @param win_stride Window stride. It must be a multiple of block stride. + @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0). + @param scale0 Coefficient of the detection window increase. + @param group_threshold Coefficient to regulate the similarity threshold. When detected, some + objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles . + */ void detectMultiScale(const GpuMat& img, std::vector& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double scale0=1.05, @@ -98,6 +174,17 @@ struct CV_EXPORTS HOGDescriptor double hit_threshold, Size win_stride, Size padding, std::vector &conf_out, int group_threshold); + /** @brief Returns block descriptors computed for the whole image. + + @param img Source image. See cuda::HOGDescriptor::detect for type limitations. + @param win_stride Window stride. It must be a multiple of block stride. + @param descriptors 2D array of descriptors. + @param descr_format Descriptor storage format: + - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order. + - **DESCR_FORMAT_COL_BY_COL** - Column-major order. + + The function is mainly used to learn the classifier. 
+ */ void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format=DESCR_FORMAT_COL_BY_COL); @@ -145,20 +232,82 @@ protected: //////////////////////////// CascadeClassifier //////////////////////////// -// The cascade classifier class for object detection: supports old haar and new lbp xlm formats and nvbin for haar cascades olny. +/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades. + +@note + - A cascade classifier example can be found at + opencv_source_code/samples/gpu/cascadeclassifier.cpp + - An NVIDIA API specific cascade classifier example can be found at + opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp + */ class CV_EXPORTS CascadeClassifier_CUDA { public: CascadeClassifier_CUDA(); + /** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter. + + @param filename Name of the file from which the classifier is loaded. Only the old haar classifier + (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new + type of OpenCV XML cascade supported for LBP. + */ CascadeClassifier_CUDA(const String& filename); ~CascadeClassifier_CUDA(); + /** @brief Checks whether the classifier is loaded or not. + */ bool empty() const; + /** @brief Loads the classifier from a file. The previous content is destroyed. + + @param filename Name of the file from which the classifier is loaded. Only the old haar classifier + (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new + type of OpenCV XML cascade supported for LBP. + */ bool load(const String& filename); + /** @brief Destroys the loaded classifier.
+ */ void release(); - /* returns number of detected objects */ + /** @overload */ int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size()); + /** @brief Detects objects of different sizes in the input image. + + @param image Matrix of type CV_8U containing an image where objects should be detected. + @param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated + with the default size. If not empty, the function searches not more than N objects, where + N = sizeof(objectsBuf's data)/sizeof(cv::Rect). + @param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for + second signature and supported only for LBP cascades. + @param scaleFactor Parameter specifying how much the image size is reduced at each image scale. + @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have + to retain it. + @param minSize Minimum possible object size. Objects smaller than that are ignored. + + The detected objects are returned as a list of rectangles. + + The function returns the number of detected objects, so you can retrieve them as in the following + example: + @code + cuda::CascadeClassifier_CUDA cascade_gpu(...); + + Mat image_cpu = imread(...);
+ GpuMat image_gpu(image_cpu); + + GpuMat objbuf; + int detections_number = cascade_gpu.detectMultiScale( image_gpu, + objbuf, 1.2, minNeighbors); + + Mat obj_host; + // download only detected number of rectangles + objbuf.colRange(0, detections_number).download(obj_host); + + Rect* faces = obj_host.ptr<Rect>(); + for(int i = 0; i < detections_number; ++i) + cv::rectangle(image_cpu, faces[i], Scalar(255)); + + imshow("Faces", image_cpu); + @endcode + @sa CascadeClassifier::detectMultiScale + */ int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4); bool findLargestObject; @@ -174,8 +323,13 @@ private: friend class CascadeClassifier_CUDA_LBP; }; +//! @} cuda_objdetect + //////////////////////////// Labeling //////////////////////////// +//! @addtogroup cuda +//! @{ + //!performs labeling via graph cuts of a 2D regular 4-connected graph. CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& stream = Stream::Null()); @@ -192,8 +346,13 @@ CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Sc //! performs connected componnents labeling. CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null()); +//! @} + //////////////////////////// Calib3d //////////////////////////// +//! @addtogroup cuda_calib3d +//! @{ + CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream = Stream::Null()); @@ -201,13 +360,34 @@ CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tve const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream = Stream::Null()); +/** @brief Finds the object pose from 3D-2D point correspondences. + +@param object Single-row matrix of object points. +@param image Single-row matrix of image points.
+@param camera_mat 3x3 matrix of intrinsic camera parameters. +@param dist_coef Distortion coefficients. See undistortPoints for details. +@param rvec Output 3D rotation vector. +@param tvec Output 3D translation vector. +@param use_extrinsic_guess Flag to indicate that the function must use rvec and tvec as an +initial transformation guess. It is not supported for now. +@param num_iters Maximum number of RANSAC iterations. +@param max_dist Euclidean distance threshold to detect whether point is inlier or not. +@param min_inlier_count Flag to indicate that the function must stop if greater or equal number +of inliers is achieved. It is not supported for now. +@param inliers Output vector of inlier indices. + */ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, std::vector* inliers=NULL); +//! @} + //////////////////////////// VStab //////////////////////////// +//! @addtogroup cuda +//! @{ + //! removes points (CV_32FC2, single row matrix) with zero mask value CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask); @@ -215,6 +395,8 @@ CV_EXPORTS void calcWobbleSuppressionMaps( int left, int idx, int right, Size size, const Mat &ml, const Mat &mr, GpuMat &mapx, GpuMat &mapy); +//! 
@} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDA_HPP__ */ diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp index e493fd759c..8f3d352baf 100644 --- a/modules/cudaarithm/include/opencv2/cudaarithm.hpp +++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp @@ -49,18 +49,85 @@ #include "opencv2/core/cuda.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudaarithm Operations on Matrices + @{ + @defgroup cudaarithm_core Core Operations on Matrices + @defgroup cudaarithm_elem Per-element Operations + @defgroup cudaarithm_reduce Matrix Reductions + @defgroup cudaarithm_arithm Arithm Operations on Matrices + @} + @} + */ + namespace cv { namespace cuda { -//! adds one matrix to another (dst = src1 + src2) +//! @addtogroup cudaarithm +//! @{ + +//! @addtogroup cudaarithm_elem +//! @{ + +/** @brief Computes a matrix-matrix or matrix-scalar sum. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa add + */ CV_EXPORTS void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); -//! subtracts one matrix from another (dst = src1 - src2) +/** @brief Computes a matrix-matrix or matrix-scalar difference. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . +@param dst Destination matrix that has the same size and number of channels as the input array(s). 
+The depth is defined by dtype or src1 depth. +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa subtract + */ CV_EXPORTS void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted product of the two arrays (dst = scale * src1 * src2) +/** @brief Computes a matrix-matrix or matrix-scalar per-element product. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param scale Optional scale factor. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa multiply + */ CV_EXPORTS void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of the two arrays (dst = scale * (src1 / src2)) +/** @brief Computes a matrix-matrix or matrix-scalar division. + +@param src1 First source matrix or a scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param scale Optional scale factor. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +This function, in contrast to divide, uses a round-down rounding mode. + +@sa divide + */ CV_EXPORTS void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); //! 
computes element-wise weighted reciprocal of an array (dst = scale/src2) @@ -69,59 +136,199 @@ static inline void divide(double src1, InputArray src2, OutputArray dst, int dty divide(src1, src2, dst, 1.0, dtype, stream); } -//! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2)) +/** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa absdiff + */ CV_EXPORTS void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); -//! computes absolute value of each matrix element +/** @brief Computes an absolute value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa abs + */ CV_EXPORTS void abs(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); -//! computes square of each pixel in an image +/** @brief Computes a square value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void sqr(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); -//! computes square root of each pixel in an image +/** @brief Computes a square root of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa sqrt + */ CV_EXPORTS void sqrt(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); -//! computes exponent of each matrix element +/** @brief Computes an exponent of each matrix element. + +@param src Source matrix. 
+@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa exp + */ CV_EXPORTS void exp(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); -//! computes natural logarithm of absolute value of each matrix element +/** @brief Computes a natural logarithm of absolute value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa log + */ CV_EXPORTS void log(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); -//! computes power of each matrix element: -//! (dst(i,j) = pow( src(i,j) , power), if src.type() is integer -//! (dst(i,j) = pow(fabs(src(i,j)), power), otherwise +/** @brief Raises every matrix element to a power. + +@param src Source matrix. +@param power Exponent of power. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +The function pow raises every element of the input matrix to power : + +\f[\texttt{dst} (I) = \fork{\texttt{src}(I)^{power}}{if \texttt{power} is integer}{|\texttt{src}(I)|^{power}}{otherwise}\f] + +@sa pow + */ CV_EXPORTS void pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null()); -//! compares elements of two arrays (dst = src1 src2) +/** @brief Compares elements of two matrices (or of a matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param cmpop Flag specifying the relation between the elements to be checked: +- **CMP_EQ:** a(.) == b(.) +- **CMP_GT:** a(.) \> b(.) +- **CMP_GE:** a(.) \>= b(.) +- **CMP_LT:** a(.) \< b(.) +- **CMP_LE:** a(.) \<= b(.) +- **CMP_NE:** a(.) != b(.) +@param stream Stream for the asynchronous version.
+ +@sa compare + */ CV_EXPORTS void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()); -//! performs per-elements bit-wise inversion +/** @brief Performs a per-element bitwise inversion. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param mask Optional operation mask. 8-bit single channel image. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise disjunction of two arrays +/** @brief Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask. 8-bit single channel image. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise conjunction of two arrays +/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask. 8-bit single channel image. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise "exclusive or" operation +/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and scalar). 
+ +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask. 8-bit single channel image. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); -//! pixel by pixel right shift of an image by a constant value -//! supports 1, 3 and 4 channels images with integers elements +/** @brief Performs pixel by pixel right shift of an image by a constant value. + +@param src Source matrix. Supports 1, 3 and 4 channels images with integer elements. +@param val Constant values, one per channel. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void rshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); -//! pixel by pixel left shift of an image by a constant value -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth +/** @brief Performs pixel by pixel left shift of an image by a constant value. + +@param src Source matrix. Supports 1, 3 and 4 channels images with CV_8U , CV_16U or CV_32S +depth. +@param val Constant values, one per channel. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void lshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); -//! computes per-element minimum of two arrays (dst = min(src1, src2)) +/** @brief Computes the per-element minimum of two matrices (or a matrix and a scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version.
+ +@sa min + */ CV_EXPORTS void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); -//! computes per-element maximum of two arrays (dst = max(src1, src2)) +/** @brief Computes the per-element maximum of two matrices (or a matrix and a scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa max + */ CV_EXPORTS void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); -//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) +/** @brief Computes the weighted sum of two arrays. + +@param src1 First source array. +@param alpha Weight for the first array elements. +@param src2 Second source array of the same size and channel number as src1 . +@param beta Weight for the second array elements. +@param dst Destination array that has the same size and number of channels as the input arrays. +@param gamma Scalar added to each sum. +@param dtype Optional depth of the destination array. When both input arrays have the same depth, +dtype can be set to -1, which will be equivalent to src1.depth(). +@param stream Stream for the asynchronous version. + +The function addWeighted calculates the weighted sum of two arrays as follows: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )\f] + +where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. 
+ +@sa addWeighted + */ CV_EXPORTS void addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -131,142 +338,352 @@ static inline void scaleAdd(InputArray src1, double alpha, InputArray src2, Outp addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! applies fixed threshold to the image +/** @brief Applies a fixed-level threshold to each array element. + +@param src Source array (single-channel). +@param dst Destination array with the same size and type as src . +@param thresh Threshold value. +@param maxval Maximum value to use with THRESH_BINARY and THRESH_BINARY_INV threshold types. +@param type Threshold type. For details, see threshold . The THRESH_OTSU and THRESH_TRIANGLE +threshold types are not supported. +@param stream Stream for the asynchronous version. + +@sa threshold + */ CV_EXPORTS double threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); -//! computes magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type +/** @brief Computes magnitudes of complex matrix elements. + +@param xy Source complex matrix in the interleaved format ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + +@sa magnitude + */ CV_EXPORTS void magnitude(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); -//! computes squared magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type +/** @brief Computes squared magnitudes of complex matrix elements. + +@param xy Source complex matrix in the interleaved format ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitude squares ( CV_32FC1 ). +@param stream Stream for the asynchronous version. 
+ */ CV_EXPORTS void magnitudeSqr(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); -//! computes magnitude of each (x(i), y(i)) vector -//! supports only floating-point source +/** @overload + computes magnitude of each (x(i), y(i)) vector + supports only floating-point source +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void magnitude(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); -//! computes squared magnitude of each (x(i), y(i)) vector -//! supports only floating-point source +/** @overload + computes squared magnitude of each (x(i), y(i)) vector + supports only floating-point source +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitude squares ( CV_32FC1 ). +@param stream Stream for the asynchronous version. +*/ CV_EXPORTS void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); -//! computes angle of each (x(i), y(i)) vector -//! supports only floating-point source +/** @brief Computes polar angles of complex matrix elements. + +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa phase + */ CV_EXPORTS void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); -//! converts Cartesian coordinates to polar -//! 
supports only floating-point source +/** @brief Converts Cartesian coordinates into polar. + +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar + */ CV_EXPORTS void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); -//! converts polar coordinates to Cartesian -//! supports only floating-point source +/** @brief Converts polar coordinates into Cartesian. + +@param magnitude Source matrix containing magnitudes ( CV_32FC1 ). +@param angle Source matrix containing angles ( CV_32FC1 ). +@param x Destination matrix of real components ( CV_32FC1 ). +@param y Destination matrix of imaginary components ( CV_32FC1 ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); -//! makes multi-channel array out of several single-channel arrays +//! @} cudaarithm_elem + +//! @addtogroup cudaarithm_core +//! @{ + +/** @brief Makes a multi-channel matrix out of several single-channel matrices. + +@param src Array/vector of source matrices. +@param n Number of source matrices. +@param dst Destination matrix. +@param stream Stream for the asynchronous version. + +@sa merge + */ CV_EXPORTS void merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream = Stream::Null()); +/** @overload */ CV_EXPORTS void merge(const std::vector& src, OutputArray dst, Stream& stream = Stream::Null()); -//! 
copies each plane of a multi-channel array to a dedicated array +/** @brief Copies each plane of a multi-channel matrix into an array. + +@param src Source matrix. +@param dst Destination array/vector of single-channel matrices. +@param stream Stream for the asynchronous version. + +@sa split + */ CV_EXPORTS void split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()); +/** @overload */ CV_EXPORTS void split(InputArray src, std::vector& dst, Stream& stream = Stream::Null()); -//! transposes the matrix -//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc) +/** @brief Transposes a matrix. + +@param src1 Source matrix. 1-, 4-, 8-byte element sizes are supported for now. +@param dst Destination matrix. +@param stream Stream for the asynchronous version. + +@sa transpose + */ CV_EXPORTS void transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null()); -//! reverses the order of the rows, columns or both in a matrix -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth +/** @brief Flips a 2D matrix around vertical, horizontal, or both axes. + +@param src Source matrix. Supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or +CV_32F depth. +@param dst Destination matrix. +@param flipCode Flip mode for the source: +- 0 Flips around x-axis. +- \> 0 Flips around y-axis. +- \< 0 Flips around both axes. +@param stream Stream for the asynchronous version. + +@sa flip + */ CV_EXPORTS void flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()); -//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) -//! destination array will have the depth type as lut and the same channels number as source -//! supports CV_8UC1, CV_8UC3 types +/** @brief Base class for transform using lookup table. 
+ */ class CV_EXPORTS LookUpTable : public Algorithm { public: + /** @brief Transforms the source matrix into the destination matrix using the given look-up table: + dst(I) = lut(src(I)) . + + @param src Source matrix. CV_8UC1 and CV_8UC3 matrices are supported for now. + @param dst Destination matrix. + @param stream Stream for the asynchronous version. + */ virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; }; +/** @brief Creates implementation for cuda::LookUpTable . + +@param lut Look-up table of 256 elements. It is a continuous CV_8U matrix. + */ CV_EXPORTS Ptr createLookUpTable(InputArray lut); -//! copies 2D array to a larger destination array and pads borders with user-specifiable constant +/** @brief Forms a border around an image. + +@param src Source image. CV_8UC1 , CV_8UC4 , CV_32SC1 , and CV_32FC1 types are supported. +@param dst Destination image with the same type as src. The size is +Size(src.cols+left+right, src.rows+top+bottom) . +@param top +@param bottom +@param left +@param right Number of pixels in each direction from the source image rectangle to extrapolate. +For example: top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs to be built. +@param borderType Border type. See borderInterpolate for details. BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param value Border value. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, Scalar value = Scalar(), Stream& stream = Stream::Null()); -//! computes norm of array -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports all matrices except 64F +//! @} cudaarithm_core + +//! @addtogroup cudaarithm_reduce +//! @{ + +/** @brief Returns the norm of a matrix (or difference of two matrices). + +@param src1 Source matrix. 
Any matrices except 64F are supported. +@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now. +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + +@sa norm + */ CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf); +/** @overload +uses new buffer, no mask +*/ static inline double norm(InputArray src, int normType) { GpuMat buf; return norm(src, normType, GpuMat(), buf); } +/** @overload +no mask +*/ static inline double norm(InputArray src, int normType, GpuMat& buf) { return norm(src, normType, GpuMat(), buf); } -//! computes norm of the difference between two arrays -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports only CV_8UC1 type +/** @brief Returns the norm of the difference of two matrices. + +@param src1 Source matrix. Any matrices except 64F are supported. +@param src2 Second source matrix (if any) with the same size and type as src1. +@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + +@sa norm + */ CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType=NORM_L2); +/** @overload +uses new buffer +*/ static inline double norm(InputArray src1, InputArray src2, int normType=NORM_L2) { GpuMat buf; return norm(src1, src2, buf, normType); } -//! computes sum of array elements -//! supports only single channel images +/** @brief Returns the sum of matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src and CV_8UC1 type. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. 
+ +@sa sum + */ CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf); +/** @overload +uses new buffer, no mask +*/ static inline Scalar sum(InputArray src) { GpuMat buf; return sum(src, GpuMat(), buf); } +/** @overload +no mask +*/ static inline Scalar sum(InputArray src, GpuMat& buf) { return sum(src, GpuMat(), buf); } -//! computes sum of array elements absolute values -//! supports only single channel images +/** @brief Returns the sum of absolute values for matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src and CV_8UC1 type. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + */ CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf); +/** @overload +uses new buffer, no mask +*/ static inline Scalar absSum(InputArray src) { GpuMat buf; return absSum(src, GpuMat(), buf); } +/** @overload +no mask +*/ static inline Scalar absSum(InputArray src, GpuMat& buf) { return absSum(src, GpuMat(), buf); } -//! computes squared sum of array elements -//! supports only single channel images +/** @brief Returns the squared sum of matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src and CV_8UC1 type. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + */ CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf); +/** @overload +uses new buffer, no mask +*/ static inline Scalar sqrSum(InputArray src) { GpuMat buf; return sqrSum(src, GpuMat(), buf); } +/** @overload +no mask +*/ static inline Scalar sqrSum(InputArray src, GpuMat& buf) { return sqrSum(src, GpuMat(), buf); } -//! finds global minimum and maximum array elements and returns their values +/** @brief Finds global minimum and maximum matrix elements and returns their values. 
+ +@param src Single-channel source image. +@param minVal Pointer to the returned minimum value. Use NULL if not required. +@param maxVal Pointer to the returned maximum value. Use NULL if not required. +@param mask Optional mask to select a sub-matrix. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + +The function does not work with CV_64F images on GPUs with the compute capability \< 1.3. + +@sa minMaxLoc + */ CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf); +/** @overload +uses new buffer +*/ static inline void minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray()) { GpuMat buf; minMax(src, minVal, maxVal, mask, buf); } -//! finds global minimum and maximum array elements and returns their values with locations +/** @brief Finds global minimum and maximum matrix elements and returns their values with locations. + +@param src Single-channel source image. +@param minVal Pointer to the returned minimum value. Use NULL if not required. +@param maxVal Pointer to the returned maximum value. Use NULL if not required. +@param minLoc Pointer to the returned minimum location. Use NULL if not required. +@param maxLoc Pointer to the returned maximum location. Use NULL if not required. +@param mask Optional mask to select a sub-matrix. +@param valbuf Optional values buffer to avoid extra memory allocations. It is resized +automatically. +@param locbuf Optional locations buffer to avoid extra memory allocations. It is resized +automatically. +The function does not work with CV_64F images on GPU with the compute capability \< 1.3. 
+ +@sa minMaxLoc + */ CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray mask, GpuMat& valbuf, GpuMat& locbuf); +/** @overload +uses new buffer +*/ static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, InputArray mask=noArray()) { @@ -274,34 +691,104 @@ static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, P minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf); } -//! counts non-zero array elements +/** @brief Counts non-zero matrix elements. + +@param src Single-channel source image. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + +The function does not work with CV_64F images on GPUs with the compute capability \< 1.3. + +@sa countNonZero + */ CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf); +/** @overload +uses new buffer +*/ static inline int countNonZero(const GpuMat& src) { GpuMat buf; return countNonZero(src, buf); } -//! reduces a matrix to a vector +/** @brief Reduces a matrix to a vector. + +@param mtx Source 2D matrix. +@param vec Destination vector. Its size and type is defined by dim and dtype parameters. +@param dim Dimension index along which the matrix is reduced. 0 means that the matrix is reduced +to a single row. 1 means that the matrix is reduced to a single column. +@param reduceOp Reduction operation that could be one of the following: +- **CV_REDUCE_SUM** The output is the sum of all rows/columns of the matrix. +- **CV_REDUCE_AVG** The output is the mean vector of all rows/columns of the matrix. +- **CV_REDUCE_MAX** The output is the maximum (column/row-wise) of all rows/columns of the +matrix. +- **CV_REDUCE_MIN** The output is the minimum (column/row-wise) of all rows/columns of the +matrix. +@param dtype When it is negative, the destination vector will have the same type as the source +matrix. 
Otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels()) . +@param stream Stream for the asynchronous version. + +The function reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of +1D vectors and performing the specified operation on the vectors until a single row/column is +obtained. For example, the function can be used to compute horizontal and vertical projections of a +raster image. In case of CV_REDUCE_SUM and CV_REDUCE_AVG , the output may have a larger element +bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction +modes. + +@sa reduce + */ CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); -//! computes mean value and standard deviation of all or selected array elements -//! supports only CV_8UC1 type +/** @brief Computes a mean value and a standard deviation of matrix elements. + +@param mtx Source matrix. CV_8UC1 matrices are supported for now. +@param mean Mean value. +@param stddev Standard deviation value. +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. + +@sa meanStdDev + */ CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf); +/** @overload +uses new buffer +*/ static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev) { GpuMat buf; meanStdDev(src, mean, stddev, buf); } -//! computes the standard deviation of integral images -//! supports only CV_32SC1 source type and CV_32FC1 sqr type -//! output will have CV_32FC1 type +/** @brief Computes a standard deviation of integral images. + +@param src Source image. Only the CV_32SC1 type is supported. +@param sqr Squared source image. Only the CV_32FC1 type is supported. +@param dst Destination image with the same type and size as src . +@param rect Rectangular window. +@param stream Stream for the asynchronous version. 
+ */ CV_EXPORTS void rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null()); -//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values +/** @brief Normalizes the norm or value range of an array. + +@param src Input array. +@param dst Output array of the same size as src . +@param alpha Norm value to normalize to or the lower range boundary in case of the range +normalization. +@param beta Upper range boundary in case of the range normalization; it is not used for the norm +normalization. +@param norm_type Normalization type ( NORM_MINMAX , NORM_L2 , NORM_L1 or NORM_INF ). +@param dtype When negative, the output array has the same type as src; otherwise, it has the same +number of channels as src and the depth =CV_MAT_DEPTH(dtype). +@param mask Optional operation mask. +@param norm_buf Optional buffer to avoid extra memory allocations. It is resized automatically. +@param cvt_buf Optional buffer to avoid extra memory allocations. It is resized automatically. + +@sa normalize + */ CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf); +/** @overload +uses new buffers +*/ static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray()) { @@ -310,65 +797,179 @@ static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf); } -//! computes the integral image -//! sum will have CV_32S type, but will contain unsigned int values -//! supports only CV_8UC1 source type +/** @brief Computes an integral image. + +@param src Source image. Only CV_8UC1 images are supported for now. 
+@param sum Integral image containing 32-bit unsigned integer values packed into CV_32SC1 . +@param buffer Optional buffer to avoid extra memory allocations. It is resized automatically. +@param stream Stream for the asynchronous version. + +@sa integral + */ CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null()); static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null()) { integral(src, sum, buffer, stream); } +/** @overload +uses new buffer +*/ static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null()) { GpuMat buffer; integral(src, sum, buffer, stream); } -//! computes squared integral image -//! result matrix will have 64F type, but will contain 64U values -//! supports source images of 8UC1 type only +/** @brief Computes a squared integral image. + +@param src Source image. Only CV_8UC1 images are supported for now. +@param sqsum Squared integral image containing 64-bit unsigned integer values packed into +CV_64FC1 . +@param buf Optional buffer to avoid extra memory allocations. It is resized automatically. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null()); +/** @overload +uses new buffer +*/ static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null()) { GpuMat buffer; sqrIntegral(src, sqsum, buffer, stream); } +//! @} cudaarithm_reduce + +//! @addtogroup cudaarithm_arithm +//! @{ + +/** @brief Performs generalized matrix multiplication. + +@param src1 First multiplied input matrix that should have CV_32FC1 , CV_64FC1 , CV_32FC2 , or +CV_64FC2 type. +@param src2 Second multiplied input matrix of the same type as src1 . +@param alpha Weight of the matrix product. +@param src3 Third optional delta matrix added to the matrix product. 
It should have the same type +as src1 and src2 . +@param beta Weight of src3 . +@param dst Destination matrix. It has the proper size and the same type as input matrices. +@param flags Operation flags: +- **GEMM_1_T** transpose src1 +- **GEMM_2_T** transpose src2 +- **GEMM_3_T** transpose src3 +@param stream Stream for the asynchronous version. + +The function performs generalized matrix multiplication similar to the gemm functions in BLAS level +3. For example, gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T) corresponds to + +\f[\texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T\f] + +@note Transposition operation doesn't support CV_64FC2 input type. + +@sa gemm + */ CV_EXPORTS void gemm(InputArray src1, InputArray src2, double alpha, InputArray src3, double beta, OutputArray dst, int flags = 0, Stream& stream = Stream::Null()); -//! performs per-element multiplication of two full (not packed) Fourier spectrums -//! supports 32FC2 matrices only (interleaved format) +/** @brief Performs a per-element multiplication of two Fourier spectrums. + +@param src1 First spectrum. +@param src2 Second spectrum with the same size and type as src1 . +@param dst Destination spectrum. +@param flags Mock parameter used for CPU/CUDA interfaces similarity. +@param conjB Optional flag to specify if the second spectrum needs to be conjugated before the +multiplication. +@param stream Stream for the asynchronous version. + +Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + +@sa mulSpectrums + */ CV_EXPORTS void mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null()); -//! performs per-element multiplication of two full (not packed) Fourier spectrums -//! 
supports 32FC2 matrices only (interleaved format) +/** @brief Performs a per-element multiplication of two Fourier spectrums and scales the result. + +@param src1 First spectrum. +@param src2 Second spectrum with the same size and type as src1 . +@param dst Destination spectrum. +@param flags Mock parameter used for CPU/CUDA interfaces similarity. +@param scale Scale constant. +@param conjB Optional flag to specify if the second spectrum needs to be conjugated before the +multiplication. +@param stream Stream for the asynchronous version. + +Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + +@sa mulSpectrums + */ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); -//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. -//! Param dft_size is the size of DFT transform. -//! -//! If the source matrix is not continous, then additional copy will be done, -//! so to avoid copying ensure the source matrix is continous one. If you want to use -//! preallocated output ensure it is continuous too, otherwise it will be reallocated. -//! -//! Being implemented via CUFFT real-to-complex transform result contains only non-redundant values -//! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved. -//! -//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format. +/** @brief Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. + +@param src Source matrix (real or complex). +@param dst Destination matrix (real or complex). +@param dft_size Size of a discrete Fourier transform. +@param flags Optional flags: +- **DFT_ROWS** transforms each individual row of the source matrix. 
+- **DFT_SCALE** scales the result: divide it by the number of elements in the transform +(obtained from dft_size ). +- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real +cases are always forward and inverse, respectively). +- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of +real-complex transform, so the destination matrix must be real. +@param stream Stream for the asynchronous version. + +Use to handle real matrices ( CV_32FC1 ) and complex matrices in the interleaved format ( CV_32FC2 ). + +The source matrix should be continuous, otherwise reallocation and data copying is performed. The +function chooses an operation mode depending on the flags, size, and channel count of the source +matrix: + +- If the source matrix is complex and the output is not specified as real, the destination +matrix is complex and has the dft_size size and CV_32FC2 type. The destination matrix +contains a full result of the DFT (forward or inverse). +- If the source matrix is complex and the output is specified as real, the function assumes that +its input is the result of the forward transform (see the next item). The destination matrix +has the dft_size size and CV_32FC1 type. It contains the result of the inverse DFT. +- If the source matrix is real (its type is CV_32FC1 ), forward DFT is performed. The result of +the DFT is packed into complex ( CV_32FC2 ) matrix. So, the width of the destination matrix +is dft_size.width / 2 + 1 . But if the source is a single column, the height is reduced +instead of the width. + +@sa dft + */ CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); -//! computes convolution (or cross-correlation) of two images using discrete Fourier transform -//! supports source images of 32FC1 type only -//! result matrix will have 32FC1 type +/** @brief Base class for convolution (or cross-correlation) operator. 
+ */ class CV_EXPORTS Convolution : public Algorithm { public: + /** @brief Computes a convolution (or cross-correlation) of two images. + + @param image Source image. Only CV_32FC1 images are supported for now. + @param templ Template image. The size is not greater than the image size. The type is the same as + image . + @param result Result image. If image is *W x H* and templ is *w x h*, then result must be *W-w+1 x + H-h+1*. + @param ccorr Flags to evaluate cross-correlation instead of convolution. + @param stream Stream for the asynchronous version. + */ virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0; }; +/** @brief Creates implementation for cuda::Convolution . + +@param user_block_size Block size. If you leave default value Size(0,0) then automatic +estimation of block size will be used (which is optimized for speed). By varying user_block_size +you can reduce memory requirements at the cost of speed. + */ CV_EXPORTS Ptr createConvolution(Size user_block_size = Size()); +//! @} cudaarithm_arithm + +//! @} cudaarithm + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDAARITHM_HPP__ */ diff --git a/modules/cudabgsegm/include/opencv2/cudabgsegm.hpp b/modules/cudabgsegm/include/opencv2/cudabgsegm.hpp index a08ed64b1a..4b5e305d6a 100644 --- a/modules/cudabgsegm/include/opencv2/cudabgsegm.hpp +++ b/modules/cudabgsegm/include/opencv2/cudabgsegm.hpp @@ -50,11 +50,33 @@ #include "opencv2/core/cuda.hpp" #include "opencv2/video/background_segm.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudabgsegm Background Segmentation + @} + */ + namespace cv { namespace cuda { +//! @addtogroup cudabgsegm +//! @{ + //////////////////////////////////////////////////// // MOG +/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm. + +The class discriminates between foreground and background pixels by building and maintaining a model +of the background. 
Any pixel which does not fit this model is then deemed to be foreground. The +class implements algorithm described in @cite MOG2001 . + +@sa BackgroundSubtractorMOG + +@note + - An example on gaussian mixture based background/foreground segmentation can be found at + opencv_source_code/samples/gpu/bgfg_segm.cpp + */ class CV_EXPORTS BackgroundSubtractorMOG : public cv::BackgroundSubtractor { public: @@ -78,6 +100,14 @@ public: virtual void setNoiseSigma(double noiseSigma) = 0; }; +/** @brief Creates mixture-of-gaussian background subtractor + +@param history Length of the history. +@param nmixtures Number of Gaussian mixtures. +@param backgroundRatio Background ratio. +@param noiseSigma Noise strength (standard deviation of the brightness or each color channel). 0 +means some automatic value. + */ CV_EXPORTS Ptr createBackgroundSubtractorMOG(int history = 200, int nmixtures = 5, double backgroundRatio = 0.7, double noiseSigma = 0); @@ -85,6 +115,14 @@ CV_EXPORTS Ptr //////////////////////////////////////////////////// // MOG2 +/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm. + +The class discriminates between foreground and background pixels by building and maintaining a model +of the background. Any pixel which does not fit this model is then deemed to be foreground. The +class implements algorithm described in @cite Zivkovic2004 . + +@sa BackgroundSubtractorMOG2 + */ class CV_EXPORTS BackgroundSubtractorMOG2 : public cv::BackgroundSubtractorMOG2 { public: @@ -96,6 +134,15 @@ public: virtual void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const = 0; }; +/** @brief Creates MOG2 Background Subtractor + +@param history Length of the history. +@param varThreshold Threshold on the squared Mahalanobis distance between the pixel and the model +to decide whether a pixel is well described by the background model. This parameter does not +affect the background update. 
+@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ CV_EXPORTS Ptr createBackgroundSubtractorMOG2(int history = 500, double varThreshold = 16, bool detectShadows = true); @@ -103,6 +150,12 @@ CV_EXPORTS Ptr //////////////////////////////////////////////////// // GMG +/** @brief Background/Foreground Segmentation Algorithm. + +The class discriminates between foreground and background pixels by building and maintaining a model +of the background. Any pixel which does not fit this model is then deemed to be foreground. The +class implements algorithm described in @cite Gold2012 . + */ class CV_EXPORTS BackgroundSubtractorGMG : public cv::BackgroundSubtractor { public: @@ -140,54 +193,71 @@ public: virtual void setMaxVal(double val) = 0; }; +/** @brief Creates GMG Background Subtractor + +@param initializationFrames Number of frames of video to use to initialize histograms. +@param decisionThreshold Value above which pixel is determined to be FG. + */ CV_EXPORTS Ptr createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8); //////////////////////////////////////////////////// // FGD -/** - * Foreground Object Detection from Videos Containing Complex Background. - * Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian. - * ACM MM2003 9p +/** @brief The class discriminates between foreground and background pixels by building and maintaining a model +of the background. + +Any pixel which does not fit this model is then deemed to be foreground. The class implements +algorithm described in @cite FGD2003 . +@sa BackgroundSubtractor */ class CV_EXPORTS BackgroundSubtractorFGD : public cv::BackgroundSubtractor { public: + /** @brief Returns the output foreground regions calculated by findContours. + + @param foreground_regions Output array (CPU memory). 
+ */ virtual void getForegroundRegions(OutputArrayOfArrays foreground_regions) = 0; }; struct CV_EXPORTS FGDParams { - int Lc; // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128. - int N1c; // Number of color vectors used to model normal background color variation at a given pixel. - int N2c; // Number of color vectors retained at given pixel. Must be > N1c, typically ~ 5/3 of N1c. - // Used to allow the first N1c vectors to adapt over time to changing background. + int Lc; //!< Quantized levels per 'color' component. Power of two, typically 32, 64 or 128. + int N1c; //!< Number of color vectors used to model normal background color variation at a given pixel. + int N2c; //!< Number of color vectors retained at given pixel. Must be > N1c, typically ~ 5/3 of N1c. + //!< Used to allow the first N1c vectors to adapt over time to changing background. - int Lcc; // Quantized levels per 'color co-occurrence' component. Power of two, typically 16, 32 or 64. - int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel. - int N2cc; // Number of color co-occurrence vectors retained at given pixel. Must be > N1cc, typically ~ 5/3 of N1cc. - // Used to allow the first N1cc vectors to adapt over time to changing background. + int Lcc; //!< Quantized levels per 'color co-occurrence' component. Power of two, typically 16, 32 or 64. + int N1cc; //!< Number of color co-occurrence vectors used to model normal background color variation at a given pixel. + int N2cc; //!< Number of color co-occurrence vectors retained at given pixel. Must be > N1cc, typically ~ 5/3 of N1cc. + //!< Used to allow the first N1cc vectors to adapt over time to changing background. - bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE. - int perform_morphing; // Number of erode-dilate-erode foreground-blob cleanup iterations. 
- // These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1. + bool is_obj_without_holes; //!< If TRUE we ignore holes within foreground blobs. Defaults to TRUE. + int perform_morphing; //!< Number of erode-dilate-erode foreground-blob cleanup iterations. + //!< These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1. - float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1. - float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005. - float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1. + float alpha1; //!< How quickly we forget old background pixel values seen. Typically set to 0.1. + float alpha2; //!< "Controls speed of feature learning". Depends on T. Typical value circa 0.005. + float alpha3; //!< Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1. - float delta; // Affects color and color co-occurrence quantization, typically set to 2. - float T; // A percentage value which determines when new features can be recognized as new background. (Typically 0.9). - float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold. + float delta; //!< Affects color and color co-occurrence quantization, typically set to 2. + float T; //!< A percentage value which determines when new features can be recognized as new background. (Typically 0.9). + float minArea; //!< Discard foreground blobs whose bounding box is smaller than this threshold. - // default Params + //! default Params FGDParams(); }; +/** @brief Creates FGD Background Subtractor + +@param params Algorithm's parameters. See @cite FGD2003 for explanation. + */ CV_EXPORTS Ptr createBackgroundSubtractorFGD(const FGDParams& params = FGDParams()); +//! 
@} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDABGSEGM_HPP__ */ diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp index 747c044ee5..610ecf607f 100644 --- a/modules/cudacodec/include/opencv2/cudacodec.hpp +++ b/modules/cudacodec/include/opencv2/cudacodec.hpp @@ -50,8 +50,18 @@ #include "opencv2/core/cuda.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudacodec Video Encoding/Decoding + @} + */ + namespace cv { namespace cudacodec { +//! @addtogroup cudacodec +//! @{ + ////////////////////////////////// Video Encoding ////////////////////////////////// // Works only under Windows. @@ -68,35 +78,53 @@ enum SurfaceFormat SF_GRAY = SF_BGR }; +/** @brief Different parameters for CUDA video encoder. + */ struct CV_EXPORTS EncoderParams { - int P_Interval; // NVVE_P_INTERVAL, - int IDR_Period; // NVVE_IDR_PERIOD, - int DynamicGOP; // NVVE_DYNAMIC_GOP, - int RCType; // NVVE_RC_TYPE, - int AvgBitrate; // NVVE_AVG_BITRATE, - int PeakBitrate; // NVVE_PEAK_BITRATE, - int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, - int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, - int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, - int DeblockMode; // NVVE_DEBLOCK_MODE, - int ProfileLevel; // NVVE_PROFILE_LEVEL, - int ForceIntra; // NVVE_FORCE_INTRA, - int ForceIDR; // NVVE_FORCE_IDR, - int ClearStat; // NVVE_CLEAR_STAT, - int DIMode; // NVVE_SET_DEINTERLACE, - int Presets; // NVVE_PRESETS, - int DisableCabac; // NVVE_DISABLE_CABAC, - int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE - int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS + int P_Interval; //!< NVVE_P_INTERVAL, + int IDR_Period; //!< NVVE_IDR_PERIOD, + int DynamicGOP; //!< NVVE_DYNAMIC_GOP, + int RCType; //!< NVVE_RC_TYPE, + int AvgBitrate; //!< NVVE_AVG_BITRATE, + int PeakBitrate; //!< NVVE_PEAK_BITRATE, + int QP_Level_Intra; //!< NVVE_QP_LEVEL_INTRA, + int QP_Level_InterP; //!< NVVE_QP_LEVEL_INTER_P, + int QP_Level_InterB; //!< NVVE_QP_LEVEL_INTER_B, + int 
DeblockMode; //!< NVVE_DEBLOCK_MODE, + int ProfileLevel; //!< NVVE_PROFILE_LEVEL, + int ForceIntra; //!< NVVE_FORCE_INTRA, + int ForceIDR; //!< NVVE_FORCE_IDR, + int ClearStat; //!< NVVE_CLEAR_STAT, + int DIMode; //!< NVVE_SET_DEINTERLACE, + int Presets; //!< NVVE_PRESETS, + int DisableCabac; //!< NVVE_DISABLE_CABAC, + int NaluFramingType; //!< NVVE_CONFIGURE_NALU_FRAMING_TYPE + int DisableSPSPPS; //!< NVVE_DISABLE_SPS_PPS EncoderParams(); + /** @brief Constructors. + + @param configFile Config file name. + + Creates default parameters or reads parameters from config file. + */ explicit EncoderParams(const String& configFile); + /** @brief Reads parameters from config file. + + @param configFile Config file name. + */ void load(const String& configFile); + /** @brief Saves parameters to config file. + + @param configFile Config file name. + */ void save(const String& configFile) const; }; +/** @brief Callbacks for CUDA video encoder. + */ class CV_EXPORTS EncoderCallBack { public: @@ -109,41 +137,109 @@ public: virtual ~EncoderCallBack() {} - //! callback function to signal the start of bitstream that is to be encoded - //! callback must allocate host buffer for CUDA encoder and return pointer to it and it's size + /** @brief Callback function to signal the start of bitstream that is to be encoded. + + Callback must allocate host buffer for CUDA encoder and return pointer to it and its size. + */ virtual uchar* acquireBitStream(int* bufferSize) = 0; - //! callback function to signal that the encoded bitstream is ready to be written to file + /** @brief Callback function to signal that the encoded bitstream is ready to be written to file. + */ virtual void releaseBitStream(unsigned char* data, int size) = 0; - //! callback function to signal that the encoding operation on the frame has started + /** @brief Callback function to signal that the encoding operation on the frame has started.
+ + @param frameNumber + @param picType Specify frame type (I-Frame, P-Frame or B-Frame). + */ virtual void onBeginFrame(int frameNumber, PicType picType) = 0; - //! callback function signals that the encoding operation on the frame has finished + /** @brief Callback function signals that the encoding operation on the frame has finished. + + @param frameNumber + @param picType Specify frame type (I-Frame, P-Frame or B-Frame). + */ virtual void onEndFrame(int frameNumber, PicType picType) = 0; }; +/** @brief Video writer interface. + +The implementation uses H264 video codec. + +@note Currently only Windows platform is supported. + +@note + - An example on how to use the VideoWriter class can be found at + opencv_source_code/samples/gpu/video_writer.cpp + */ class CV_EXPORTS VideoWriter { public: virtual ~VideoWriter() {} - //! writes the next frame from GPU memory + /** @brief Writes the next video frame. + + @param frame The written frame. + @param lastFrame Indicates that it is end of stream. The parameter can be ignored. + + The method writes the specified image to video file. The image must have the same size and the same + surface format as has been specified when opening the video writer. + */ virtual void write(InputArray frame, bool lastFrame = false) = 0; virtual EncoderParams getEncoderParams() const = 0; }; -//! create VideoWriter for specified output file (only AVI file format is supported) +/** @brief Creates video writer. + +@param fileName Name of the output video file. Only AVI file format is supported. +@param frameSize Size of the input video frames. +@param fps Framerate of the created video stream. +@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , +SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before +encoding, frames with other formats will be used as is. + +The constructors initialize video writer. FFMPEG is used to write videos.
User can implement own +multiplexing with cudacodec::EncoderCallBack . + */ CV_EXPORTS Ptr createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format = SF_BGR); +/** @overload +@param fileName Name of the output video file. Only AVI file format is supported. +@param frameSize Size of the input video frames. +@param fps Framerate of the created video stream. +@param params Encoder parameters. See cudacodec::EncoderParams . +@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , +SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before +encoding, frames with other formats will be used as is. +*/ CV_EXPORTS Ptr createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); -//! create VideoWriter for user-defined callbacks +/** @overload +@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you +want to work with raw video stream. +@param frameSize Size of the input video frames. +@param fps Framerate of the created video stream. +@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , +SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before +encoding, frames with other formats will be used as is. +*/ CV_EXPORTS Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR); +/** @overload +@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you +want to work with raw video stream. +@param frameSize Size of the input video frames. +@param fps Framerate of the created video stream. +@param params Encoder parameters. See cudacodec::EncoderParams . +@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , +SF_IYUV , SF_BGR or SF_GRAY). 
BGR or gray frames will be converted to YV12 format before +encoding, frames with other formats will be used as is. +*/ CV_EXPORTS Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); ////////////////////////////////// Video Decoding ////////////////////////////////////////// +/** @brief Video codecs supported by cudacodec::VideoReader . + */ enum Codec { MPEG1 = 0, @@ -155,13 +251,15 @@ enum Codec H264_SVC, H264_MVC, - Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0) - Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0) - Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0) - Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2) - Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) // UYVY (4:2:2) + Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), //!< Y,U,V (4:2:0) + Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), //!< Y,V,U (4:2:0) + Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), //!< Y,UV (4:2:0) + Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), //!< YUYV/YUY2 (4:2:2) + Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) //!< UYVY (4:2:2) }; +/** @brief Chroma formats supported by cudacodec::VideoReader . + */ enum ChromaFormat { Monochrome = 0, @@ -170,6 +268,8 @@ enum ChromaFormat YUV444 }; +/** @brief Struct providing information about video file format. : + */ struct FormatInfo { Codec codec; @@ -178,29 +278,65 @@ struct FormatInfo int height; }; +/** @brief Video reader interface. + +@note + - An example on how to use the videoReader class can be found at + opencv_source_code/samples/gpu/video_reader.cpp + */ class CV_EXPORTS VideoReader { public: virtual ~VideoReader() {} + /** @brief Grabs, decodes and returns the next video frame. 
+ + If no frames have been grabbed (there are no more frames in video file), the method returns false. + The method throws Exception if error occurs. + */ virtual bool nextFrame(OutputArray frame) = 0; + /** @brief Returns information about video file format. + */ virtual FormatInfo format() const = 0; }; +/** @brief Interface for video demultiplexing. + +User can implement own demultiplexing by implementing this interface. + */ class CV_EXPORTS RawVideoSource { public: virtual ~RawVideoSource() {} + /** @brief Returns next packet with RAW video frame. + + @param data Pointer to frame data. + @param size Size in bytes of current frame. + @param endOfFile Indicates that it is end of stream. + */ virtual bool getNextPacket(unsigned char** data, int* size, bool* endOfFile) = 0; + /** @brief Returns information about video file format. + */ virtual FormatInfo format() const = 0; }; +/** @brief Creates video reader. + +@param filename Name of the input video file. + +FFMPEG is used to read videos. User can implement own demultiplexing with cudacodec::RawVideoSource . + */ CV_EXPORTS Ptr createVideoReader(const String& filename); +/** @overload +@param source RAW video source implemented by user. +*/ CV_EXPORTS Ptr createVideoReader(const Ptr& source); +//! @} + }} // namespace cv { namespace cudacodec { #endif /* __OPENCV_CUDACODEC_HPP__ */ diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index a89580e0ee..f61d2dfd00 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -50,150 +50,175 @@ #include "opencv2/core/cuda.hpp" #include "opencv2/cudafilters.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudafeatures2d Feature Detection and Description + @} + */ + namespace cv { namespace cuda { +//! @addtogroup cudafeatures2d +//! @{ + +/** @brief Brute-force descriptor matcher.
+ +For each descriptor in the first set, this matcher finds the closest descriptor in the second set +by trying each one. This descriptor matcher supports masking permissible matches between descriptor +sets. + +The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups +of match methods: for matching descriptors of one image with another image or with an image set. +Also, all functions have an alternative to save results either to the GPU memory or to the CPU +memory. + +@sa DescriptorMatcher, BFMatcher + */ class CV_EXPORTS BFMatcher_CUDA { public: explicit BFMatcher_CUDA(int norm = cv::NORM_L2); - // Add descriptors to train descriptor collection + //! Add descriptors to train descriptor collection void add(const std::vector& descCollection); - // Get train descriptors collection + //! Get train descriptors collection const std::vector& getTrainDescriptors() const; - // Clear train descriptors collection + //! Clear train descriptors collection void clear(); - // Return true if there are not train descriptors in collection + //! Return true if there are not train descriptors in collection bool empty() const; - // Return true if the matcher supports mask in match methods + //! Return true if the matcher supports mask in match methods bool isMaskSupported() const; - // Find one best match for each query descriptor + //! Find one best match for each query descriptor void matchSingle(const GpuMat& query, const GpuMat& train, GpuMat& trainIdx, GpuMat& distance, const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); - // Download trainIdx and distance and convert it to CPU vector with DMatch + //! Download trainIdx and distance and convert it to CPU vector with DMatch static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector& matches); - // Convert trainIdx and distance to vector with DMatch + //! 
Convert trainIdx and distance to vector with DMatch static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector& matches); - // Find one best match for each query descriptor + //! Find one best match for each query descriptor void match(const GpuMat& query, const GpuMat& train, std::vector& matches, const GpuMat& mask = GpuMat()); - // Make gpu collection of trains and masks in suitable format for matchCollection function + //! Make gpu collection of trains and masks in suitable format for matchCollection function void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector& masks = std::vector()); - // Find one best match from train collection for each query descriptor + //! Find one best match from train collection for each query descriptor void matchCollection(const GpuMat& query, const GpuMat& trainCollection, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null()); - // Download trainIdx, imgIdx and distance and convert it to vector with DMatch + //! Download trainIdx, imgIdx and distance and convert it to vector with DMatch static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector& matches); - // Convert trainIdx, imgIdx and distance to vector with DMatch + //! Convert trainIdx, imgIdx and distance to vector with DMatch static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector& matches); - // Find one best match from train collection for each query descriptor. + //! Find one best match from train collection for each query descriptor. void match(const GpuMat& query, std::vector& matches, const std::vector& masks = std::vector()); - // Find k best matches for each query descriptor (in increasing order of distances) + //! 
Find k best matches for each query descriptor (in increasing order of distances) void knnMatchSingle(const GpuMat& query, const GpuMat& train, GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); - // Download trainIdx and distance and convert it to vector with DMatch - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + //! Download trainIdx and distance and convert it to vector with DMatch + //! compactResult is used when mask is not empty. If compactResult is false matches + //! vector will have the same size as queryDescriptors rows. If compactResult is true + //! matches vector will not contain matches for fully masked out query descriptors. static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx and distance to vector with DMatch + //! Convert trainIdx and distance to vector with DMatch static void knnMatchConvert(const Mat& trainIdx, const Mat& distance, std::vector< std::vector >& matches, bool compactResult = false); - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + //! Find k best matches for each query descriptor (in increasing order of distances). + //! compactResult is used when mask is not empty. If compactResult is false matches + //! vector will have the same size as queryDescriptors rows. If compactResult is true + //! 
matches vector will not contain matches for fully masked out query descriptors. void knnMatch(const GpuMat& query, const GpuMat& train, std::vector< std::vector >& matches, int k, const GpuMat& mask = GpuMat(), bool compactResult = false); - // Find k best matches from train collection for each query descriptor (in increasing order of distances) + //! Find k best matches from train collection for each query descriptor (in increasing order of distances) void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null()); - // Download trainIdx and distance and convert it to vector with DMatch - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + //! Download trainIdx and distance and convert it to vector with DMatch + //! compactResult is used when mask is not empty. If compactResult is false matches + //! vector will have the same size as queryDescriptors rows. If compactResult is true + //! matches vector will not contain matches for fully masked out query descriptors. + //! @see BFMatcher_CUDA::knnMatchDownload static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx and distance to vector with DMatch + //! Convert trainIdx and distance to vector with DMatch + //! @see BFMatcher_CUDA::knnMatchConvert static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector< std::vector >& matches, bool compactResult = false); - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. 
If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + //! Find k best matches for each query descriptor (in increasing order of distances). + //! compactResult is used when mask is not empty. If compactResult is false matches + //! vector will have the same size as queryDescriptors rows. If compactResult is true + //! matches vector will not contain matches for fully masked out query descriptors. void knnMatch(const GpuMat& query, std::vector< std::vector >& matches, int k, const std::vector& masks = std::vector(), bool compactResult = false); - // Find best matches for each query descriptor which have distance less than maxDistance. - // nMatches.at(0, queryIdx) will contain matches count for queryIdx. - // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, - // because it didn't have enough memory. - // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), - // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - // Matches doesn't sorted. + //! Find best matches for each query descriptor which have distance less than maxDistance. + //! nMatches.at(0, queryIdx) will contain matches count for queryIdx. + //! Note that nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, + //! because it didn't have enough memory. + //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), + //! otherwise user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches. + //! Matches are not sorted.
void radiusMatchSingle(const GpuMat& query, const GpuMat& train, GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); - // Download trainIdx, nMatches and distance and convert it to vector with DMatch. - // matches will be sorted in increasing order of distances. - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + //! Download trainIdx, nMatches and distance and convert it to vector with DMatch. + //! matches will be sorted in increasing order of distances. + //! compactResult is used when mask is not empty. If compactResult is false matches + //! vector will have the same size as queryDescriptors rows. If compactResult is true + //! matches vector will not contain matches for fully masked out query descriptors. static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx, nMatches and distance to vector with DMatch. + //! Convert trainIdx, nMatches and distance to vector with DMatch. static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector >& matches, bool compactResult = false); - // Find best matches for each query descriptor which have distance less than maxDistance - // in increasing order of distances). + //! Find best matches for each query descriptor which have distance less than maxDistance + //! in increasing order of distances). 
void radiusMatch(const GpuMat& query, const GpuMat& train, std::vector< std::vector >& matches, float maxDistance, const GpuMat& mask = GpuMat(), bool compactResult = false); - // Find best matches for each query descriptor which have distance less than maxDistance. - // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), - // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - // Matches doesn't sorted. + //! Find best matches for each query descriptor which have distance less than maxDistance. + //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), + //! otherwise user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches. + //! Matches are not sorted. void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, const std::vector& masks = std::vector(), Stream& stream = Stream::Null()); - // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. - // matches will be sorted in increasing order of distances. - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + //! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. + //! matches will be sorted in increasing order of distances. + //! compactResult is used when mask is not empty. If compactResult is false matches + //! vector will have the same size as queryDescriptors rows. If compactResult is true + //! matches vector will not contain matches for fully masked out query descriptors.
static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx, nMatches and distance to vector with DMatch. + //! Convert trainIdx, nMatches and distance to vector with DMatch. static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector >& matches, bool compactResult = false); - // Find best matches from train collection for each query descriptor which have distance less than - // maxDistance (in increasing order of distances). + //! Find best matches from train collection for each query descriptor which have distance less than + //! maxDistance (in increasing order of distances). void radiusMatch(const GpuMat& query, std::vector< std::vector >& matches, float maxDistance, const std::vector& masks = std::vector(), bool compactResult = false); @@ -203,6 +228,8 @@ private: std::vector trainDescCollection; }; +/** @brief Class used for corner detection using the FAST algorithm. : + */ class CV_EXPORTS FAST_CUDA { public: @@ -213,23 +240,45 @@ public: ROWS_COUNT }; - // all features have same size + //! all features have same size static const int FEATURE_SIZE = 7; + /** @brief Constructor. + + @param threshold Threshold on difference between intensity of the central pixel and pixels on a + circle around this pixel. + @param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners + (keypoints). + @param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \* + image_width \* image_height). + */ explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05); - //! finds the keypoints using FAST detector - //! supports only CV_8UC1 images + /** @brief Finds the keypoints using FAST detector. 
+ + @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are + supported. + @param mask Optional input mask that marks the regions where we should detect features. + @param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU + memory: + - keypoints.ptr\(LOCATION_ROW)[i] will contain location of i'th point + - keypoints.ptr\(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum + suppression is applied) + */ void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints); + /** @overload */ void operator ()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints); - //! download keypoints from device to host memory + /** @brief Downloads keypoints from GPU to CPU memory. + */ static void downloadKeypoints(const GpuMat& d_keypoints, std::vector& keypoints); - //! convert keypoints to KeyPoint vector + /** @brief Converts keypoints from CUDA representation to vector of KeyPoint. + */ static void convertKeypoints(const Mat& h_keypoints, std::vector& keypoints); - //! release temporary buffer's memory + /** @brief Releases inner buffer memory. + */ void release(); bool nonmaxSuppression; @@ -239,13 +288,22 @@ public: //! max keypoints = keypointsRatio * img.size().area() double keypointsRatio; - //! find keypoints and compute it's response if nonmaxSuppression is true - //! return count of detected keypoints + /** @brief Finds keypoints and computes their response if nonmaxSuppression is true. + + @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are + supported. + @param mask Optional input mask that marks the regions where we should detect features. + + The function returns the count of detected keypoints. + */ int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask); - //! get final array of keypoints - //! performs nonmax suppression if needed - //!
return final count of keypoints + /** @brief Gets final array of keypoints. + + @param keypoints The output vector of keypoints. + + The function performs non-max suppression if needed and returns final count of keypoints. + */ int getKeyPoints(GpuMat& keypoints); private: @@ -257,6 +315,8 @@ private: GpuMat d_keypoints_; }; +/** @brief Class for extracting ORB features and descriptors from an image. : + */ class CV_EXPORTS ORB_CUDA { public: @@ -276,28 +336,51 @@ public: DEFAULT_FAST_THRESHOLD = 20 }; - //! Constructor + /** @brief Constructor. + + @param nFeatures The number of desired features. + @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to + the next. + @param nLevels The number of levels in the scale pyramid. + @param edgeThreshold How far from the boundary the points should be. + @param firstLevel The level at which the image is given. If 1, that means we will also look at the + image scaleFactor times bigger. + @param WTA_K + @param scoreType + @param patchSize + */ explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31); - //! Compute the ORB features on an image - //! image - the image to compute the features (supports only CV_8UC1 images) - //! mask - the mask to apply - //! keypoints - the resulting keypoints + /** @overload */ void operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints); + /** @overload */ void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints); - //! Compute the ORB features and descriptors on an image - //! image - the image to compute the features (supports only CV_8UC1 images) - //! mask - the mask to apply - //! keypoints - the resulting keypoints - //! descriptors - descriptors array + /** @brief Detects keypoints and computes descriptors for them. + + @param image Input 8-bit grayscale image. 
+ @param mask Optional input mask that marks the regions where we should detect features. + @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory. + For GPU memory: + - keypoints.ptr\(X_ROW)[i] contains x coordinate of the i'th feature. + - keypoints.ptr\(Y_ROW)[i] contains y coordinate of the i'th feature. + - keypoints.ptr\(RESPONSE_ROW)[i] contains the response of the i'th feature. + - keypoints.ptr\(ANGLE_ROW)[i] contains orientation of the i'th feature. + - keypoints.ptr\(OCTAVE_ROW)[i] contains the octave of the i'th feature. + - keypoints.ptr\(SIZE_ROW)[i] contains the size of the i'th feature. + @param descriptors Computed descriptors. If blurForDescriptor is true, the image will be blurred + before descriptors calculation. + */ void operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints, GpuMat& descriptors); + /** @overload */ void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors); - //! download keypoints from device to host memory + /** @brief Downloads keypoints from GPU to CPU memory. + */ static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector& keypoints); - //! convert keypoints to KeyPoint vector + /** @brief Converts keypoints from CUDA representation to vector of KeyPoint. + */ static void convertKeyPoints(const Mat& d_keypoints, std::vector& keypoints); //! returns the descriptor size in bytes @@ -309,7 +392,8 @@ public: fastDetector_.nonmaxSuppression = nonmaxSuppression; } - //! release temporary buffer's memory + /** @brief Releases inner buffer memory. + */ void release(); //! if true, image will be blurred before descriptors calculation @@ -335,10 +419,10 @@ private: int scoreType_; int patchSize_; - // The number of desired features per scale + //! The number of desired features per scale std::vector n_features_per_level_; - // Points to compute BRIEF descriptors from + //!
Points to compute BRIEF descriptors from GpuMat pattern_; std::vector imagePyr_; @@ -356,6 +440,8 @@ private: GpuMat d_keypoints_; }; +//! @} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDAFEATURES2D_HPP__ */ diff --git a/modules/cudafilters/include/opencv2/cudafilters.hpp b/modules/cudafilters/include/opencv2/cudafilters.hpp index 2c06575b5d..9e86cc3a71 100644 --- a/modules/cudafilters/include/opencv2/cudafilters.hpp +++ b/modules/cudafilters/include/opencv2/cudafilters.hpp @@ -50,65 +50,189 @@ #include "opencv2/core/cuda.hpp" #include "opencv2/imgproc.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudafilters Image Filtering + +Functions and classes described in this section are used to perform various linear or non-linear +filtering operations on 2D images. + +@note + - An example containing all basic morphology operators like erode and dilate can be found at + opencv_source_code/samples/gpu/morphology.cpp + + @} + */ + namespace cv { namespace cuda { +//! @addtogroup cudafilters +//! @{ + +/** @brief Common interface for all CUDA filters : + */ class CV_EXPORTS Filter : public Algorithm { public: + /** @brief Applies the specified filter to the image. + + @param src Input image. + @param dst Output image. + @param stream Stream for the asynchronous version. + */ virtual void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; }; //////////////////////////////////////////////////////////////////////////////////////////////////// // Box Filter -//! creates a normalized 2D box filter -//! supports CV_8UC1, CV_8UC4 types +/** @brief Creates a normalized 2D box filter. + +@param srcType Input image type. Only CV_8UC1 and CV_8UC4 are supported for now. +@param dstType Output image type. Only the same type as src is supported for now. +@param ksize Kernel size. +@param anchor Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel +center. +@param borderMode Pixel extrapolation method. 
For details, see borderInterpolate . +@param borderVal Default border value. + +@sa boxFilter + */ CV_EXPORTS Ptr createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Linear Filter -//! Creates a non-separable linear 2D filter -//! supports 1 and 4 channel CV_8U, CV_16U and CV_32F input +/** @brief Creates a non-separable linear 2D filter. + +@param srcType Input image type. Supports CV_8U , CV_16U and CV_32F one and four channel image. +@param dstType Output image type. Only the same type as src is supported for now. +@param kernel 2D array of filter coefficients. +@param anchor Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel +center. +@param borderMode Pixel extrapolation method. For details, see borderInterpolate . +@param borderVal Default border value. + +@sa filter2D + */ CV_EXPORTS Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Laplacian Filter -//! creates a Laplacian operator -//! supports only ksize = 1 and ksize = 3 +/** @brief Creates a Laplacian operator. + +@param srcType Input image type. Supports CV_8U , CV_16U and CV_32F one and four channel image. +@param dstType Output image type. Only the same type as src is supported for now. +@param ksize Aperture size used to compute the second-derivative filters (see getDerivKernels). It +must be positive and odd. Only ksize = 1 and ksize = 3 are supported. +@param scale Optional scale factor for the computed Laplacian values. By default, no scaling is +applied (see getDerivKernels ). +@param borderMode Pixel extrapolation method. 
For details, see borderInterpolate . +@param borderVal Default border value. + +@sa Laplacian + */ CV_EXPORTS Ptr createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Separable Linear Filter -//! creates a separable linear filter +/** @brief Creates a separable linear filter. + +@param srcType Source array type. +@param dstType Destination array type. +@param rowKernel Horizontal filter coefficients. Supports kernels with size \<= 32 . +@param columnKernel Vertical filter coefficients. Supports kernels with size \<= 32 . +@param anchor Anchor position within the kernel. Negative values mean that anchor is positioned at +the aperture center. +@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see +borderInterpolate. +@param columnBorderMode Pixel extrapolation method in the horizontal direction. + +@sa sepFilter2D + */ CV_EXPORTS Ptr createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor = Point(-1,-1), int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); //////////////////////////////////////////////////////////////////////////////////////////////////// // Deriv Filter -//! creates a generalized Deriv operator +/** @brief Creates a generalized Deriv operator. + +@param srcType Source image type. +@param dstType Destination array type. +@param dx Derivative order in respect of x. +@param dy Derivative order in respect of y. +@param ksize Aperture size. See getDerivKernels for details. +@param normalize Flag indicating whether to normalize (scale down) the filter coefficients or not. +See getDerivKernels for details. +@param scale Optional scale factor for the computed derivative values. By default, no scaling is +applied. For details, see getDerivKernels .
+@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see +borderInterpolate. +@param columnBorderMode Pixel extrapolation method in the horizontal direction. + */ CV_EXPORTS Ptr createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize = false, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -//! creates a Sobel operator +/** @brief Creates a Sobel operator. + +@param srcType Source image type. +@param dstType Destination array type. +@param dx Derivative order in respect of x. +@param dy Derivative order in respect of y. +@param ksize Size of the extended Sobel kernel. Possible values are 1, 3, 5 or 7. +@param scale Optional scale factor for the computed derivative values. By default, no scaling is +applied. For details, see getDerivKernels . +@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see +borderInterpolate. +@param columnBorderMode Pixel extrapolation method in the horizontal direction. + +@sa Sobel + */ CV_EXPORTS Ptr createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -//! creates a vertical or horizontal Scharr operator +/** @brief Creates a vertical or horizontal Scharr operator. + +@param srcType Source image type. +@param dstType Destination array type. +@param dx Order of the derivative x. +@param dy Order of the derivative y. +@param scale Optional scale factor for the computed derivative values. By default, no scaling is +applied. See getDerivKernels for details. +@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see +borderInterpolate. +@param columnBorderMode Pixel extrapolation method in the horizontal direction. 
+ +@sa Scharr + */ CV_EXPORTS Ptr createScharrFilter(int srcType, int dstType, int dx, int dy, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); //////////////////////////////////////////////////////////////////////////////////////////////////// // Gaussian Filter -//! creates a Gaussian filter +/** @brief Creates a Gaussian filter. + +@param srcType Source image type. +@param dstType Destination array type. +@param ksize Aperture size. See getGaussianKernel for details. +@param sigma1 Gaussian sigma in the horizontal direction. See getGaussianKernel for details. +@param sigma2 Gaussian sigma in the vertical direction. If 0, then +\f$\texttt{sigma2}\leftarrow\texttt{sigma1}\f$ . +@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see +borderInterpolate. +@param columnBorderMode Pixel extrapolation method in the horizontal direction. + +@sa GaussianBlur + */ CV_EXPORTS Ptr createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2 = 0, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); @@ -116,19 +240,49 @@ CV_EXPORTS Ptr createGaussianFilter(int srcType, int dstType, Size ksize //////////////////////////////////////////////////////////////////////////////////////////////////// // Morphology Filter -//! creates a 2D morphological filter -//! supports CV_8UC1 and CV_8UC4 types +/** @brief Creates a 2D morphological filter. + +@param op Type of morphological operation. The following types are possible: +- **MORPH_ERODE** erode +- **MORPH_DILATE** dilate +- **MORPH_OPEN** opening +- **MORPH_CLOSE** closing +- **MORPH_GRADIENT** morphological gradient +- **MORPH_TOPHAT** "top hat" +- **MORPH_BLACKHAT** "black hat" +@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported. +@param kernel 2D 8-bit structuring element for the morphological operation. +@param anchor Anchor position within the structuring element. 
Negative values mean that the anchor +is at the center. +@param iterations Number of times erosion and dilation to be applied. + +@sa morphologyEx + */ CV_EXPORTS Ptr createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1); //////////////////////////////////////////////////////////////////////////////////////////////////// // Image Rank Filter -//! result pixel value is the maximum of pixel values under the rectangular mask region +/** @brief Creates the maximum filter. + +@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported. +@param ksize Kernel size. +@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. +@param borderMode Pixel extrapolation method. For details, see borderInterpolate . +@param borderVal Default border value. + */ CV_EXPORTS Ptr createBoxMaxFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -//! result pixel value is the maximum of pixel values under the rectangular mask region +/** @brief Creates the minimum filter. + +@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported. +@param ksize Kernel size. +@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. +@param borderMode Pixel extrapolation method. For details, see borderInterpolate . +@param borderVal Default border value. + */ CV_EXPORTS Ptr createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); @@ -136,14 +290,30 @@ CV_EXPORTS Ptr createBoxMinFilter(int srcType, Size ksize, //////////////////////////////////////////////////////////////////////////////////////////////////// // 1D Sum Filter -//! creates a horizontal 1D box filter -//! 
supports only CV_8UC1 source type and CV_32FC1 sum type +/** @brief Creates a horizontal 1D box filter. + +@param srcType Input image type. Only CV_8UC1 type is supported for now. +@param dstType Output image type. Only CV_32FC1 type is supported for now. +@param ksize Kernel size. +@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. +@param borderMode Pixel extrapolation method. For details, see borderInterpolate . +@param borderVal Default border value. + */ CV_EXPORTS Ptr createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -//! creates a vertical 1D box filter -//! supports only CV_8UC1 sum type and CV_32FC1 dst type +/** @brief Creates a vertical 1D box filter. + +@param srcType Input image type. Only CV_8UC1 type is supported for now. +@param dstType Output image type. Only CV_32FC1 type is supported for now. +@param ksize Kernel size. +@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. +@param borderMode Pixel extrapolation method. For details, see borderInterpolate . +@param borderVal Default border value. + */ CV_EXPORTS Ptr createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); +//! 
@} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDAFILTERS_HPP__ */ diff --git a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp index d451b93b1e..1ec288fa9c 100644 --- a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp +++ b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp @@ -50,16 +50,48 @@ #include "opencv2/core/cuda.hpp" #include "opencv2/imgproc.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudaimgproc Image Processing + @{ + @defgroup cudaimgproc_color Color space processing + @defgroup cudaimgproc_hist Histogram Calculation + @defgroup cudaimgproc_hough Hough Transform + @defgroup cudaimgproc_feature Feature Detection + @} + @} +*/ + namespace cv { namespace cuda { +//! @addtogroup cudaimgproc +//! @{ + /////////////////////////// Color Processing /////////////////////////// -//! converts image from one color space to another +//! @addtogroup cudaimgproc_color +//! @{ + +/** @brief Converts an image from one color space to another. + +@param src Source image with CV_8U , CV_16U , or CV_32F depth and 1, 3, or 4 channels. +@param dst Destination image. +@param code Color space conversion code. For details, see cvtColor . +@param dcn Number of channels in the destination image. If the parameter is 0, the number of the +channels is derived automatically from src and the code . +@param stream Stream for the asynchronous version. + +3-channel color spaces (like HSV, XYZ, and so on) can be stored in a 4-channel image for better +performance. + +@sa cvtColor + */ CV_EXPORTS void cvtColor(InputArray src, OutputArray dst, int code, int dcn = 0, Stream& stream = Stream::Null()); enum { - // Bayer Demosaicing (Malvar, He, and Cutler) + //! 
Bayer Demosaicing (Malvar, He, and Cutler) COLOR_BayerBG2BGR_MHT = 256, COLOR_BayerGB2BGR_MHT = 257, COLOR_BayerRG2BGR_MHT = 258, @@ -75,105 +107,228 @@ enum COLOR_BayerRG2GRAY_MHT = 262, COLOR_BayerGR2GRAY_MHT = 263 }; + +/** @brief Converts an image from Bayer pattern to RGB or grayscale. + +@param src Source image (8-bit or 16-bit single channel). +@param dst Destination image. +@param code Color space conversion code (see the description below). +@param dcn Number of channels in the destination image. If the parameter is 0, the number of the +channels is derived automatically from src and the code . +@param stream Stream for the asynchronous version. + +The function can do the following transformations: + +- Demosaicing using bilinear interpolation + + > - COLOR_BayerBG2GRAY , COLOR_BayerGB2GRAY , COLOR_BayerRG2GRAY , COLOR_BayerGR2GRAY + > - COLOR_BayerBG2BGR , COLOR_BayerGB2BGR , COLOR_BayerRG2BGR , COLOR_BayerGR2BGR + +- Demosaicing using Malvar-He-Cutler algorithm (@cite MHT2011) + + > - COLOR_BayerBG2GRAY_MHT , COLOR_BayerGB2GRAY_MHT , COLOR_BayerRG2GRAY_MHT , + > COLOR_BayerGR2GRAY_MHT + > - COLOR_BayerBG2BGR_MHT , COLOR_BayerGB2BGR_MHT , COLOR_BayerRG2BGR_MHT , + > COLOR_BayerGR2BGR_MHT + +@sa cvtColor + */ CV_EXPORTS void demosaicing(InputArray src, OutputArray dst, int code, int dcn = -1, Stream& stream = Stream::Null()); -//! swap channels -//! dstOrder - Integer array describing how channel values are permutated. The n-th entry -//! of the array contains the number of the channel that is stored in the n-th channel of -//! the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR -//! channel order. +/** @brief Exchanges the color channels of an image in-place. + +@param image Source image. Supports only CV_8UC4 type. +@param dstOrder Integer array describing how channel values are permutated. The n-th entry of the +array contains the number of the channel that is stored in the n-th channel of the output image. +E.g. 
Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR channel order. +@param stream Stream for the asynchronous version. + +The methods support arbitrary permutations of the original channels, including replication. + */ CV_EXPORTS void swapChannels(InputOutputArray image, const int dstOrder[4], Stream& stream = Stream::Null()); -//! Routines for correcting image color gamma +/** @brief Routines for correcting image color gamma. + +@param src Source image (3- or 4-channel 8 bit). +@param dst Destination image. +@param forward true for forward gamma correction or false for inverse gamma correction. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, bool forward = true, Stream& stream = Stream::Null()); enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL}; -//! Composite two images using alpha opacity values contained in each image -//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types +/** @brief Composites two images using alpha opacity values contained in each image. + +@param img1 First image. Supports CV_8UC4 , CV_16UC4 , CV_32SC4 and CV_32FC4 types. +@param img2 Second image. Must have the same size and the same type as img1 . +@param dst Destination image. +@param alpha_op Flag specifying the alpha-blending operation: +- **ALPHA_OVER** +- **ALPHA_IN** +- **ALPHA_OUT** +- **ALPHA_ATOP** +- **ALPHA_XOR** +- **ALPHA_PLUS** +- **ALPHA_OVER_PREMUL** +- **ALPHA_IN_PREMUL** +- **ALPHA_OUT_PREMUL** +- **ALPHA_ATOP_PREMUL** +- **ALPHA_XOR_PREMUL** +- **ALPHA_PLUS_PREMUL** +- **ALPHA_PREMUL** +@param stream Stream for the asynchronous version. 
+ +@note + - An example demonstrating the use of alphaComp can be found at + opencv_source_code/samples/gpu/alpha_comp.cpp + */ CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int alpha_op, Stream& stream = Stream::Null()); +//! @} cudaimgproc_color + ////////////////////////////// Histogram /////////////////////////////// -//! Calculates histogram for 8u one channel image -//! Output hist will have one row, 256 cols and CV32SC1 type. +//! @addtogroup cudaimgproc_hist +//! @{ + +/** @brief Calculates histogram for one channel 8-bit image. + +@param src Source image with CV_8UC1 type. +@param hist Destination histogram with one row, 256 columns, and the CV_32SC1 type. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null()); -//! normalizes the grayscale image brightness and contrast by normalizing its histogram +/** @brief Equalizes the histogram of a grayscale image. + +@param src Source image with CV_8UC1 type. +@param dst Destination image. +@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes). +@param stream Stream for the asynchronous version. + +@sa equalizeHist + */ CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null()); +/** @overload */ static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) { GpuMat buf; cuda::equalizeHist(src, dst, buf, stream); } +/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. : + */ class CV_EXPORTS CLAHE : public cv::CLAHE { public: using cv::CLAHE::apply; + /** @brief Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization. + + @param src Source image with CV_8UC1 type. + @param dst Destination image. + @param stream Stream for the asynchronous version. 
+ */ virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0; }; + +/** @brief Creates implementation for cuda::CLAHE . + +@param clipLimit Threshold for contrast limiting. +@param tileGridSize Size of grid for histogram equalization. Input image will be divided into +equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column. + */ CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); -//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type. +/** @brief Computes levels with even distribution. + +@param levels Destination array. levels has 1 row, nLevels columns, and the CV_32SC1 type. +@param nLevels Number of computed levels. nLevels must be at least 2. +@param lowerLevel Lower boundary value of the lowest level. +@param upperLevel Upper boundary value of the greatest level. + */ CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel); -//! Calculates histogram with evenly distributed bins for signle channel source. -//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types. -//! Output hist will have one row and histSize cols and CV_32SC1 type. +/** @brief Calculates a histogram with evenly distributed bins. + +@param src Source image. CV_8U, CV_16U, or CV_16S depth and 1 or 4 channels are supported. For +a four-channel image, all channels are processed separately. +@param hist Destination histogram with one row, histSize columns, and the CV_32S type. +@param histSize Size of the histogram. +@param lowerLevel Lower boundary of lowest-level bin. +@param upperLevel Upper boundary of highest-level bin. +@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes). +@param stream Stream for the asynchronous version. 
+ */ CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); +/** @overload */ static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()) { GpuMat buf; cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); } -//! Calculates histogram with evenly distributed bins for four-channel source. -//! All channels of source are processed separately. -//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types. -//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type. +/** @overload */ CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); +/** @overload */ static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()) { GpuMat buf; cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); } -//! Calculates histogram with bins determined by levels array. -//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. -//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types. -//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type. +/** @brief Calculates a histogram with bins determined by the levels array. + +@param src Source image. CV_8U , CV_16U , or CV_16S depth and 1 or 4 channels are supported. +For a four-channel image, all channels are processed separately. +@param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type. +@param levels Number of levels in the histogram. +@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes). +@param stream Stream for the asynchronous version. 
+ */ CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null()); +/** @overload */ static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null()) { GpuMat buf; cuda::histRange(src, hist, levels, buf, stream); } -//! Calculates histogram with bins determined by levels array. -//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. -//! All channels of source are processed separately. -//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types. -//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type. +/** @overload */ CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null()); +/** @overload */ static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()) { GpuMat buf; cuda::histRange(src, hist, levels, buf, stream); } +//! @} cudaimgproc_hist + //////////////////////////////// Canny //////////////////////////////// +/** @brief Base class for Canny Edge Detector. : + */ class CV_EXPORTS CannyEdgeDetector : public Algorithm { public: + /** @brief Finds edges in an image using the @cite Canny86 algorithm. + + @param image Single-channel 8-bit input image. + @param edges Output edge map. It has the same size and type as image . + */ virtual void detect(InputArray image, OutputArray edges) = 0; + /** @overload + @param dx First derivative of image in the vertical direction. Support only CV_32S type. + @param dy First derivative of image in the horizontal direction. Support only CV_32S type. + @param edges Output edge map. It has the same size and type as image . 
+ */ virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0; virtual void setLowThreshold(double low_thresh) = 0; @@ -189,6 +344,16 @@ public: virtual bool getL2Gradient() const = 0; }; +/** @brief Creates implementation for cuda::CannyEdgeDetector . + +@param low_thresh First threshold for the hysteresis procedure. +@param high_thresh Second threshold for the hysteresis procedure. +@param apperture_size Aperture size for the Sobel operator. +@param L2gradient Flag indicating whether a more accurate \f$L_2\f$ norm +\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to compute the image gradient magnitude ( +L2gradient=true ), or a faster default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough ( L2gradient=false +). + */ CV_EXPORTS Ptr createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); /////////////////////////// Hough Transform //////////////////////////// @@ -196,10 +361,32 @@ CV_EXPORTS Ptr createCannyEdgeDetector(double low_thresh, dou ////////////////////////////////////// // HoughLines +//! @addtogroup cudaimgproc_hough +//! @{ + +/** @brief Base class for lines detector algorithm. : + */ class CV_EXPORTS HoughLinesDetector : public Algorithm { public: + /** @brief Finds lines in a binary image using the classical Hough transform. + + @param src 8-bit, single-channel binary source image. + @param lines Output vector of lines. Each line is represented by a two-element vector + \f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of + the image). \f$\theta\f$ is the line rotation angle in radians ( + \f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ). + + @sa HoughLines + */ virtual void detect(InputArray src, OutputArray lines) = 0; + + /** @brief Downloads results from cuda::HoughLinesDetector::detect to host memory. + + @param d_lines Result of cuda::HoughLinesDetector::detect . 
+ @param h_lines Output host array. + @param h_votes Optional output array for line's votes. + */ virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0; virtual void setRho(float rho) = 0; @@ -218,16 +405,35 @@ public: virtual int getMaxLines() const = 0; }; +/** @brief Creates implementation for cuda::HoughLinesDetector . + +@param rho Distance resolution of the accumulator in pixels. +@param theta Angle resolution of the accumulator in radians. +@param threshold Accumulator threshold parameter. Only those lines are returned that get enough +votes ( \f$>\texttt{threshold}\f$ ). +@param doSort Performs lines sort by votes. +@param maxLines Maximum number of output lines. + */ CV_EXPORTS Ptr createHoughLinesDetector(float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); ////////////////////////////////////// // HoughLinesP -//! finds line segments in the black-n-white image using probabilistic Hough transform +/** @brief Base class for line segments detector algorithm. : + */ class CV_EXPORTS HoughSegmentDetector : public Algorithm { public: + /** @brief Finds line segments in a binary image using the probabilistic Hough transform. + + @param src 8-bit, single-channel binary source image. + @param lines Output vector of lines. Each line is represented by a 4-element vector + \f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected + line segment. + + @sa HoughLinesP + */ virtual void detect(InputArray src, OutputArray lines) = 0; virtual void setRho(float rho) = 0; @@ -246,14 +452,32 @@ public: virtual int getMaxLines() const = 0; }; +/** @brief Creates implementation for cuda::HoughSegmentDetector . + +@param rho Distance resolution of the accumulator in pixels. +@param theta Angle resolution of the accumulator in radians. +@param minLineLength Minimum line length. Line segments shorter than that are rejected. 
+@param maxLineGap Maximum allowed gap between points on the same line to link them. +@param maxLines Maximum number of output lines. + */ CV_EXPORTS Ptr createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096); ////////////////////////////////////// // HoughCircles +/** @brief Base class for circles detector algorithm. : + */ class CV_EXPORTS HoughCirclesDetector : public Algorithm { public: + /** @brief Finds circles in a grayscale image using the Hough transform. + + @param src 8-bit, single-channel grayscale input image. + @param circles Output vector of found circles. Each vector is encoded as a 3-element + floating-point vector \f$(x, y, radius)\f$ . + + @sa HoughCircles + */ virtual void detect(InputArray src, OutputArray circles) = 0; virtual void setDp(float dp) = 0; @@ -278,85 +502,257 @@ public: virtual int getMaxCircles() const = 0; }; +/** @brief Creates implementation for cuda::HoughCirclesDetector . + +@param dp Inverse ratio of the accumulator resolution to the image resolution. For example, if +dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has +half as big width and height. +@param minDist Minimum distance between the centers of the detected circles. If the parameter is +too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is +too large, some circles may be missed. +@param cannyThreshold The higher threshold of the two passed to Canny edge detector (the lower one +is twice smaller). +@param votesThreshold The accumulator threshold for the circle centers at the detection stage. The +smaller it is, the more false circles may be detected. +@param minRadius Minimum circle radius. +@param maxRadius Maximum circle radius. +@param maxCircles Maximum number of output circles. 
+ */ CV_EXPORTS Ptr createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); ////////////////////////////////////// // GeneralizedHough -//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122. -//! Detects position only without traslation and rotation +/** @brief Creates implementation for generalized hough transform from @cite Ballard1981 . + */ CV_EXPORTS Ptr createGeneralizedHoughBallard(); -//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038. -//! Detects position, traslation and rotation +/** @brief Creates implementation for generalized hough transform from @cite Guil1999 . + */ CV_EXPORTS Ptr createGeneralizedHoughGuil(); +//! @} cudaimgproc_hough + ////////////////////////// Corners Detection /////////////////////////// +//! @addtogroup cudaimgproc_feature +//! @{ + +/** @brief Base class for Cornerness Criteria computation. : + */ class CV_EXPORTS CornernessCriteria : public Algorithm { public: + /** @brief Computes the cornerness criteria at each image pixel. + + @param src Source image. + @param dst Destination image containing cornerness values. It will have the same size as src and + CV_32FC1 type. + @param stream Stream for the asynchronous version. + */ virtual void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; }; -//! computes Harris cornerness criteria at each image pixel +/** @brief Creates implementation for Harris cornerness criteria. + +@param srcType Input source type. Only CV_8UC1 and CV_32FC1 are supported for now. +@param blockSize Neighborhood size. +@param ksize Aperture parameter for the Sobel operator. +@param k Harris detector free parameter. +@param borderType Pixel extrapolation method. 
Only BORDER_REFLECT101 and BORDER_REPLICATE are +supported for now. + +@sa cornerHarris + */ CV_EXPORTS Ptr createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101); -//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria +/** @brief Creates implementation for the minimum eigen value of a 2x2 derivative covariation matrix (the +cornerness criteria). + +@param srcType Input source type. Only CV_8UC1 and CV_32FC1 are supported for now. +@param blockSize Neighborhood size. +@param ksize Aperture parameter for the Sobel operator. +@param borderType Pixel extrapolation method. Only BORDER_REFLECT101 and BORDER_REPLICATE are +supported for now. + +@sa cornerMinEigenVal + */ CV_EXPORTS Ptr createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType = BORDER_REFLECT101); ////////////////////////// Corners Detection /////////////////////////// +/** @brief Base class for Corners Detector. : + */ class CV_EXPORTS CornersDetector : public Algorithm { public: - //! return 1 rows matrix with CV_32FC2 type + /** @brief Determines strong corners on an image. + + @param image Input 8-bit or floating-point 32-bit, single-channel image. + @param corners Output vector of detected corners (1-row matrix with CV_32FC2 type with corners + positions). + @param mask Optional region of interest. If the image is not empty (it needs to have the type + CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected. + */ virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0; }; +/** @brief Creates implementation for cuda::CornersDetector . + +@param srcType Input source type. Only CV_8UC1 and CV_32FC1 are supported for now. +@param maxCorners Maximum number of corners to return. If there are more corners than are found, +the strongest of them is returned. 
+@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The +parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue +(see cornerMinEigenVal ) or the Harris function response (see cornerHarris ). The corners with the +quality measure less than the product are rejected. For example, if the best corner has the +quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure +less than 15 are rejected. +@param minDistance Minimum possible Euclidean distance between the returned corners. +@param blockSize Size of an average block for computing a derivative covariation matrix over each +pixel neighborhood. See cornerEigenValsAndVecs . +@param useHarrisDetector Parameter indicating whether to use a Harris detector (see cornerHarris) +or cornerMinEigenVal. +@param harrisK Free parameter of the Harris detector. + */ CV_EXPORTS Ptr createGoodFeaturesToTrackDetector(int srcType, int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04); +//! @} cudaimgproc_feature + ///////////////////////////// Mean Shift ////////////////////////////// -//! Does mean shift filtering on GPU. +/** @brief Performs mean-shift filtering for each point of the source image. + +@param src Source image. Only CV_8UC4 images are supported for now. +@param dst Destination image containing the color of mapped points. It has the same size and type +as src . +@param sp Spatial window radius. +@param sr Color window radius. +@param criteria Termination criteria. See TermCriteria. +@param stream + +It maps each point of the source image into another point. As a result, you have a new color and new +position of each point. 
+ */ CV_EXPORTS void meanShiftFiltering(InputArray src, OutputArray dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null()); -//! Does mean shift procedure on GPU. +/** @brief Performs a mean-shift procedure and stores information about processed points (their colors and +positions) in two images. + +@param src Source image. Only CV_8UC4 images are supported for now. +@param dstr Destination image containing the color of mapped points. The size and type is the same +as src . +@param dstsp Destination image containing the position of mapped points. The size is the same as +src size. The type is CV_16SC2 . +@param sp Spatial window radius. +@param sr Color window radius. +@param criteria Termination criteria. See TermCriteria. +@param stream + +@sa cuda::meanShiftFiltering + */ CV_EXPORTS void meanShiftProc(InputArray src, OutputArray dstr, OutputArray dstsp, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null()); -//! Does mean shift segmentation with elimination of small regions. +/** @brief Performs a mean-shift segmentation of the source image and eliminates small segments. + +@param src Source image. Only CV_8UC4 images are supported for now. +@param dst Segmented image with the same size and type as src (host memory). +@param sp Spatial window radius. +@param sr Color window radius. +@param minsize Minimum segment size. Smaller segments are merged. +@param criteria Termination criteria. See TermCriteria. + */ CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); /////////////////////////// Match Template //////////////////////////// -//! 
computes the proximity map for the raster template and the image where the template is searched for +/** @brief Base class for Template Matching. : + */ class CV_EXPORTS TemplateMatching : public Algorithm { public: + /** @brief Computes a proximity map for a raster template and an image where the template is searched for. + + @param image Source image. + @param templ Template image with the size and type the same as image . + @param result Map containing comparison results ( CV_32FC1 ). If image is *W x H* and templ is *w + x h*, then result must be *W-w+1 x H-h+1*. + @param stream Stream for the asynchronous version. + */ virtual void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null()) = 0; }; +/** @brief Creates implementation for cuda::TemplateMatching . + +@param srcType Input source type. CV_32F and CV_8U depth images (1..4 channels) are supported +for now. +@param method Specifies the way to compare the template with the image. +@param user_block_size You can use field user_block_size to set specific block size. If you +leave its default value Size(0,0) then automatic estimation of block size will be used (which is +optimized for speed). By varying user_block_size you can reduce memory requirements at the cost +of speed. + +The following methods are supported for the CV_8U depth images for now: + +- CV_TM_SQDIFF +- CV_TM_SQDIFF_NORMED +- CV_TM_CCORR +- CV_TM_CCORR_NORMED +- CV_TM_CCOEFF +- CV_TM_CCOEFF_NORMED + +The following methods are supported for the CV_32F images for now: + +- CV_TM_SQDIFF +- CV_TM_CCORR + +@sa matchTemplate + */ CV_EXPORTS Ptr createTemplateMatching(int srcType, int method, Size user_block_size = Size()); ////////////////////////// Bilateral Filter /////////////////////////// -//! Performa bilateral filtering of passsed image +/** @brief Performs bilateral filtering of passed image + +@param src Source image. 
Supports only (channels != 2 && depth() != CV_8S && depth() != CV_32S +&& depth() != CV_64F). +@param dst Destination image. +@param kernel_size Kernel window size. +@param sigma_color Filter sigma in the color space. +@param sigma_spatial Filter sigma in the coordinate space. +@param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param stream Stream for the asynchronous version. + +@sa bilateralFilter + */ CV_EXPORTS void bilateralFilter(InputArray src, OutputArray dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null()); ///////////////////////////// Blending //////////////////////////////// -//! performs linear blending of two images -//! to avoid accuracy errors sum of weigths shouldn't be very close to zero +/** @brief Performs linear blending of two images. + +@param img1 First image. Supports only CV_8U and CV_32F depth. +@param img2 Second image. Must have the same size and the same type as img1 . +@param weights1 Weights for first image. Must have the same size as img1 . Supports only CV_32F +type. +@param weights2 Weights for second image. Must have the same size as img2 . Supports only CV_32F +type. +@param result Destination image. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void blendLinear(InputArray img1, InputArray img2, InputArray weights1, InputArray weights2, OutputArray result, Stream& stream = Stream::Null()); +//! 
@} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDAIMGPROC_HPP__ */ diff --git a/modules/cudalegacy/include/opencv2/cudalegacy.hpp b/modules/cudalegacy/include/opencv2/cudalegacy.hpp index 5ae75cd316..a72ef09c75 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy.hpp @@ -49,4 +49,11 @@ #include "opencv2/cudalegacy/NCVHaarObjectDetection.hpp" #include "opencv2/cudalegacy/NCVBroxOpticalFlow.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudalegacy Legacy support + @} +*/ + #endif /* __OPENCV_CUDALEGACY_HPP__ */ diff --git a/modules/cudalegacy/include/opencv2/cudalegacy/NCV.hpp b/modules/cudalegacy/include/opencv2/cudalegacy/NCV.hpp index cb84c23ad5..9b9a6fe178 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy/NCV.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy/NCV.hpp @@ -60,6 +60,8 @@ // //============================================================================== +//! @addtogroup cudalegacy +//! @{ /** * Compile-time assert namespace @@ -203,6 +205,7 @@ struct NcvPoint2D32u __host__ __device__ NcvPoint2D32u(Ncv32u x_, Ncv32u y_) : x(x_), y(y_) {} }; +//! @cond IGNORED NCV_CT_ASSERT(sizeof(NcvBool) <= 4); NCV_CT_ASSERT(sizeof(Ncv64s) == 8); @@ -221,6 +224,7 @@ NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u)); NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u)); NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u)); +//! @endcond //============================================================================== // @@ -1023,6 +1027,6 @@ CV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Nc NCVMatrixAlloc name(alloc, width, height); \ ncvAssertReturn(name.isMemAllocated(), err); - +//! 
@} #endif // _ncv_hpp_ diff --git a/modules/cudalegacy/include/opencv2/cudalegacy/NCVBroxOpticalFlow.hpp b/modules/cudalegacy/include/opencv2/cudalegacy/NCVBroxOpticalFlow.hpp index 777000cf7b..c14532b480 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy/NCVBroxOpticalFlow.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy/NCVBroxOpticalFlow.hpp @@ -62,6 +62,9 @@ #include "opencv2/cudalegacy/NCV.hpp" +//! @addtogroup cudalegacy +//! @{ + /// \brief Model and solver parameters struct NCVBroxOpticalFlowDescriptor { @@ -89,6 +92,7 @@ struct NCVBroxOpticalFlowDescriptor /// \param [in] frame1 frame to track /// \param [out] u flow horizontal component (along \b x axis) /// \param [out] v flow vertical component (along \b y axis) +/// \param stream /// \return computation status ///////////////////////////////////////////////////////////////////////////////////////// @@ -101,4 +105,6 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, NCVMatrix &v, cudaStream_t stream); +//! @} + #endif diff --git a/modules/cudalegacy/include/opencv2/cudalegacy/NCVHaarObjectDetection.hpp b/modules/cudalegacy/include/opencv2/cudalegacy/NCVHaarObjectDetection.hpp index 6c69cbd5a1..6b84e8b255 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy/NCVHaarObjectDetection.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy/NCVHaarObjectDetection.hpp @@ -61,6 +61,8 @@ #include "opencv2/cudalegacy/NCV.hpp" +//! @addtogroup cudalegacy +//! @{ //============================================================================== // @@ -456,6 +458,6 @@ CV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename, NCVVector &h_HaarNodes, NCVVector &h_HaarFeatures); - +//! 
@} #endif // _ncvhaarobjectdetection_hpp_ diff --git a/modules/cudalegacy/include/opencv2/cudalegacy/NCVPyramid.hpp b/modules/cudalegacy/include/opencv2/cudalegacy/NCVPyramid.hpp index 7ec22a367f..9f4501a5aa 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy/NCVPyramid.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy/NCVPyramid.hpp @@ -48,6 +48,8 @@ #include "opencv2/cudalegacy/NCV.hpp" #include "opencv2/core/cuda/common.hpp" +//! @cond IGNORED + namespace cv { namespace cuda { namespace device { namespace pyramid @@ -106,4 +108,6 @@ private: #endif //_WIN32 +//! @endcond + #endif //_ncvpyramid_hpp_ diff --git a/modules/cudalegacy/include/opencv2/cudalegacy/NPP_staging.hpp b/modules/cudalegacy/include/opencv2/cudalegacy/NPP_staging.hpp index 979ceef41f..6cc50d7a47 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy/NPP_staging.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy/NPP_staging.hpp @@ -45,19 +45,14 @@ #include "opencv2/cudalegacy/NCV.hpp" - -/** -* \file NPP_staging.hpp -* NPP Staging Library -*/ - +//! @addtogroup cudalegacy +//! @{ /** \defgroup core_npp NPPST Core * Basic functions for CUDA streams management. 
* @{ */ - /** * Gets an active CUDA stream used by NPPST * NOT THREAD SAFE @@ -168,6 +163,7 @@ NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState); * \param nSrcStep [IN] Source image line step * \param pDst [OUT] Destination image pointer (CUDA device memory) * \param dstSize [OUT] Destination image size + * \param nDstStep * \param oROI [IN] Region of interest in the source image * \param borderType [IN] Type of border * \param pKernel [IN] Pointer to row kernel values (CUDA device memory) @@ -201,6 +197,7 @@ NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc, * \param nSrcStep [IN] Source image line step * \param pDst [OUT] Destination image pointer (CUDA device memory) * \param dstSize [OUT] Destination image size + * \param nDstStep [IN] * \param oROI [IN] Region of interest in the source image * \param borderType [IN] Type of border * \param pKernel [IN] Pointer to column kernel values (CUDA device memory) @@ -228,7 +225,7 @@ NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc, /** Size of buffer required for vector image warping. * * \param srcSize [IN] Source image size - * \param nStep [IN] Source image line step + * \param nSrcStep [IN] Source image line step * \param hpSize [OUT] Where to store computed size (host memory) * * \return NCV status code @@ -285,6 +282,7 @@ NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc, * \param pU [IN] Pointer to horizontal displacement field (CUDA device memory) * \param pV [IN] Pointer to vertical displacement field (CUDA device memory) * \param nVFStep [IN] Displacement field line step + * \param pBuffer * \param timeScale [IN] Value by which displacement field will be scaled for warping * \param pDst [OUT] Destination image pointer (CUDA device memory) * @@ -903,5 +901,6 @@ NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen, /*@}*/ +//! 
@} #endif // _npp_staging_hpp_ diff --git a/modules/cudalegacy/include/opencv2/cudalegacy/private.hpp b/modules/cudalegacy/include/opencv2/cudalegacy/private.hpp index 41c23836d9..721748099c 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy/private.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy/private.hpp @@ -56,6 +56,8 @@ #include "opencv2/cudalegacy.hpp" +//! @cond IGNORED + namespace cv { namespace cuda { class NppStStreamHandler @@ -89,4 +91,6 @@ namespace cv { namespace cuda #define ncvSafeCall(expr) cv::cuda::checkNcvError(expr, __FILE__, __LINE__, CV_Func) +//! @endcond + #endif // __OPENCV_CORE_CUDALEGACY_PRIVATE_HPP__ diff --git a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp index d07a834ef3..f65b1447b2 100644 --- a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp +++ b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp @@ -49,8 +49,21 @@ #include "opencv2/core/cuda.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudaoptflow Optical Flow + @} + */ + namespace cv { namespace cuda { +//! @addtogroup cudaoptflow +//! @{ + +/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm +(@cite Brox2004). : + */ class CV_EXPORTS BroxOpticalFlow { public: @@ -88,16 +101,58 @@ public: GpuMat buf; }; +/** @brief Class used for calculating an optical flow. + +The class can calculate an optical flow for a sparse feature set or dense optical flow using the +iterative Lucas-Kanade method with pyramids. + +@sa calcOpticalFlowPyrLK + +@note + - An example of the Lucas Kanade optical flow algorithm can be found at + opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp + */ class CV_EXPORTS PyrLKOpticalFlow { public: PyrLKOpticalFlow(); + /** @brief Calculate an optical flow for a sparse feature set. + + @param prevImg First 8-bit input image (supports both grayscale and color images). 
+ @param nextImg Second input image of the same size and the same type as prevImg . + @param prevPts Vector of 2D points for which the flow needs to be found. It must be one row matrix + with CV_32FC2 type. + @param nextPts Output vector of 2D points (with single-precision floating-point coordinates) + containing the calculated new positions of input features in the second image. When useInitialFlow + is true, the vector must have the same size as in the input. + @param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the + flow for the corresponding features has been found. Otherwise, it is set to 0. + @param err Output vector (CV_32FC1 type) that contains the difference between patches around the + original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not + needed. + + @sa calcOpticalFlowPyrLK + */ void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err = 0); + /** @brief Calculate dense optical flow. + + @param prevImg First 8-bit grayscale input image. + @param nextImg Second input image of the same size and the same type as prevImg . + @param u Horizontal component of the optical flow of the same size as input images, 32-bit + floating-point, single-channel + @param v Vertical component of the optical flow of the same size as input images, 32-bit + floating-point, single-channel + @param err Output vector (CV_32FC1 type) that contains the difference between patches around the + original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not + needed. + */ void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0); + /** @brief Releases inner buffers memory. + */ void releaseMemory(); Size winSize; @@ -115,6 +170,8 @@ private: GpuMat vPyr_[2]; }; +/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm. 
: + */ class CV_EXPORTS FarnebackOpticalFlow { public: @@ -139,8 +196,20 @@ public: double polySigma; int flags; + /** @brief Computes a dense optical flow using the Gunnar Farneback’s algorithm. + + @param frame0 First 8-bit gray-scale input image + @param frame1 Second 8-bit gray-scale input image + @param flowx Flow horizontal component + @param flowy Flow vertical component + @param s Stream + + @sa calcOpticalFlowFarneback + */ void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null()); + /** @brief Releases unused auxiliary memory buffers. + */ void releaseMemory() { frames_[0].release(); @@ -295,20 +364,22 @@ private: GpuMat extended_I1; }; -//! Interpolate frames (images) using provided optical flow (displacement field). -//! frame0 - frame 0 (32-bit floating point images, single channel) -//! frame1 - frame 1 (the same type and size) -//! fu - forward horizontal displacement -//! fv - forward vertical displacement -//! bu - backward horizontal displacement -//! bv - backward vertical displacement -//! pos - new frame position -//! newFrame - new frame -//! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat; -//! occlusion masks 0, occlusion masks 1, -//! interpolated forward flow 0, interpolated forward flow 1, -//! interpolated backward flow 0, interpolated backward flow 1 -//! +/** @brief Interpolates frames (images) using provided optical flow (displacement field). + +@param frame0 First frame (32-bit floating point images, single channel). +@param frame1 Second frame. Must have the same type and size as frame0 . +@param fu Forward horizontal displacement. +@param fv Forward vertical displacement. +@param bu Backward horizontal displacement. +@param bv Backward vertical displacement. +@param pos New frame position. +@param newFrame Output image. 
+@param buf Temporary buffer, will have width x 6\*height size, CV_32FC1 type and contain 6 +GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward +horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, +interpolated backward vertical flow. +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv, @@ -317,6 +388,8 @@ CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors); +//! @} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDAOPTFLOW_HPP__ */ diff --git a/modules/cudastereo/include/opencv2/cudastereo.hpp b/modules/cudastereo/include/opencv2/cudastereo.hpp index a58156c433..af265bb44f 100644 --- a/modules/cudastereo/include/opencv2/cudastereo.hpp +++ b/modules/cudastereo/include/opencv2/cudastereo.hpp @@ -50,11 +50,25 @@ #include "opencv2/core/cuda.hpp" #include "opencv2/calib3d.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudastereo Stereo Correspondence + @} + */ + namespace cv { namespace cuda { +//! @addtogroup cudastereo +//! @{ + ///////////////////////////////////////// // StereoBM +/** @brief Class computing stereo correspondence (disparity map) using the block matching algorithm. : + +@sa StereoBM + */ class CV_EXPORTS StereoBM : public cv::StereoBM { public: @@ -63,20 +77,70 @@ public: virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0; }; +/** @brief Creates StereoBM object. + +@param numDisparities the disparity search range. For each pixel algorithm will find the best +disparity from 0 (default minimum disparity) to numDisparities. The search range can then be +shifted by changing the minimum disparity. 
+@param blockSize the linear size of the blocks compared by the algorithm. The size should be odd +(as the block is centered at the current pixel). Larger block size implies smoother, though less +accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher +chance for algorithm to find a wrong correspondence. + */ CV_EXPORTS Ptr createStereoBM(int numDisparities = 64, int blockSize = 19); ///////////////////////////////////////// // StereoBeliefPropagation -//! "Efficient Belief Propagation for Early Vision" P.Felzenszwalb +/** @brief Class computing stereo correspondence using the belief propagation algorithm. : + +The class implements algorithm described in @cite Felzenszwalb2006 . It can compute own data cost +(using a truncated linear model) or use a user-provided data cost. + +@note + StereoBeliefPropagation requires a lot of memory for message storage: + + \f[width \_ step \cdot height \cdot ndisp \cdot 4 \cdot (1 + 0.25)\f] + + and for data cost storage: + + \f[width\_step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 + \dotsm + \frac{1}{4^{levels}})\f] + + width_step is the number of bytes in a line including padding. + +StereoBeliefPropagation uses a truncated linear model for the data cost and discontinuity terms: + +\f[DataCost = data \_ weight \cdot \min ( \lvert Img_{Left}(x,y)-Img_{Right}(x-d,y) \rvert , max \_ data \_ term)\f] + +\f[DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)\f] + +For more details, see @cite Felzenszwalb2006 . + +By default, StereoBeliefPropagation uses floating-point arithmetics and the CV_32FC1 type for +messages. But it can also use fixed-point arithmetics and the CV_16SC1 message type for better +performance.
To avoid an overflow in this case, the parameters must satisfy the following +requirement: + +\f[10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX\f] + +@sa StereoMatcher + */ class CV_EXPORTS StereoBeliefPropagation : public cv::StereoMatcher { public: using cv::StereoMatcher::compute; + /** @overload */ virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0; - //! version for user specified data term + /** @brief Enables the stereo correspondence operator that finds the disparity for the specified data cost. + + @param data User-specified data cost, a matrix of msg_type type and + Size(\<image columns\>\*ndisp, \<image rows\>) size. + @param disparity Output disparity map. If disparity is empty, the output type is CV_16SC1 . + Otherwise, the type is retained. + @param stream Stream for the asynchronous version. + */ virtual void compute(InputArray data, OutputArray disparity, Stream& stream = Stream::Null()) = 0; //! number of BP iterations on each level @@ -107,18 +171,48 @@ public: virtual int getMsgType() const = 0; virtual void setMsgType(int msg_type) = 0; + /** @brief Uses a heuristic method to compute the recommended parameters ( ndisp, iters and levels ) for the + specified image size ( width and height ). + */ static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels); }; +/** @brief Creates StereoBeliefPropagation object. + +@param ndisp Number of disparities. +@param iters Number of BP iterations on each level. +@param levels Number of levels. +@param msg_type Type for messages. CV_16SC1 and CV_32FC1 types are supported. + */ CV_EXPORTS Ptr createStereoBeliefPropagation(int ndisp = 64, int iters = 5, int levels = 5, int msg_type = CV_32F); ///////////////////////////////////////// // StereoConstantSpaceBP -//! "A Constant-Space Belief Propagation Algorithm for Stereo Matching" -//! Qingxiong Yang, Liang Wang, Narendra Ahuja -//!
http://vision.ai.uiuc.edu/~qyang6/ +/** @brief Class computing stereo correspondence using the constant space belief propagation algorithm. : + +The class implements algorithm described in @cite Yang2010 . StereoConstantSpaceBP supports both local +minimum and global minimum data cost initialization algorithms. For more details, see the paper +mentioned above. By default, a local algorithm is used. To enable a global algorithm, set +use_local_init_data_cost to false . + +StereoConstantSpaceBP uses a truncated linear model for the data cost and discontinuity terms: + +\f[DataCost = data \_ weight \cdot \min ( \lvert I_2-I_1 \rvert , max \_ data \_ term)\f] + +\f[DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)\f] + +For more details, see @cite Yang2010 . + +By default, StereoConstantSpaceBP uses floating-point arithmetics and the CV_32FC1 type for +messages. But it can also use fixed-point arithmetics and the CV_16SC1 message type for better +performance. To avoid an overflow in this case, the parameters must satisfy the following +requirement: + +\f[10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX\f] + + */ class CV_EXPORTS StereoConstantSpaceBP : public cuda::StereoBeliefPropagation { public: @@ -129,23 +223,40 @@ public: virtual bool getUseLocalInitDataCost() const = 0; virtual void setUseLocalInitDataCost(bool use_local_init_data_cost) = 0; + /** @brief Uses a heuristic method to compute parameters (ndisp, iters, levels and nr_plane) for the specified + image size (width and height). + */ static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane); }; +/** @brief Creates StereoConstantSpaceBP object. + +@param ndisp Number of disparities. +@param iters Number of BP iterations on each level. +@param levels Number of levels. +@param nr_plane Number of disparity levels on the first level. +@param msg_type Type for messages.
CV_16SC1 and CV_32FC1 types are supported. + */ CV_EXPORTS Ptr createStereoConstantSpaceBP(int ndisp = 128, int iters = 8, int levels = 4, int nr_plane = 4, int msg_type = CV_32F); ///////////////////////////////////////// // DisparityBilateralFilter -//! Disparity map refinement using joint bilateral filtering given a single color image. -//! Qingxiong Yang, Liang Wang, Narendra Ahuja -//! http://vision.ai.uiuc.edu/~qyang6/ +/** @brief Class refining a disparity map using joint bilateral filtering. : + +The class implements @cite Yang2010 algorithm. + */ class CV_EXPORTS DisparityBilateralFilter : public cv::Algorithm { public: - //! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image. - //! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type. + /** @brief Refines a disparity map using joint bilateral filtering. + + @param disparity Input disparity map. CV_8UC1 and CV_16SC1 types are supported. + @param image Input image. CV_8UC1 and CV_8UC3 types are supported. + @param dst Destination disparity map. It has the same size and type as disparity . + @param stream Stream for the asynchronous version. + */ virtual void apply(InputArray disparity, InputArray image, OutputArray dst, Stream& stream = Stream::Null()) = 0; virtual int getNumDisparities() const = 0; @@ -170,24 +281,48 @@ public: virtual void setSigmaRange(double sigma_range) = 0; }; +/** @brief Creates DisparityBilateralFilter object. + +@param ndisp Number of disparities. +@param radius Filter radius. +@param iters Number of iterations. + */ CV_EXPORTS Ptr createDisparityBilateralFilter(int ndisp = 64, int radius = 3, int iters = 1); ///////////////////////////////////////// // Utility -//! Reprojects disparity image to 3D space. -//! Supports CV_8U and CV_16S types of input disparity. -//! The output is a 3- or 4-channel floating-point matrix. -//! 
Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map. -//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify. +/** @brief Reprojects a disparity image to 3D space. + +@param disp Input disparity image. CV_8U and CV_16S types are supported. +@param xyzw Output 3- or 4-channel floating-point image of the same size as disp . Each element of +xyzw(x,y) contains 3D coordinates (x,y,z) or (x,y,z,1) of the point (x,y) , computed from the +disparity map. +@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained via stereoRectify . +@param dst_cn The number of channels for output image. Can be 3 or 4. +@param stream Stream for the asynchronous version. + +@sa reprojectImageTo3D + */ CV_EXPORTS void reprojectImageTo3D(InputArray disp, OutputArray xyzw, InputArray Q, int dst_cn = 4, Stream& stream = Stream::Null()); -//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV. -//! Supported types of input disparity: CV_8U, CV_16S. -//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255). +/** @brief Colors a disparity image. + +@param src_disp Source disparity image. CV_8UC1 and CV_16SC1 types are supported. +@param dst_disp Output disparity image. It has the same size as src_disp . The type is CV_8UC4 +in BGRA format (alpha = 255). +@param ndisp Number of disparities. +@param stream Stream for the asynchronous version. + +This function draws a colored disparity map by converting disparity values from [0..ndisp) interval +first to HSV color space (where different disparity values correspond to different hues) and then +converting the pixels to RGB for visualization. + */ CV_EXPORTS void drawColorDisp(InputArray src_disp, OutputArray dst_disp, int ndisp, Stream& stream = Stream::Null()); +//! 
@} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDASTEREO_HPP__ */ diff --git a/modules/cudawarping/include/opencv2/cudawarping.hpp b/modules/cudawarping/include/opencv2/cudawarping.hpp index d759f55598..ca877d50c9 100644 --- a/modules/cudawarping/include/opencv2/cudawarping.hpp +++ b/modules/cudawarping/include/opencv2/cudawarping.hpp @@ -50,54 +50,178 @@ #include "opencv2/core/cuda.hpp" #include "opencv2/imgproc.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudawarping Image Warping + @} + */ + namespace cv { namespace cuda { -//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] -//! supports only CV_32FC1 map type +//! @addtogroup cudawarping +//! @{ + +/** @brief Applies a generic geometrical transformation to an image. + +@param src Source image. +@param dst Destination image with the size the same as xmap and the type the same as src . +@param xmap X values. Only CV_32FC1 type is supported. +@param ymap Y values. Only CV_32FC1 type is supported. +@param interpolation Interpolation method (see resize ). INTER_NEAREST , INTER_LINEAR and +INTER_CUBIC are supported for now. +@param borderMode Pixel extrapolation method (see borderInterpolate ). BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param borderValue Value used in case of a constant border. By default, it is 0. +@param stream Stream for the asynchronous version. + +The function transforms the source image using the specified map: + +\f[\texttt{dst} (x,y) = \texttt{src} (xmap(x,y), ymap(x,y))\f] + +Values of pixels with non-integer coordinates are computed using the bilinear interpolation. + +@sa remap + */ CV_EXPORTS void remap(InputArray src, OutputArray dst, InputArray xmap, InputArray ymap, int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null()); -//! resizes the image -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA +/** @brief Resizes an image. 
+ +@param src Source image. +@param dst Destination image with the same type as src . The size is dsize (when it is non-zero) +or the size is computed from src.size() , fx , and fy . +@param dsize Destination image size. If it is zero, it is computed as: +\f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] +Either dsize or both fx and fy must be non-zero. +@param fx Scale factor along the horizontal axis. If it is zero, it is computed as: +\f[\texttt{(double)dsize.width/src.cols}\f] +@param fy Scale factor along the vertical axis. If it is zero, it is computed as: +\f[\texttt{(double)dsize.height/src.rows}\f] +@param interpolation Interpolation method. INTER_NEAREST , INTER_LINEAR and INTER_CUBIC are +supported for now. +@param stream Stream for the asynchronous version. + +@sa resize + */ CV_EXPORTS void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); -//! warps the image using affine transformation -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC +/** @brief Applies an affine transformation to an image. + +@param src Source image. CV_8U , CV_16U , CV_32S , or CV_32F depth and 1, 3, or 4 channels are +supported. +@param dst Destination image with the same type as src . The size is dsize . +@param M *2x3* transformation matrix. +@param dsize Size of the destination image. +@param flags Combination of interpolation methods (see resize) and the optional flag +WARP_INVERSE_MAP specifying that M is an inverse transformation ( dst=\>src ). Only +INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC interpolation methods are supported. +@param borderMode Pixel extrapolation method (see borderInterpolate ). +@param borderValue Value used in case of a constant border. By default, it is 0. +@param stream Stream for the asynchronous version.
+ +@sa warpAffine + */ CV_EXPORTS void warpAffine(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null()); +/** @brief Builds transformation maps for affine transformation. + +@param M *2x3* transformation matrix. +@param inverse Flag specifying that M is an inverse transformation ( dst=\>src ). +@param dsize Size of the destination image. +@param xmap X values with CV_32FC1 type. +@param ymap Y values with CV_32FC1 type. +@param stream Stream for the asynchronous version. + +@sa cuda::warpAffine , cuda::remap + */ CV_EXPORTS void buildWarpAffineMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null()); -//! warps the image using perspective transformation -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC +/** @brief Applies a perspective transformation to an image. + +@param src Source image. CV_8U , CV_16U , CV_32S , or CV_32F depth and 1, 3, or 4 channels are +supported. +@param dst Destination image with the same type as src . The size is dsize . +@param M *3x3* transformation matrix. +@param dsize Size of the destination image. +@param flags Combination of interpolation methods (see resize ) and the optional flag +WARP_INVERSE_MAP specifying that M is the inverse transformation ( dst =\> src ). Only +INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC interpolation methods are supported. +@param borderMode Pixel extrapolation method (see borderInterpolate ). +@param borderValue Value used in case of a constant border. By default, it is 0. +@param stream Stream for the asynchronous version. + +@sa warpPerspective + */ CV_EXPORTS void warpPerspective(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null()); +/** @brief Builds transformation maps for perspective transformation. + +@param M *3x3* transformation matrix.
+@param inverse Flag specifying that M is an inverse transformation ( dst=\>src ). +@param dsize Size of the destination image. +@param xmap X values with CV_32FC1 type. +@param ymap Y values with CV_32FC1 type. +@param stream Stream for the asynchronous version. + +@sa cuda::warpPerspective , cuda::remap + */ CV_EXPORTS void buildWarpPerspectiveMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null()); -//! builds plane warping maps +/** @brief Builds plane warping maps. + */ CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, InputArray T, float scale, OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null()); -//! builds cylindrical warping maps +/** @brief Builds cylindrical warping maps. + */ CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale, OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null()); -//! builds spherical warping maps +/** @brief Builds spherical warping maps. + */ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale, OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null()); -//! rotates an image around the origin (0,0) and then shifts it -//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC -//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth +/** @brief Rotates an image around the origin (0,0) and then shifts it. + +@param src Source image. Supports 1, 3 or 4 channels images with CV_8U , CV_16U or CV_32F +depth. +@param dst Destination image with the same type as src . The size is dsize . +@param dsize Size of the destination image. +@param angle Angle of rotation in degrees. +@param xShift Shift along the horizontal axis. +@param yShift Shift along the vertical axis. +@param interpolation Interpolation method. 
Only INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC +are supported. +@param stream Stream for the asynchronous version. + +@sa cuda::warpAffine + */ CV_EXPORTS void rotate(InputArray src, OutputArray dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); -//! smoothes the source image and downsamples it +/** @brief Smoothes an image and downsamples it. + +@param src Source image. +@param dst Destination image. Will have Size((src.cols+1)/2, (src.rows+1)/2) size and the same +type as src . +@param stream Stream for the asynchronous version. + +@sa pyrDown + */ CV_EXPORTS void pyrDown(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); -//! upsamples the source image and then smoothes it +/** @brief Upsamples an image and then smoothes it. + +@param src Source image. +@param dst Destination image. Will have Size(src.cols\*2, src.rows\*2) size and the same type as +src . +@param stream Stream for the asynchronous version. + */ CV_EXPORTS void pyrUp(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); class CV_EXPORTS ImagePyramid : public Algorithm @@ -108,6 +232,8 @@ public: CV_EXPORTS Ptr createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null()); +//! 
@} + }} // namespace cv { namespace cuda { #endif /* __OPENCV_CUDAWARPING_HPP__ */ diff --git a/modules/cudev/include/opencv2/cudev.hpp b/modules/cudev/include/opencv2/cudev.hpp index a5fb4f6967..565efa1c6a 100644 --- a/modules/cudev/include/opencv2/cudev.hpp +++ b/modules/cudev/include/opencv2/cudev.hpp @@ -109,4 +109,11 @@ #include "cudev/expr/unary_op.hpp" #include "cudev/expr/warping.hpp" +/** + @addtogroup cuda + @{ + @defgroup cudev Device layer + @} +*/ + #endif diff --git a/modules/cudev/include/opencv2/cudev/block/block.hpp b/modules/cudev/include/opencv2/cudev/block/block.hpp index 385e1713e5..e8d59bb20b 100644 --- a/modules/cudev/include/opencv2/cudev/block/block.hpp +++ b/modules/cudev/include/opencv2/cudev/block/block.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + struct Block { __device__ __forceinline__ static uint blockId() @@ -122,6 +125,9 @@ __device__ __forceinline__ static void blockTransfrom(InIt1 beg1, InIt1 end1, In for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE) *o = op(*t1, *t2); } + +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp b/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp index 9f9ba60009..e52f829bf6 100644 --- a/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp +++ b/modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct DynamicSharedMem { __device__ __forceinline__ operator T*() @@ -81,6 +84,8 @@ template <> struct DynamicSharedMem } }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/block/reduce.hpp b/modules/cudev/include/opencv2/cudev/block/reduce.hpp index 4c9022631b..74c8fcac77 100644 --- a/modules/cudev/include/opencv2/cudev/block/reduce.hpp +++ b/modules/cudev/include/opencv2/cudev/block/reduce.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! 
@{ + // blockReduce template @@ -123,6 +126,8 @@ __device__ __forceinline__ void blockReduceKeyVal(const tuple(skeys, key, svals, val, tid, cmp); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/block/scan.hpp b/modules/cudev/include/opencv2/cudev/block/scan.hpp index c54dfef9f3..3369cff987 100644 --- a/modules/cudev/include/opencv2/cudev/block/scan.hpp +++ b/modules/cudev/include/opencv2/cudev/block/scan.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) { @@ -96,6 +99,8 @@ __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint t return blockScanInclusive(data, smem, tid) - data; } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp b/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp index c48e9146ef..767d32a466 100644 --- a/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp +++ b/modules/cudev/include/opencv2/cudev/block/vec_distance.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // NormL1 template struct NormL1 @@ -179,6 +182,8 @@ struct NormHamming } }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/common.hpp b/modules/cudev/include/opencv2/cudev/common.hpp index c8a7b7da25..f475e20b68 100644 --- a/modules/cudev/include/opencv2/cudev/common.hpp +++ b/modules/cudev/include/opencv2/cudev/common.hpp @@ -52,6 +52,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + using namespace cv::cuda; // CV_CUDEV_ARCH @@ -84,6 +87,8 @@ __host__ __device__ __forceinline__ int divUp(int total, int grain) #define CV_PI_F ((float)CV_PI) #define CV_LOG2_F ((float)CV_LOG2) +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp b/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp index f35ea2dc3c..2777a1e185 100644 --- a/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/binary_func.hpp @@ -55,6 +55,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + #define CV_CUDEV_EXPR_BINARY_FUNC(name) \ template \ __host__ Expr::ptr_type, typename PtrTraits::ptr_type, name ## _func::value_type, typename PtrTraits::value_type>::type> > > \ @@ -70,6 +73,8 @@ CV_CUDEV_EXPR_BINARY_FUNC(absdiff) #undef CV_CUDEV_EXPR_BINARY_FUNC +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp b/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp index f7e9655720..7533946fcc 100644 --- a/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/binary_op.hpp @@ -58,6 +58,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // Binary Operations #define CV_CUDEV_EXPR_BINOP_INST(op, functor) \ @@ -230,6 +233,8 @@ CV_CUDEV_EXPR_BINOP_INST(>>, bit_rshift) #undef CV_CUDEV_EXPR_BINOP_INST +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/color.hpp b/modules/cudev/include/opencv2/cudev/expr/color.hpp index 13f07c15a4..f53de78b3c 100644 --- a/modules/cudev/include/opencv2/cudev/expr/color.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/color.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + #define CV_CUDEV_EXPR_CVTCOLOR_INST(name) \ template \ __host__ Expr::ptr_type, name ## _func::value_type>::elem_type> > > \ @@ -277,6 +280,8 @@ CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LBGRA) #undef CV_CUDEV_EXPR_CVTCOLOR_INST +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/deriv.hpp b/modules/cudev/include/opencv2/cudev/expr/deriv.hpp index 822a86b9a3..da51cc711f 100644 --- a/modules/cudev/include/opencv2/cudev/expr/deriv.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/deriv.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // derivX template @@ -116,6 +119,8 @@ laplacian_(const SrcPtr& src) return makeExpr(laplacianPtr(src)); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/expr.hpp b/modules/cudev/include/opencv2/cudev/expr/expr.hpp index 46c780b4a2..cdc8612173 100644 --- a/modules/cudev/include/opencv2/cudev/expr/expr.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/expr.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct Expr { Body body; @@ -87,6 +90,8 @@ template struct PtrTraits< Expr > } }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp b/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp index 56a067de9d..d7ecd3bb06 100644 --- a/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp @@ -56,6 +56,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // min/max template @@ -127,6 +130,8 @@ lut_(const SrcPtr& src, const TablePtr& tbl) return makeExpr(lutPtr(src, tbl)); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/reduction.hpp b/modules/cudev/include/opencv2/cudev/expr/reduction.hpp index 1f0a3ff0e5..598fb4f86c 100644 --- a/modules/cudev/include/opencv2/cudev/expr/reduction.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/reduction.hpp @@ -56,6 +56,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // sum template struct SumExprBody @@ -254,6 +257,8 @@ integral_(const SrcPtr& src) return makeExpr(body); } +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp b/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp index a30f6a6f3c..b19cec8272 100644 --- a/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/unary_func.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + #define CV_CUDEV_EXPR_UNARY_FUNC(name) \ template \ __host__ Expr::ptr_type, name ## _func::value_type> > > \ @@ -93,6 +96,8 @@ pow_(const SrcPtr& src, float power) return makeExpr(transformPtr(src, bind2nd(pow_func::value_type>(), power))); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp b/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp index 905013e42f..c5fabe4ac9 100644 --- a/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/unary_op.hpp @@ -57,6 +57,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + #define CV_CUDEV_EXPR_UNOP_INST(op, functor) \ template \ __host__ Expr >::ptr_type, functor > > \ @@ -89,6 +92,8 @@ CV_CUDEV_EXPR_UNOP_INST(~, bit_not) #undef CV_CUDEV_EXPR_UNOP_INST +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/expr/warping.hpp b/modules/cudev/include/opencv2/cudev/expr/warping.hpp index f942a3fb6f..e1f78b9689 100644 --- a/modules/cudev/include/opencv2/cudev/expr/warping.hpp +++ b/modules/cudev/include/opencv2/cudev/expr/warping.hpp @@ -57,6 +57,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // resize template @@ -166,6 +169,8 @@ transpose_(const SrcPtr& src) return makeExpr(body); } +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp b/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp index 8be854780a..5134d04ed9 100644 --- a/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp +++ b/modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // Various 3/4-channel to 3/4-channel RGB transformations #define CV_CUDEV_RGB2RGB_INST(name, scn, dcn, bidx) \ @@ -469,6 +472,8 @@ CV_CUDEV_RGB5x52GRAY_INST(BGR565_to_GRAY, 6) #undef CV_CUDEV_RGB5x52GRAY_INST +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/functional/functional.hpp b/modules/cudev/include/opencv2/cudev/functional/functional.hpp index 7934f78b94..125b66f07a 100644 --- a/modules/cudev/include/opencv2/cudev/functional/functional.hpp +++ b/modules/cudev/include/opencv2/cudev/functional/functional.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // Function Objects template struct unary_function @@ -873,6 +876,8 @@ template struct IsBinaryFunction enum { value = (sizeof(check(makeF())) == sizeof(Yes)) }; }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp b/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp index d3a40db0ea..ff075dc2b3 100644 --- a/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp +++ b/modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct UnaryTupleAdapter { typedef typename Op::result_type result_type; @@ -93,6 +96,8 @@ __host__ __device__ BinaryTupleAdapter binaryTupleAdapter(const Op& return a; } +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/copy.hpp b/modules/cudev/include/opencv2/cudev/grid/copy.hpp index d7d3ea8343..1d30f99763 100644 --- a/modules/cudev/include/opencv2/cudev/grid/copy.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/copy.hpp @@ -57,6 +57,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridCopy_(const SrcPtr& src, GpuMat_& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -447,6 +450,8 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, Glob gridCopy_(src, dst, stream); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp index ecb1a19c85..154f73771b 100644 --- a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridHistogram_(const SrcPtr& src, GpuMat_& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -114,6 +117,8 @@ __host__ void gridHistogram(const SrcPtr& src, GpuMat_& dst, Stream& st gridHistogram_(src, dst, stream); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/integral.hpp b/modules/cudev/include/opencv2/cudev/grid/integral.hpp index d948c1267f..6312f44772 100644 --- a/modules/cudev/include/opencv2/cudev/grid/integral.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/integral.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridIntegral(const SrcPtr& src, GpuMat_& dst, Stream& stream = Stream::Null()) { @@ -64,6 +67,8 @@ __host__ void gridIntegral(const SrcPtr& src, GpuMat_& dst, Stream& str integral_detail::integral(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream)); } +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp b/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp index 99833bd3f7..22eafe69fb 100644 --- a/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/pyramids.hpp @@ -55,6 +55,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridPyrDown_(const SrcPtr& src, GpuMat_& dst, Stream& stream = Stream::Null()) { @@ -83,6 +86,8 @@ __host__ void gridPyrUp(const SrcPtr& src, GpuMat_& dst, Stream& stream pyramids_detail::pyrUp(shrinkPtr(src), shrinkPtr(dst), rows, cols, dst.rows, dst.cols, StreamAccessor::getStream(stream)); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/reduce.hpp b/modules/cudev/include/opencv2/cudev/grid/reduce.hpp index 3861ae2281..4551bc886b 100644 --- a/modules/cudev/include/opencv2/cudev/grid/reduce.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/reduce.hpp @@ -57,6 +57,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridCalcSum_(const SrcPtr& src, GpuMat_& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -370,6 +373,8 @@ __host__ void gridCountNonZero(const SrcPtr& src, GpuMat_& dst, Stream& gridCountNonZero_(src, dst, stream); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp b/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp index 361d40d1c8..595ee8be6f 100644 --- a/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp @@ -59,6 +59,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct Sum : plus { typedef T work_type; @@ -225,6 +228,8 @@ __host__ void gridReduceToColumn(const SrcPtr& src, GpuMat_& dst, Strea gridReduceToColumn_(src, dst, stream); } +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp b/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp index ed7e8ee605..1a7134793b 100644 --- a/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/split_merge.hpp @@ -57,6 +57,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -579,6 +582,8 @@ __host__ void gridSplit(const SrcPtr& src, GlobPtrSz (&dst)[COUNT], Str gridSplit_(src, dst, stream); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/transform.hpp b/modules/cudev/include/opencv2/cudev/grid/transform.hpp index 62555ab5ae..2f16f7d392 100644 --- a/modules/cudev/include/opencv2/cudev/grid/transform.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/transform.hpp @@ -57,6 +57,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridTransformUnary_(const SrcPtr& src, GpuMat_& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -536,6 +539,8 @@ __host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz, gridTransformTuple_(src, dst, op, stream); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/transpose.hpp b/modules/cudev/include/opencv2/cudev/grid/transpose.hpp index cf1bf8303e..0d7a19573d 100644 --- a/modules/cudev/include/opencv2/cudev/grid/transpose.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/transpose.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __host__ void gridTranspose_(const SrcPtr& src, GpuMat_& dst, Stream& stream = Stream::Null()) { @@ -98,6 +101,8 @@ __host__ void gridTranspose(const SrcPtr& src, const GlobPtrSz& dst, St gridTranspose_(src, dst, stream); } +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp index d3c56e7713..b3c5f5f23b 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct ConstantPtr { typedef T value_type; @@ -88,6 +91,8 @@ template struct PtrTraits< ConstantPtrSz > : PtrTraitsBase< Cons { }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp index 097007400f..95088177f4 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // derivX template struct DerivXPtr @@ -388,6 +391,8 @@ template struct PtrTraits< LaplacianPtrSz struct BrdConstant @@ -214,6 +217,8 @@ __host__ BrdBase::ptr_type> brdWrap(const Sr return b; } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp index 7385926638..3563e56fcc 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct GlobPtr { typedef T value_type; @@ -106,6 +109,8 @@ template struct PtrTraits< GlobPtrSz > : PtrTraitsBase class GpuMat_ : public GpuMat { @@ -154,6 +157,8 @@ template struct PtrTraits< GpuMat_ > : PtrTraitsBase, { }; +//! 
@} + }} #include "detail/gpumat.hpp" diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp index e86d7191ed..256d4fd00a 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp @@ -55,6 +55,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // Nearest template struct NearestInterPtr @@ -380,6 +383,8 @@ template struct PtrTraits< CommonAreaInterPtrSz > : PtrTr { }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp index accf545617..26a3725c08 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct LutPtr { typedef typename PtrTraits::value_type value_type; @@ -95,6 +98,8 @@ template struct PtrTraits< LutPtrSz struct PtrTraits< SingleMaskChannelsSz > : Ptr { }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp index db2669a40a..9d8745f94e 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct RemapPtr1 { typedef typename PtrTraits::value_type value_type; @@ -149,6 +152,8 @@ template struct PtrTraits< RemapPtr { }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp index 10a4bad906..63ae7eb8a1 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp @@ -54,6 +54,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! 
@{ + template struct ResizePtr { typedef typename PtrTraits::value_type value_type; @@ -98,6 +101,8 @@ template struct PtrTraits< ResizePtrSz > : PtrTraitsBase< { }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp index 6df4a783d8..6fa83e631e 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp @@ -92,6 +92,9 @@ namespace namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + #if CUDART_VERSION >= 5050 template struct TexturePtr @@ -248,6 +251,8 @@ template struct PtrTraits< Texture > : PtrTraitsBase, #endif +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp index 7fb4b32b17..f1552cafe8 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct PtrTraitsBase { typedef Ptr2DSz ptr_sz_type; @@ -96,6 +99,8 @@ __host__ int getCols(const Ptr2DSz& ptr) return PtrTraits::getCols(ptr); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp index f540e75213..b6edb913d1 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // UnaryTransformPtr template struct UnaryTransformPtr @@ -146,6 +149,8 @@ template struct PtrTraits< BinaryTransf { }; +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp index 80e5fbeef0..c9d00833ff 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // affine struct AffineMapPtr @@ -147,6 +150,8 @@ warpPerspectivePtr(const SrcPtr& src, Size dstSize, const GpuMat_& warpMa return remapPtr(src, perspectiveMap(dstSize, warpMat)); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp index 934939f624..368848248c 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp @@ -52,6 +52,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template struct ZipPtr; template struct ZipPtr< tuple > : tuple @@ -168,6 +171,8 @@ template struct PtrTraits< ZipPtrSz > : PtrTraitsBase { }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/atomic.hpp b/modules/cudev/include/opencv2/cudev/util/atomic.hpp index 2da110231b..a88cd99b36 100644 --- a/modules/cudev/include/opencv2/cudev/util/atomic.hpp +++ b/modules/cudev/include/opencv2/cudev/util/atomic.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // atomicAdd __device__ __forceinline__ int atomicAdd(int* address, int val) @@ -192,6 +195,8 @@ __device__ static double atomicMax(double* address, double val) #endif } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/limits.hpp b/modules/cudev/include/opencv2/cudev/util/limits.hpp index 58faca6b56..71e7faa779 100644 --- a/modules/cudev/include/opencv2/cudev/util/limits.hpp +++ b/modules/cudev/include/opencv2/cudev/util/limits.hpp @@ -52,6 +52,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! 
@{ + template struct numeric_limits; template <> struct numeric_limits @@ -119,6 +122,8 @@ template <> struct numeric_limits static const bool is_signed = true; }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp index ff7ce85986..3176542d2c 100644 --- a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp +++ b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __device__ __forceinline__ T saturate_cast(uchar v) { return T(v); } template __device__ __forceinline__ T saturate_cast(schar v) { return T(v); } template __device__ __forceinline__ T saturate_cast(ushort v) { return T(v); } @@ -267,6 +270,8 @@ template <> __device__ __forceinline__ uint saturate_cast(double v) #endif } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp b/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp index db63f51800..2dd6f12ace 100644 --- a/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp +++ b/modules/cudev/include/opencv2/cudev/util/simd_functions.hpp @@ -128,6 +128,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // 2 __device__ __forceinline__ uint vadd2(uint a, uint b) @@ -908,6 +911,8 @@ __device__ __forceinline__ uint vmin4(uint a, uint b) return r; } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/tuple.hpp b/modules/cudev/include/opencv2/cudev/util/tuple.hpp index b015ff3445..70d0424bdb 100644 --- a/modules/cudev/include/opencv2/cudev/util/tuple.hpp +++ b/modules/cudev/include/opencv2/cudev/util/tuple.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! 
@{ + using tuple_detail::tuple; using tuple_detail::tuple_size; using tuple_detail::get; @@ -75,6 +78,8 @@ template class CvtOp> struct ConvertTuple typedef typename tuple_detail::ConvertTuple::value, CvtOp>::type type; }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/type_traits.hpp b/modules/cudev/include/opencv2/cudev/util/type_traits.hpp index ca800c0b7a..acd1d3ba23 100644 --- a/modules/cudev/include/opencv2/cudev/util/type_traits.hpp +++ b/modules/cudev/include/opencv2/cudev/util/type_traits.hpp @@ -52,6 +52,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // NullType struct NullType {}; @@ -164,6 +167,8 @@ template struct LargerType >::type type; }; +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/vec_math.hpp b/modules/cudev/include/opencv2/cudev/util/vec_math.hpp index 361ef7b294..82fa06e9c9 100644 --- a/modules/cudev/include/opencv2/cudev/util/vec_math.hpp +++ b/modules/cudev/include/opencv2/cudev/util/vec_math.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // saturate_cast namespace vec_math_detail @@ -931,6 +934,8 @@ CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) #undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp b/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp index 585423dd52..9bb5678e6f 100644 --- a/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp +++ b/modules/cudev/include/opencv2/cudev/util/vec_traits.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // MakeVec template struct MakeVec; @@ -177,6 +180,8 @@ template<> struct VecTraits __host__ __device__ __forceinline__ static char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);} }; +//! 
@} + }} // DataType diff --git a/modules/cudev/include/opencv2/cudev/warp/reduce.hpp b/modules/cudev/include/opencv2/cudev/warp/reduce.hpp index 089ef92d0f..f3919c2feb 100644 --- a/modules/cudev/include/opencv2/cudev/warp/reduce.hpp +++ b/modules/cudev/include/opencv2/cudev/warp/reduce.hpp @@ -53,6 +53,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + // warpReduce template @@ -201,6 +204,8 @@ smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9); } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/warp/scan.hpp b/modules/cudev/include/opencv2/cudev/warp/scan.hpp index acd032fb00..a4402986dc 100644 --- a/modules/cudev/include/opencv2/cudev/warp/scan.hpp +++ b/modules/cudev/include/opencv2/cudev/warp/scan.hpp @@ -52,6 +52,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + template __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) { @@ -94,6 +97,8 @@ __device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint ti return warpScanInclusive(data, smem, tid) - data; } +//! @} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp b/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp index a6aae5b902..97af06972e 100644 --- a/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp +++ b/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp @@ -51,6 +51,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + #if CV_CUDEV_ARCH >= 300 // shfl @@ -419,6 +422,8 @@ CV_CUDEV_SHFL_XOR_VEC_INST(double) #endif // CV_CUDEV_ARCH >= 300 +//! 
@} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/warp/warp.hpp b/modules/cudev/include/opencv2/cudev/warp/warp.hpp index c7649880f3..61caea259e 100644 --- a/modules/cudev/include/opencv2/cudev/warp/warp.hpp +++ b/modules/cudev/include/opencv2/cudev/warp/warp.hpp @@ -50,6 +50,9 @@ namespace cv { namespace cudev { +//! @addtogroup cudev +//! @{ + enum { LOG_WARP_SIZE = 5, @@ -117,6 +120,8 @@ __device__ __forceinline__ void warpYota(OutIt beg, OutIt end, T value) *t = value; } +//! @} + }} #endif diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index a60a93bddf..c6223fb6bb 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -46,18 +46,54 @@ #include "opencv2/core.hpp" #include "opencv2/flann/miniflann.hpp" +/** + @defgroup features2d 2D Features Framework + @{ + @defgroup features2d_main Feature Detection and Description + @defgroup features2d_match Descriptor Matchers + +Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that enables you to +easily switch between different algorithms solving the same problem. This section is devoted to +matching descriptors that are represented as vectors in a multidimensional space. All objects that +implement vector descriptor matchers inherit the DescriptorMatcher interface. 
+ +@note + - An example explaining keypoint matching can be found at + opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp + - An example on descriptor matching evaluation can be found at + opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp + - An example on one to many image matching can be found at + opencv_source_code/samples/cpp/matching_to_many_images.cpp + + @defgroup features2d_draw Drawing Function of Keypoints and Matches + @defgroup features2d_category Object Categorization + +This section describes approaches based on local 2D features and used to categorize objects. + +@note + - A complete Bag-Of-Words sample can be found at + opencv_source_code/samples/cpp/bagofwords_classification.cpp + - (Python) An example using the features2D framework to perform object categorization can be + found at opencv_source_code/samples/python2/find_obj.py + + @} + */ + namespace cv { +//! @addtogroup features2d +//! @{ + // //! writes vector of keypoints to the file storage // CV_EXPORTS void write(FileStorage& fs, const String& name, const std::vector& keypoints); // //! reads vector of keypoints from the specified file storage node // CV_EXPORTS void read(const FileNode& node, CV_OUT std::vector& keypoints); -/* - * A class filters a vector of keypoints. - * Because now it is difficult to provide a convenient interface for all usage scenarios of the keypoints filter class, - * it has only several needed by now static methods. +/** @brief A class filters a vector of keypoints. + + Because now it is difficult to provide a convenient interface for all usage scenarios of the + keypoints filter class, it has only several needed by now static methods. 
*/ class CV_EXPORTS KeyPointsFilter { @@ -91,44 +127,66 @@ public: /************************************ Base Classes ************************************/ -/* - * Abstract base class for 2D image feature detectors and descriptor extractors - */ +/** @brief Abstract base class for 2D image feature detectors and descriptor extractors +*/ class CV_EXPORTS_W Feature2D : public virtual Algorithm { public: virtual ~Feature2D(); - /* - * Detect keypoints in an image. - * image The image. - * keypoints The detected keypoints. - * mask Mask specifying where to look for keypoints (optional). Must be a char - * matrix with non-zero values in the region of interest. + /** @brief Detects keypoints in an image (first variant) or image set (second variant). + + @param image Image. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i] . + @param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer + matrix with non-zero values in the region of interest. */ CV_WRAP virtual void detect( InputArray image, CV_OUT std::vector& keypoints, InputArray mask=noArray() ); + /** @overload + @param images Image set. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i] . + @param masks Masks for each input image specifying where to look for keypoints (optional). + masks[i] is a mask for images[i]. + */ virtual void detect( InputArrayOfArrays images, std::vector >& keypoints, InputArrayOfArrays masks=noArray() ); - /* - * Compute the descriptors for a set of keypoints in an image. - * image The image. - * keypoints The input keypoints. Keypoints for which a descriptor cannot be computed are removed. - * descriptors Copmputed descriptors. Row i is the descriptor for keypoint i. 
+ /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set + (second variant). + + @param image Image. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint + with several dominant orientations (for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for keypoints[i]. Row j is the descriptor for the j-th keypoint in + keypoints (or keypoints[i]). */ CV_WRAP virtual void compute( InputArray image, CV_OUT CV_IN_OUT std::vector& keypoints, OutputArray descriptors ); + /** @overload + + @param images Image set. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint + with several dominant orientations (for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for keypoints[i]. Row j is the descriptor for the j-th keypoint in + keypoints (or keypoints[i]). + */ virtual void compute( InputArrayOfArrays images, std::vector >& keypoints, OutputArrayOfArrays descriptors ); - /* Detects keypoints and computes the descriptors */ + /** Detects keypoints and computes the descriptors */ CV_WRAP virtual void detectAndCompute( InputArray image, InputArray mask, CV_OUT std::vector& keypoints, OutputArray descriptors, @@ -138,33 +196,96 @@ public: CV_WRAP virtual int descriptorType() const; CV_WRAP virtual int defaultNorm() const; - // Return true if detector object is empty + //! 
Return true if detector object is empty CV_WRAP virtual bool empty() const; }; +/** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch +between different algorithms solving the same problem. All objects that implement keypoint detectors +inherit the FeatureDetector interface. */ typedef Feature2D FeatureDetector; + +/** Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that enables you +to easily switch between different algorithms solving the same problem. This section is devoted to +computing descriptors represented as vectors in a multidimensional space. All objects that implement +the vector descriptor extractors inherit the DescriptorExtractor interface. + */ typedef Feature2D DescriptorExtractor; -/*! - BRISK implementation -*/ +//! @addtogroup features2d_main +//! @{ + +/** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11 . + */ class CV_EXPORTS_W BRISK : public Feature2D { public: + /** @brief The BRISK constructor + + @param thresh FAST/AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param patternScale apply this scale to the pattern used for sampling the neighbourhood of a + keypoint. + */ CV_WRAP static Ptr create(int thresh=30, int octaves=3, float patternScale=1.0f); - // custom setup + + /** @brief The BRISK constructor for a custom pattern + + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. 
*/ CV_WRAP static Ptr create(const std::vector &radiusList, const std::vector &numberList, float dMax=5.85f, float dMin=8.2f, const std::vector& indexChange=std::vector()); }; -/*! - ORB implementation. -*/ +/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor + +described in @cite RRKB11 . The algorithm uses FAST in pyramids to detect stable keypoints, selects +the strongest features using FAST or Harris response, finds their orientation using first-order +moments and computes the descriptors using BRIEF (where the coordinates of random point pairs (or +k-tuples) are rotated according to the measured orientation). + */ class CV_EXPORTS_W ORB : public Feature2D { public: enum { kBytes = 32, HARRIS_SCORE=0, FAST_SCORE=1 }; + /** @brief The ORB constructor + + @param nfeatures The maximum number of features to retain. + @param scaleFactor Pyramid decimation ratio, greater than 1. scaleFactor==2 means the classical + pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor + will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor + will mean that to cover certain scale range you will need more pyramid levels and so the speed + will suffer. + @param nlevels The number of pyramid levels. The smallest level will have linear size equal to + input_image_linear_size/pow(scaleFactor, nlevels). + @param edgeThreshold This is size of the border where the features are not detected. It should + roughly match the patchSize parameter. + @param firstLevel It should be 0 in the current implementation. + @param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The + default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, + so we get 0/1 response. Other possible values are 3 and 4. 
For example, 3 means that we take 3 + random points (of course, those point coordinates are random, but they are generated from the + pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel + rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such + output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, + denoted as NORM_HAMMING2 (2 bits per bin). When WTA_K=4, we take 4 random points to compute each + bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3). + @param scoreType The default HARRIS_SCORE means that Harris algorithm is used to rank features + (the score is written to KeyPoint::score and is used to retain best nfeatures features); + FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints, + but it is a little faster to compute. + @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller + pyramid layers the perceived image area covered by a feature will be larger. + @param fastThreshold + */ CV_WRAP static Ptr create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31, int firstLevel=0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20); @@ -196,15 +317,16 @@ public: CV_WRAP virtual int getFastThreshold() const = 0; }; -/*! - Maximal Stable Extremal Regions class. +/** @brief Maximally stable extremal region extractor. - The class implements MSER algorithm introduced by J. Matas. - Unlike SIFT, SURF and many other detectors in OpenCV, this is salient region detector, - not the salient point detector. +The class encapsulates all the parameters of the MSER extraction algorithm (see +<http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions>). Also see +<http://code.opencv.org/projects/opencv/wiki/MSER> for useful comments and parameters description. - It returns the regions, each of those is encoded as a contour. 
-*/ +@note + - (Python) A complete example showing the use of the MSER detector can be found at + opencv_source_code/samples/python2/mser.py + */ class CV_EXPORTS_W MSER : public Feature2D { public: @@ -231,13 +353,38 @@ public: CV_WRAP virtual bool getPass2Only() const = 0; }; -//! detects corners using FAST algorithm by E. Rosten +/** @overload */ CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, int threshold, bool nonmaxSuppression=true ); +/** @brief Detects corners using the FAST algorithm + +@param image grayscale image where keypoints (corners) are detected. +@param keypoints keypoints detected on the image. +@param threshold threshold on difference between intensity of the central pixel and pixels of a +circle around this pixel. +@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners +(keypoints). +@param type one of the three neighborhoods as defined in the paper: +FastFeatureDetector::TYPE_9_16, FastFeatureDetector::TYPE_7_12, +FastFeatureDetector::TYPE_5_8 + +Detects corners using the FAST algorithm by @cite Rosten06 . + +@note In Python API, types are given as cv2.FAST_FEATURE_DETECTOR_TYPE_5_8, +cv2.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv2.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner +detection, use cv2.FAST.detect() method. + */ CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, int threshold, bool nonmaxSuppression, int type ); +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the FAST method. : + */ class CV_EXPORTS_W FastFeatureDetector : public Feature2D { public: @@ -261,7 +408,8 @@ public: CV_WRAP virtual int getType() const = 0; }; - +/** @brief Wrapping class for feature detection using the goodFeaturesToTrack function. 
: + */ class CV_EXPORTS_W GFTTDetector : public Feature2D { public: @@ -286,7 +434,37 @@ public: CV_WRAP virtual double getK() const = 0; }; - +/** @brief Class for extracting blobs from an image. : + +The class implements a simple algorithm for extracting blobs from an image: + +1. Convert the source image to binary images by applying thresholding with several thresholds from + minThreshold (inclusive) to maxThreshold (exclusive) with distance thresholdStep between + neighboring thresholds. +2. Extract connected components from every binary image by findContours and calculate their + centers. +3. Group centers from several binary images by their coordinates. Close centers form one group that + corresponds to one blob, which is controlled by the minDistBetweenBlobs parameter. +4. From the groups, estimate final centers of blobs and their radiuses and return as locations and + sizes of keypoints. + +This class performs several filtrations of returned blobs. You should set filterBy\* to true/false +to turn on/off corresponding filtration. Available filtrations: + +- **By color**. This filter compares the intensity of a binary image at the center of a blob to +blobColor. If they differ, the blob is filtered out. Use blobColor = 0 to extract dark blobs +and blobColor = 255 to extract light blobs. +- **By area**. Extracted blobs have an area between minArea (inclusive) and maxArea (exclusive). +- **By circularity**. Extracted blobs have circularity +(\f$\frac{4*\pi*Area}{perimeter * perimeter}\f$) between minCircularity (inclusive) and +maxCircularity (exclusive). +- **By ratio of the minimum inertia to maximum inertia**. Extracted blobs have this ratio +between minInertiaRatio (inclusive) and maxInertiaRatio (exclusive). +- **By convexity**. Extracted blobs have convexity (area / area of blob convex hull) between +minConvexity (inclusive) and maxConvexity (exclusive). + +Default values of parameters are tuned to extract dark circular blobs. 
+ */ class CV_EXPORTS_W SimpleBlobDetector : public Feature2D { public: @@ -322,9 +500,16 @@ public: create(const SimpleBlobDetector::Params ¶meters = SimpleBlobDetector::Params()); }; +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ -/*! -KAZE implementation +/** @brief Class implementing the KAZE keypoint detector and descriptor extractor, described in @cite ABD12 . + +@note AKAZE descriptor can only be used with KAZE or AKAZE keypoints. [ABD12] KAZE Features. Pablo +F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on Computer Vision +(ECCV), Florence, Italy, October 2012. */ class CV_EXPORTS_W KAZE : public Feature2D { @@ -337,6 +522,16 @@ public: DIFF_CHARBONNIER = 3 }; + /** @brief The KAZE constructor + + @param extended Set to enable extraction of extended (128-byte) descriptor. + @param upright Set to enable use of upright descriptors (non rotation-invariant). + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or + DIFF_CHARBONNIER + */ CV_WRAP static Ptr create(bool extended=false, bool upright=false, float threshold = 0.001f, int nOctaves = 4, int nOctaveLayers = 4, @@ -361,9 +556,13 @@ public: CV_WRAP virtual int getDiffusivity() const = 0; }; -/*! -AKAZE implementation -*/ +/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13 . + +@note AKAZE descriptors can only be used with KAZE or AKAZE keypoints. Try to avoid using *extract* +and *detect* instead of *operator()* due to performance reasons. [ANB13] Fast Explicit Diffusion +for Accelerated Features in Nonlinear Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien +Bartoli. In British Machine Vision Conference (BMVC), Bristol, UK, September 2013. 
+ */ class CV_EXPORTS_W AKAZE : public Feature2D { public: @@ -376,6 +575,18 @@ public: DESCRIPTOR_MLDB = 5 }; + /** @brief The AKAZE constructor + + @param descriptor_type Type of the extracted descriptor: DESCRIPTOR_KAZE, + DESCRIPTOR_KAZE_UPRIGHT, DESCRIPTOR_MLDB or DESCRIPTOR_MLDB_UPRIGHT. + @param descriptor_size Size of the descriptor in bits. 0 -\> Full size + @param descriptor_channels Number of channels in the descriptor (1, 2, 3) + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or + DIFF_CHARBONNIER + */ CV_WRAP static Ptr create(int descriptor_type=AKAZE::DESCRIPTOR_MLDB, int descriptor_size = 0, int descriptor_channels = 3, float threshold = 0.001f, int nOctaves = 4, @@ -403,6 +614,8 @@ public: CV_WRAP virtual int getDiffusivity() const = 0; }; +//! @} features2d_main + /****************************************************************************************\ * Distance * \****************************************************************************************/ @@ -501,76 +714,153 @@ template struct HammingMultilevel /****************************************************************************************\ * DescriptorMatcher * \****************************************************************************************/ -/* - * Abstract base class for matching two sets of descriptors. + +//! @addtogroup features2d_match +//! @{ + +/** @brief Abstract base class for matching keypoint descriptors. + +It has two groups of match methods: for matching descriptors of an image with another image or with +an image set. */ class CV_EXPORTS_W DescriptorMatcher : public Algorithm { public: virtual ~DescriptorMatcher(); - /* - * Add descriptors to train descriptor collection. - * descriptors Descriptors to add. 
Each descriptors[i] is a descriptors set from one image. + /** @brief Adds descriptors to train a CPU (trainDescCollection) or GPU (utrainDescCollection) descriptor + collection. + + If the collection is not empty, the new descriptors are added to existing train descriptors. + + @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same + train image. */ CV_WRAP virtual void add( InputArrayOfArrays descriptors ); - /* - * Get train descriptors collection. + + /** @brief Returns a constant reference to the train descriptor collection trainDescCollection . */ CV_WRAP const std::vector& getTrainDescriptors() const; - /* - * Clear train descriptors collection. + + /** @brief Clears the train descriptor collections. */ CV_WRAP virtual void clear(); - /* - * Return true if there are not train descriptors in collection. + /** @brief Returns true if there are no train descriptors in both collections. */ CV_WRAP virtual bool empty() const; - /* - * Return true if the matcher supports mask in match methods. + + /** @brief Returns true if the descriptor matcher supports masking permissible matches. */ CV_WRAP virtual bool isMaskSupported() const = 0; - /* - * Train matcher (e.g. train flann index). - * In all methods to match the method train() is run every time before matching. - * Some descriptor matchers (e.g. BruteForceMatcher) have empty implementation - * of this method, other matchers really train their inner structures - * (e.g. FlannBasedMatcher trains flann::Index). So nonempty implementation - * of train() should check the class object state and do traing/retraining - * only if the state requires that (e.g. FlannBasedMatcher trains flann::Index - * if it has not trained yet or if new descriptors have been added to the train - * collection). + /** @brief Trains a descriptor matcher + + Trains a descriptor matcher (for example, the flann index). In all methods to match, the method + train() is run every time before matching.
Some descriptor matchers (for example, BruteForceMatcher) + have an empty implementation of this method. Other matchers really train their inner structures (for + example, FlannBasedMatcher trains flann::Index ). */ CV_WRAP virtual void train(); - /* - * Group of methods to match descriptors from image pair. - * Method train() is run in this methods. + + /** @brief Finds the best match for each descriptor from a query set. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\<uchar\>(i,j) is non-zero. */ - // Find one best match for each query descriptor (if mask is empty). CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors, CV_OUT std::vector& matches, InputArray mask=noArray() ) const; - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + + /** @brief Finds the k best matches for each descriptor from a query set.
+ + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + These extended variants of DescriptorMatcher::match methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match + for the details about query and train descriptors. + */ CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors, CV_OUT std::vector >& matches, int k, InputArray mask=noArray(), bool compactResult=false ) const; - // Find best matches for each query descriptor which have distance less than - // maxDistance (in increasing order of distances). + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Found matches. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. 
If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. + */ void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, std::vector >& matches, float maxDistance, InputArray mask=noArray(), bool compactResult=false ) const; - /* - * Group of methods to match descriptors from one image to image set. - * See description of similar methods for matching image pair above. - */ + + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + */ CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector& matches, InputArrayOfArrays masks=noArray() ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param masks Set of masks. 
Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, int k, InputArrayOfArrays masks=noArray(), bool compactResult=false ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Found matches. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ void radiusMatch( InputArray queryDescriptors, std::vector >& matches, float maxDistance, InputArrayOfArrays masks=noArray(), bool compactResult=false ); @@ -579,14 +869,28 @@ public: // Writes matcher object to a file storage virtual void write( FileStorage& ) const; - // Clone the matcher. If emptyTrainData is false the method create deep copy of the object, i.e. copies - // both parameters and train data. If emptyTrainData is true the method create object copy with current parameters - // but with empty train data. + /** @brief Clones the matcher. 
+ + @param emptyTrainData If emptyTrainData is false, the method creates a deep copy of the object, + that is, copies both parameters and train data. If emptyTrainData is true, the method creates an + object copy with the current parameters but with empty train data. + */ virtual Ptr clone( bool emptyTrainData=false ) const = 0; + /** @brief Creates a descriptor matcher of a given type with the default parameters (using default + constructor). + + @param descriptorMatcherType Descriptor matcher type. Now the following matcher types are + supported: + - `BruteForce` (it uses L2 ) + - `BruteForce-L1` + - `BruteForce-Hamming` + - `BruteForce-Hamming(2)` + - `FlannBased` + */ CV_WRAP static Ptr create( const String& descriptorMatcherType ); protected: - /* + /** * Class to work with descriptors from several images as with one merged matrix. * It is used e.g. in FlannBasedMatcher. */ @@ -613,9 +917,9 @@ protected: std::vector startIdxs; }; - // In fact the matching is implemented only by the following two methods. These methods suppose - // that the class object has been trained already. Public match methods call these methods - // after calling train(). + //! In fact the matching is implemented only by the following two methods. These methods suppose + //! that the class object has been trained already. Public match methods call these methods + //! after calling train(). virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0; virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, @@ -627,23 +931,33 @@ protected: static Mat clone_op( Mat m ) { return m.clone(); } void checkMasks( InputArrayOfArrays masks, int queryDescriptorsCount ) const; - // Collection of descriptors from train images. + //! Collection of descriptors from train images. 
std::vector trainDescCollection; std::vector utrainDescCollection; }; -/* - * Brute-force descriptor matcher. - * - * For each descriptor in the first set, this matcher finds the closest - * descriptor in the second set by trying each one. - * - * For efficiency, BruteForceMatcher is templated on the distance metric. - * For float descriptors, a common choice would be cv::L2. +/** @brief Brute-force descriptor matcher. + +For each descriptor in the first set, this matcher finds the closest descriptor in the second set +by trying each one. This descriptor matcher supports masking permissible matches of descriptor +sets. */ class CV_EXPORTS_W BFMatcher : public DescriptorMatcher { public: + /** @brief Brute-force matcher constructor. + + @param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are + preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and + BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see ORB::ORB constructor + description). + @param crossCheck If it is false, this will be the default BFMatcher behaviour, where it finds the k + nearest neighbors for each query descriptor. If crossCheck==true, then the knnMatch() method with + k=1 will only return pairs (i,j) such that for i-th query descriptor the j-th descriptor in the + matcher's collection is the nearest and vice versa, i.e. the BFMatcher will only return consistent + pairs. Such a technique usually produces the best results with a minimal number of outliers when there are + enough matches. This is an alternative to the ratio test used by D. Lowe in the SIFT paper. + */ CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false ); virtual ~BFMatcher() {} @@ -661,8 +975,12 @@ protected: }; -/* - * Flann based matcher +/** @brief Flann-based descriptor matcher. + +This matcher trains flann::Index_ on a train descriptor collection and calls its nearest search +methods to find the best matches.
So, this matcher may be faster when matching a large train +collection than the brute force matcher. FlannBasedMatcher does not support masking permissible +matches of descriptor sets because flann::Index does not support this. : */ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher { @@ -700,42 +1018,85 @@ protected: int addedDescCount; }; +//! @} features2d_match /****************************************************************************************\ * Drawing functions * \****************************************************************************************/ + +//! @addtogroup features2d_draw +//! @{ + struct CV_EXPORTS DrawMatchesFlags { - enum{ DEFAULT = 0, // Output image matrix will be created (Mat::create), - // i.e. existing memory of output image may be reused. - // Two source image, matches and single keypoints will be drawn. - // For each keypoint only the center point will be drawn (without - // the circle around keypoint with keypoint size and orientation). - DRAW_OVER_OUTIMG = 1, // Output image matrix will not be created (Mat::create). - // Matches will be drawn on existing content of output image. - NOT_DRAW_SINGLE_POINTS = 2, // Single keypoints will not be drawn. - DRAW_RICH_KEYPOINTS = 4 // For each keypoint the circle around keypoint with keypoint size and - // orientation will be drawn. + enum{ DEFAULT = 0, //!< Output image matrix will be created (Mat::create), + //!< i.e. existing memory of output image may be reused. + //!< Two source image, matches and single keypoints will be drawn. + //!< For each keypoint only the center point will be drawn (without + //!< the circle around keypoint with keypoint size and orientation). + DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create). + //!< Matches will be drawn on existing content of output image. + NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn. 
+ DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and + //!< orientation will be drawn. }; }; -// Draw keypoints. +/** @brief Draws keypoints. + +@param image Source image. +@param keypoints Keypoints from the source image. +@param outImage Output image. Its content depends on the flags value defining what is drawn in the +output image. See possible flags bit values below. +@param color Color of keypoints. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. See details above in drawMatches . + +@note +For Python API, flags are modified as cv2.DRAW_MATCHES_FLAGS_DEFAULT, +cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv2.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG, +cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS + */ CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector& keypoints, InputOutputArray outImage, const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT ); -// Draws matches of keypints from two images on output image. +/** @brief Draws the found matches of keypoints from two images. + +@param img1 First source image. +@param keypoints1 Keypoints from the first source image. +@param img2 Second source image. +@param keypoints2 Keypoints from the second source image. +@param matches1to2 Matches from the first image to the second one, which means that keypoints1[i] +has a corresponding point in keypoints2[matches[i]] . +@param outImg Output image. Its content depends on the flags value defining what is drawn in the +output image. See possible flags bit values below. +@param matchColor Color of matches (lines and connected keypoints). If matchColor==Scalar::all(-1) +, the color is generated randomly. +@param singlePointColor Color of single keypoints (circles), which means that keypoints do not +have the matches. If singlePointColor==Scalar::all(-1) , the color is generated randomly. 
+@param matchesMask Mask determining which matches are drawn. If the mask is empty, all matches are +drawn. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. + +This function draws matches of keypoints from two images in the output image. Match is a line +connecting two keypoints (circles). See cv::DrawMatchesFlags. + */ CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector& matches1to2, InputOutputArray outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), const std::vector& matchesMask=std::vector(), int flags=DrawMatchesFlags::DEFAULT ); +/** @overload */ CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector >& matches1to2, InputOutputArray outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), const std::vector >& matchesMask=std::vector >(), int flags=DrawMatchesFlags::DEFAULT ); +//! @} features2d_draw + /****************************************************************************************\ * Functions to evaluate the feature detectors and [generic] descriptor extractors * \****************************************************************************************/ @@ -755,8 +1116,14 @@ CV_EXPORTS int getNearestPoint( const std::vector& recallPrecisionCurve /****************************************************************************************\ * Bag of visual words * \****************************************************************************************/ -/* - * Abstract base class for training of a 'bag of visual words' vocabulary from a set of descriptors + +//! @addtogroup features2d_category +//! @{ + +/** @brief Abstract base class for training the *bag of visual words* vocabulary from a set of descriptors. 
+ +For details, see, for example, *Visual Categorization with Bags of Keypoints* by Gabriella Csurka, +Christopher R. Dance, Lixin Fan, Jutta Willamowski, Cedric Bray, 2004. : */ class CV_EXPORTS_W BOWTrainer { @@ -764,20 +1131,37 @@ public: BOWTrainer(); virtual ~BOWTrainer(); + /** @brief Adds descriptors to a training set. + + @param descriptors Descriptors to add to a training set. Each row of the descriptors matrix is a + descriptor. + + The training set is clustered using clustermethod to construct the vocabulary. + */ CV_WRAP void add( const Mat& descriptors ); + + /** @brief Returns a training set of descriptors. + */ CV_WRAP const std::vector& getDescriptors() const; + + /** @brief Returns the count of all descriptors stored in the training set. + */ CV_WRAP int descriptorsCount() const; CV_WRAP virtual void clear(); - /* - * Train visual words vocabulary, that is cluster training descriptors and - * compute cluster centers. - * Returns cluster centers. - * - * descriptors Training descriptors computed on images keypoints. - */ + /** @overload */ CV_WRAP virtual Mat cluster() const = 0; + + /** @brief Clusters train descriptors. + + @param descriptors Descriptors to cluster. Each row of the descriptors matrix is a descriptor. + Descriptors are not added to the inner train descriptor set. + + The vocabulary consists of cluster centers. So, this method returns the vocabulary. In the first + variant of the method, train descriptors stored in the object are clustered. In the second variant, + input descriptors are clustered. + */ CV_WRAP virtual Mat cluster( const Mat& descriptors ) const = 0; protected: @@ -785,12 +1169,15 @@ protected: int size; }; -/* - * This is BOWTrainer using cv::kmeans to get vocabulary. +/** @brief kmeans -based class to train visual vocabulary using the *bag of visual words* approach. : */ class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer { public: + /** @brief The constructor. 
+ + @see cv::kmeans + */ CV_WRAP BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(), int attempts=3, int flags=KMEANS_PP_CENTERS ); virtual ~BOWKMeansTrainer(); @@ -807,21 +1194,62 @@ protected: int flags; }; -/* - * Class to compute image descriptor using bag of visual words. +/** @brief Class to compute an image descriptor using the *bag of visual words*. + +Such a computation consists of the following steps: + +1. Compute descriptors for a given image and its keypoints set. +2. Find the nearest visual words from the vocabulary for each keypoint descriptor. +3. Compute the bag-of-words image descriptor as a normalized histogram of vocabulary words +encountered in the image. The i-th bin of the histogram is the frequency of the i-th word of the +vocabulary in the given image. */ class CV_EXPORTS_W BOWImgDescriptorExtractor { public: + /** @brief The constructor. + + @param dextractor Descriptor extractor that is used to compute descriptors for an input image and + its keypoints. + @param dmatcher Descriptor matcher that is used to find the nearest word of the trained vocabulary + for each keypoint descriptor of the image. + */ CV_WRAP BOWImgDescriptorExtractor( const Ptr& dextractor, const Ptr& dmatcher ); + /** @overload */ BOWImgDescriptorExtractor( const Ptr& dmatcher ); virtual ~BOWImgDescriptorExtractor(); + /** @brief Sets a visual vocabulary. + + @param vocabulary Vocabulary (can be trained using the inheritor of BOWTrainer ). Each row of the + vocabulary is a visual word (cluster center). + */ CV_WRAP void setVocabulary( const Mat& vocabulary ); + + /** @brief Returns the set vocabulary. + */ CV_WRAP const Mat& getVocabulary() const; + + /** @brief Computes an image descriptor using the set visual vocabulary. + + @param image Image, for which the descriptor is computed. + @param keypoints Keypoints detected in the input image. + @param imgDescriptor Computed output image descriptor.
+ @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i -th cluster (word of vocabulary) + returned if it is non-zero. + @param descriptors Descriptors of the image keypoints that are returned if they are non-zero. + */ void compute( InputArray image, std::vector& keypoints, OutputArray imgDescriptor, std::vector >* pointIdxsOfClusters=0, Mat* descriptors=0 ); + /** @overload + @param keypointDescriptors Computed descriptors to match with vocabulary. + @param imgDescriptor Computed output image descriptor. + @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i -th cluster (word of vocabulary) + returned if it is non-zero. + */ void compute( InputArray keypointDescriptors, OutputArray imgDescriptor, std::vector >* pointIdxsOfClusters=0 ); // compute() is not constant because DescriptorMatcher::match is not constant @@ -829,7 +1257,12 @@ public: CV_WRAP_AS(compute) void compute2( const Mat& image, std::vector& keypoints, CV_OUT Mat& imgDescriptor ) { compute(image,keypoints,imgDescriptor); } + /** @brief Returns an image descriptor size if the vocabulary is set. Otherwise, it returns 0. + */ CV_WRAP int descriptorSize() const; + + /** @brief Returns an image descriptor type. + */ CV_WRAP int descriptorType() const; protected: @@ -838,6 +1271,10 @@ protected: Ptr dmatcher; }; +//! @} features2d_category + +//! 
@} features2d + } /* namespace cv */ #endif diff --git a/modules/flann/include/opencv2/flann.hpp b/modules/flann/include/opencv2/flann.hpp index 36ca8c7c3a..4f92d57e3e 100644 --- a/modules/flann/include/opencv2/flann.hpp +++ b/modules/flann/include/opencv2/flann.hpp @@ -47,6 +47,15 @@ #include "opencv2/flann/miniflann.hpp" #include "opencv2/flann/flann_base.hpp" +/** +@defgroup flann Clustering and Search in Multi-Dimensional Spaces + +This section documents OpenCV's interface to the FLANN library. FLANN (Fast Library for Approximate +Nearest Neighbors) is a library that contains a collection of algorithms optimized for fast nearest +neighbor search in large datasets and for high dimensional features. More information about FLANN +can be found in @cite Muja2009 . +*/ + namespace cvflann { CV_EXPORTS flann_distance_t flann_distance_type(); @@ -59,6 +68,10 @@ namespace cv namespace flann { + +//! @addtogroup flann +//! @{ + template struct CvType {}; template <> struct CvType { static int type() { return CV_8U; } }; template <> struct CvType { static int type() { return CV_8S; } }; @@ -88,7 +101,9 @@ using ::cvflann::ChiSquareDistance; using ::cvflann::KL_Divergence; - +/** @brief The FLANN nearest neighbor index class. This class is templated with the type of elements for which +the index is built. + */ template class GenericIndex { @@ -96,10 +111,108 @@ public: typedef typename Distance::ElementType ElementType; typedef typename Distance::ResultType DistanceType; + /** @brief Constructs a nearest neighbor search index for a given dataset. + + @param features Matrix containing the features (points) to index. The size of the matrix is + num_features x feature_dimensionality and the data type of the elements in the matrix must + coincide with the type of the index. + @param params Structure containing the index parameters. The type of index that will be + constructed depends on the type of this parameter. See the description.
+ @param distance + + The method constructs a fast search structure from a set of features using the specified algorithm + with specified parameters, as defined by params. params is a reference to one of the following class + IndexParams descendants: + + - **LinearIndexParams** When passing an object of this type, the index will perform a linear, + brute-force search. : + @code + struct LinearIndexParams : public IndexParams + { + }; + @endcode + - **KDTreeIndexParams** When passing an object of this type the index constructed will consist of + a set of randomized kd-trees which will be searched in parallel. : + @code + struct KDTreeIndexParams : public IndexParams + { + KDTreeIndexParams( int trees = 4 ); + }; + @endcode + - **KMeansIndexParams** When passing an object of this type the index constructed will be a + hierarchical k-means tree. : + @code + struct KMeansIndexParams : public IndexParams + { + KMeansIndexParams( + int branching = 32, + int iterations = 11, + flann_centers_init_t centers_init = CENTERS_RANDOM, + float cb_index = 0.2 ); + }; + @endcode + - **CompositeIndexParams** When using a parameters object of this type the index created + combines the randomized kd-trees and the hierarchical k-means tree. : + @code + struct CompositeIndexParams : public IndexParams + { + CompositeIndexParams( + int trees = 4, + int branching = 32, + int iterations = 11, + flann_centers_init_t centers_init = CENTERS_RANDOM, + float cb_index = 0.2 ); + }; + @endcode + - **LshIndexParams** When using a parameters object of this type the index created uses + multi-probe LSH (by Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search + by Qin Lv, William Josephson, Zhe Wang, Moses Charikar, Kai Li., Proceedings of the 33rd + International Conference on Very Large Data Bases (VLDB). Vienna, Austria. 
September 2007) : + @code + struct LshIndexParams : public IndexParams + { + LshIndexParams( + unsigned int table_number, + unsigned int key_size, + unsigned int multi_probe_level ); + }; + @endcode + - **AutotunedIndexParams** When passing an object of this type the index created is + automatically tuned to offer the best performance, by choosing the optimal index type + (randomized kd-trees, hierarchical kmeans, linear) and parameters for the dataset provided. : + @code + struct AutotunedIndexParams : public IndexParams + { + AutotunedIndexParams( + float target_precision = 0.9, + float build_weight = 0.01, + float memory_weight = 0, + float sample_fraction = 0.1 ); + }; + @endcode + - **SavedIndexParams** This object type is used for loading a previously saved index from the + disk. : + @code + struct SavedIndexParams : public IndexParams + { + SavedIndexParams( String filename ); + }; + @endcode + */ GenericIndex(const Mat& features, const ::cvflann::IndexParams& params, Distance distance = Distance()); ~GenericIndex(); + /** @brief Performs a K-nearest neighbor search for a given query point using the index. + + @param query The query point + @param indices Vector that will contain the indices of the K-nearest neighbors found. It must have + at least knn size. + @param dists Vector that will contain the distances to the K-nearest neighbors found. It must have + at least knn size. + @param knn Number of nearest neighbors to search for. + @param params SearchParams + */ void knnSearch(const std::vector& query, std::vector& indices, std::vector& dists, int knn, const ::cvflann::SearchParams& params); void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params); @@ -123,6 +236,7 @@ private: ::cvflann::Index* nnIndex; }; +//! 
@cond IGNORED #define FLANN_DISTANCE_CHECK \ if ( ::cvflann::flann_distance_type() != cvflann::FLANN_DIST_L2) { \ @@ -218,6 +332,8 @@ int GenericIndex::radiusSearch(const Mat& query, Mat& indices, Mat& di return nnIndex->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); } +//! @endcond + /** * @deprecated Use GenericIndex class instead */ @@ -283,6 +399,8 @@ template class FLANN_DEPRECATED Index_; #endif +//! @cond IGNORED + template Index_::Index_(const Mat& dataset, const ::cvflann::IndexParams& params) { @@ -377,7 +495,25 @@ int Index_::radiusSearch(const Mat& query, Mat& indices, Mat& dists, Distance if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams); } +//! @endcond + +/** @brief Clusters features using hierarchical k-means algorithm. + +@param features The points to be clustered. The matrix must have elements of type +Distance::ElementType. +@param centers The centers of the clusters obtained. The matrix must have type +Distance::ResultType. The number of rows in this matrix represents the number of clusters desired, +however, because of the way the cut in the hierarchical tree is chosen, the number of clusters +computed will be the highest number of the form (branching-1)\*k+1 that's lower than the number of +clusters desired, where branching is the tree's branching factor (see description of the +KMeansIndexParams). +@param params Parameters used in the construction of the hierarchical k-means tree. +@param d Distance to be used for clustering. +The method clusters the given feature vectors by constructing a hierarchical k-means tree and +choosing a cut in the tree that minimizes the cluster's variance. It returns the number of clusters +found. 
+ */ template int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params, Distance d = Distance()) @@ -396,7 +532,8 @@ int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::K return ::cvflann::hierarchicalClustering(m_features, m_centers, params, d); } - +/** @deprecated +*/ template FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params) { @@ -417,6 +554,8 @@ FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, c } } +//! @} flann + } } // namespace cv::flann #endif diff --git a/modules/highgui/include/opencv2/highgui.hpp b/modules/highgui/include/opencv2/highgui.hpp index 0247cc38a0..1c06bf0787 100644 --- a/modules/highgui/include/opencv2/highgui.hpp +++ b/modules/highgui/include/opencv2/highgui.hpp @@ -47,11 +47,92 @@ #include "opencv2/imgcodecs.hpp" #include "opencv2/videoio.hpp" +/** +@defgroup highgui High-level GUI + +While OpenCV was designed for use in full-scale applications and can be used within functionally +rich UI frameworks (such as Qt\*, WinForms\*, or Cocoa\*) or without any UI at all, sometimes +it is required to try functionality quickly and visualize the results. This is what the HighGUI +module has been designed for. + +It provides easy interface to: + +- Create and manipulate windows that can display images and "remember" their content (no need to + handle repaint events from OS). +- Add trackbars to the windows, handle simple mouse events as well as keyboard commands. + +@{ + @defgroup highgui_opengl OpenGL support + @defgroup highgui_qt Qt New Functions + + ![image](pics/qtgui.png) + + This figure explains new functionality implemented with Qt\* GUI. The new GUI provides a statusbar, + a toolbar, and a control panel. The control panel can have trackbars and buttonbars attached to it.
+ If you cannot see the control panel, press Ctrl+P or right-click any Qt window and select **Display + properties window**. + + - To attach a trackbar, the window name parameter must be NULL. + + - To attach a buttonbar, a button must be created. If the last bar attached to the control panel + is a buttonbar, the new button is added to the right of the last button. If the last bar + attached to the control panel is a trackbar, or the control panel is empty, a new buttonbar is + created. Then, a new button is attached to it. + + See below the example used to generate the figure: : + @code + int main(int argc, char *argv[]) { + int value = 50; + int value2 = 0; + + cvNamedWindow("main1",CV_WINDOW_NORMAL); + cvNamedWindow("main2",CV_WINDOW_AUTOSIZE | CV_GUI_NORMAL); + + cvCreateTrackbar( "track1", "main1", &value, 255, NULL);//OK tested + char* nameb1 = "button1"; + char* nameb2 = "button2"; + cvCreateButton(nameb1,callbackButton,nameb1,CV_CHECKBOX,1); + + cvCreateButton(nameb2,callbackButton,nameb2,CV_CHECKBOX,0); + cvCreateTrackbar( "track2", NULL, &value2, 255, NULL); + cvCreateButton("button5",callbackButton1,NULL,CV_RADIOBOX,0); + cvCreateButton("button6",callbackButton2,NULL,CV_RADIOBOX,1); + + cvSetMouseCallback( "main2",on_mouse,NULL ); + + IplImage* img1 = cvLoadImage("files/flower.jpg"); + IplImage* img2 = cvCreateImage(cvGetSize(img1),8,3); + CvCapture* video = cvCaptureFromFile("files/hockey.avi"); + IplImage* img3 = cvCreateImage(cvGetSize(cvQueryFrame(video)),8,3); + + while(cvWaitKey(33) != 27) + { + cvAddS(img1,cvScalarAll(value),img2); + cvAddS(cvQueryFrame(video),cvScalarAll(value2),img3); + cvShowImage("main1",img2); + cvShowImage("main2",img3); + } + + cvDestroyAllWindows(); + cvReleaseImage(&img1); + cvReleaseImage(&img2); + cvReleaseImage(&img3); + cvReleaseCapture(&video); + return 0; + } + @endcode + + @defgroup highgui_c C API +@} +*/ ///////////////////////// graphical user interface ////////////////////////// namespace cv { +//!
@addtogroup highgui +//! @{ + // Flags for namedWindow enum { WINDOW_NORMAL = 0x00000000, // the user can resize the window (no constraint) / also use to switch a fullscreen window to a normal size WINDOW_AUTOSIZE = 0x00000001, // the user cannot resize the window, the size is constrainted by the image displayed @@ -117,54 +198,334 @@ typedef void (*TrackbarCallback)(int pos, void* userdata); typedef void (*OpenGlDrawCallback)(void* userdata); typedef void (*ButtonCallback)(int state, void* userdata); +/** @brief Creates a window. + +@param winname Name of the window in the window caption that may be used as a window identifier. +@param flags Flags of the window. The supported flags are: +> - **WINDOW_NORMAL** If this is set, the user can resize the window (no constraint). +> - **WINDOW_AUTOSIZE** If this is set, the window size is automatically adjusted to fit the +> displayed image (see imshow ), and you cannot change the window size manually. +> - **WINDOW_OPENGL** If this is set, the window will be created with OpenGL support. + +The function namedWindow creates a window that can be used as a placeholder for images and +trackbars. Created windows are referred to by their names. + +If a window with the same name already exists, the function does nothing. + +You can call destroyWindow or destroyAllWindows to close the window and de-allocate any associated +memory usage. For a simple program, you do not really have to call these functions because all the +resources and windows of the application are closed automatically by the operating system upon exit. +@note + +Qt backend supports additional flags: + - **CV_WINDOW_NORMAL or CV_WINDOW_AUTOSIZE:** CV_WINDOW_NORMAL enables you to resize the + window, whereas CV_WINDOW_AUTOSIZE adjusts automatically the window size to fit the + displayed image (see imshow ), and you cannot change the window size manually. 
+ - **CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO:** CV_WINDOW_FREERATIO adjusts the image + with no respect to its ratio, whereas CV_WINDOW_KEEPRATIO keeps the image ratio. + - **CV_GUI_NORMAL or CV_GUI_EXPANDED:** CV_GUI_NORMAL is the old way to draw the window + without statusbar and toolbar, whereas CV_GUI_EXPANDED is a new enhanced GUI. +By default, flags == CV_WINDOW_AUTOSIZE | CV_WINDOW_KEEPRATIO | CV_GUI_EXPANDED + */ CV_EXPORTS_W void namedWindow(const String& winname, int flags = WINDOW_AUTOSIZE); +/** @brief Destroys a window. + +@param winname Name of the window to be destroyed. + +The function destroyWindow destroys the window with the given name. + */ CV_EXPORTS_W void destroyWindow(const String& winname); +/** @brief Destroys all of the HighGUI windows. + +The function destroyAllWindows destroys all of the opened HighGUI windows. + */ CV_EXPORTS_W void destroyAllWindows(); CV_EXPORTS_W int startWindowThread(); +/** @brief Waits for a pressed key. + +@param delay Delay in milliseconds. 0 is the special value that means "forever". + +The function waitKey waits for a key event infinitely (when \f$\texttt{delay}\leq 0\f$ ) or for delay +milliseconds, when it is positive. Since the OS has a minimum time between switching threads, the +function will not wait exactly delay ms, it will wait at least delay ms, depending on what else is +running on your computer at that time. It returns the code of the pressed key or -1 if no key was +pressed before the specified time had elapsed. + +@note + +This function is the only method in HighGUI that can fetch and handle events, so it needs to be +called periodically for normal event processing unless HighGUI is used within an environment that +takes care of event processing. + +@note + +The function only works if there is at least one HighGUI window created and the window is active. +If there are several HighGUI windows, any of them can be active. 
+ */ CV_EXPORTS_W int waitKey(int delay = 0); +/** @brief Displays an image in the specified window. + +@param winname Name of the window. +@param mat Image to be shown. + +The function imshow displays an image in the specified window. If the window was created with the +CV_WINDOW_AUTOSIZE flag, the image is shown with its original size. Otherwise, the image is scaled +to fit the window. The function may scale the image, depending on its depth: + +- If the image is 8-bit unsigned, it is displayed as is. +- If the image is 16-bit unsigned or 32-bit integer, the pixels are divided by 256. That is, the + value range [0,255\*256] is mapped to [0,255]. +- If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the + value range [0,1] is mapped to [0,255]. + +If the window was created with OpenGL support, imshow also supports ogl::Buffer , ogl::Texture2D and +cuda::GpuMat as input. + +@note This function should be followed by waitKey function which displays the image for specified +milliseconds. Otherwise, it won't display the image. For example, waitKey(0) will display the window +infinitely until any keypress (it is suitable for image display). waitKey(25) will display a frame +for 25 ms, after which display will be automatically closed. (If you put it in a loop to read +videos, it will display the video frame-by-frame) + +@note + +[Windows Backend Only] Pressing Ctrl+C will copy the image to the clipboard. + + */ CV_EXPORTS_W void imshow(const String& winname, InputArray mat); +/** @brief Resizes window to the specified size + +@param winname Window name +@param width The new window width +@param height The new window height + +@note + +- The specified window size is for the image area. Toolbars are not counted. +- Only windows created without CV_WINDOW_AUTOSIZE flag can be resized.
+ */ CV_EXPORTS_W void resizeWindow(const String& winname, int width, int height); +/** @brief Moves window to the specified position + +@param winname Window name +@param x The new x-coordinate of the window +@param y The new y-coordinate of the window + */ CV_EXPORTS_W void moveWindow(const String& winname, int x, int y); +/** @brief Changes parameters of a window dynamically. + +@param winname Name of the window. +@param prop_id Window property to edit. The following operation flags are available: + - **CV_WND_PROP_FULLSCREEN** Change if the window is fullscreen ( CV_WINDOW_NORMAL or + CV_WINDOW_FULLSCREEN ). + - **CV_WND_PROP_AUTOSIZE** Change if the window is resizable (CV_WINDOW_NORMAL or + CV_WINDOW_AUTOSIZE ). + - **CV_WND_PROP_ASPECTRATIO** Change if the aspect ratio of the image is preserved ( + CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO ). +@param prop_value New value of the window property. The following operation flags are available: + - **CV_WINDOW_NORMAL** Change the window to normal size or make the window resizable. + - **CV_WINDOW_AUTOSIZE** Constrain the size by the displayed image. The window is not + resizable. + - **CV_WINDOW_FULLSCREEN** Change the window to fullscreen. + - **CV_WINDOW_FREERATIO** Make the window resizable without any ratio constraints. + - **CV_WINDOW_KEEPRATIO** Make the window resizable, but preserve the proportions of the + displayed image. + +The function setWindowProperty enables changing properties of a window. + */ CV_EXPORTS_W void setWindowProperty(const String& winname, int prop_id, double prop_value); +/** @brief Updates window title +*/ CV_EXPORTS_W void setWindowTitle(const String& winname, const String& title); +/** @brief Provides parameters of a window. + +@param winname Name of the window. +@param prop_id Window property to retrieve. The following operation flags are available: + - **CV_WND_PROP_FULLSCREEN** Change if the window is fullscreen ( CV_WINDOW_NORMAL or + CV_WINDOW_FULLSCREEN ). 
+ - **CV_WND_PROP_AUTOSIZE** Change if the window is resizable (CV_WINDOW_NORMAL or + CV_WINDOW_AUTOSIZE ). + - **CV_WND_PROP_ASPECTRATIO** Change if the aspect ratio of the image is preserved + (CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO ). + +See setWindowProperty to know the meaning of the returned values. + +The function getWindowProperty returns properties of a window. + */ CV_EXPORTS_W double getWindowProperty(const String& winname, int prop_id); -//! assigns callback for mouse events +/** @brief Sets mouse handler for the specified window + +@param winname Window name +@param onMouse Mouse callback. See OpenCV samples, such as +, on how to specify and +use the callback. +@param userdata The optional parameter passed to the callback. + */ CV_EXPORTS void setMouseCallback(const String& winname, MouseCallback onMouse, void* userdata = 0); +/** @brief Gets the mouse-wheel motion delta, when handling mouse-wheel events EVENT_MOUSEWHEEL and +EVENT_MOUSEHWHEEL. + +@param flags The mouse callback flags parameter. + +For regular mice with a scroll-wheel, delta will be a multiple of 120. The value 120 corresponds to +a one notch rotation of the wheel or the threshold for action to be taken and one such action should +occur for each delta. Some high-precision mice with higher-resolution freely-rotating wheels may +generate smaller values. + +For EVENT_MOUSEWHEEL positive and negative values mean forward and backward scrolling, +respectively. For EVENT_MOUSEHWHEEL, where available, positive and negative values mean right and +left scrolling, respectively. + +With the C API, the macro CV_GET_WHEEL_DELTA(flags) can be used alternatively. + +@note + +Mouse-wheel events are currently supported only on Windows. + */ CV_EXPORTS int getMouseWheelDelta(int flags); +/** @brief Creates a trackbar and attaches it to the specified window. + +@param trackbarname Name of the created trackbar. +@param winname Name of the window that will be used as a parent of the created trackbar. 
+@param value Optional pointer to an integer variable whose value reflects the position of the +slider. Upon creation, the slider position is defined by this variable. +@param count Maximal position of the slider. The minimal position is always 0. +@param onChange Pointer to the function to be called every time the slider changes position. This +function should be prototyped as void Foo(int,void\*); , where the first parameter is the trackbar +position and the second parameter is the user data (see the next parameter). If the callback is +the NULL pointer, no callbacks are called, but only value is updated. +@param userdata User data that is passed as is to the callback. It can be used to handle trackbar +events without using global variables. + +The function createTrackbar creates a trackbar (a slider or range control) with the specified name +and range, assigns a variable value to be a position synchronized with the trackbar and specifies +the callback function onChange to be called on the trackbar position change. The created trackbar is +displayed in the specified window winname. + +@note + +**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar should be attached to the +control panel. + +Clicking the label of each trackbar enables editing the trackbar values manually. + +@note + +- An example of using the trackbar functionality can be found at + opencv_source_code/samples/cpp/connected_components.cpp + */ CV_EXPORTS int createTrackbar(const String& trackbarname, const String& winname, int* value, int count, TrackbarCallback onChange = 0, void* userdata = 0); +/** @brief Returns the trackbar position. + +@param trackbarname Name of the trackbar. +@param winname Name of the window that is the parent of the trackbar. + +The function returns the current position of the specified trackbar. + +@note + +**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control +panel. 
+ + */ CV_EXPORTS_W int getTrackbarPos(const String& trackbarname, const String& winname); +/** @brief Sets the trackbar position. + +@param trackbarname Name of the trackbar. +@param winname Name of the window that is the parent of trackbar. +@param pos New position. + +The function sets the position of the specified trackbar in the specified window. + +@note + +**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control +panel. + */ CV_EXPORTS_W void setTrackbarPos(const String& trackbarname, const String& winname, int pos); +//! @addtogroup highgui_opengl OpenGL support +//! @{ -// OpenGL support CV_EXPORTS void imshow(const String& winname, const ogl::Texture2D& tex); +/** @brief Sets a callback function to be called to draw on top of displayed image. + +@param winname Name of the window. +@param onOpenGlDraw Pointer to the function to be called every frame. This function should be +prototyped as void Foo(void\*) . +@param userdata Pointer passed to the callback function. *(Optional)* + +The function setOpenGlDrawCallback can be used to draw 3D data on the window. 
See the example of +callback function below: : +@code + void on_opengl(void* param) + { + glLoadIdentity(); + + glTranslated(0.0, 0.0, -1.0); + + glRotatef( 55, 1, 0, 0 ); + glRotatef( 45, 0, 1, 0 ); + glRotatef( 0, 0, 0, 1 ); + + static const int coords[6][4][3] = { + { { +1, -1, -1 }, { -1, -1, -1 }, { -1, +1, -1 }, { +1, +1, -1 } }, + { { +1, +1, -1 }, { -1, +1, -1 }, { -1, +1, +1 }, { +1, +1, +1 } }, + { { +1, -1, +1 }, { +1, -1, -1 }, { +1, +1, -1 }, { +1, +1, +1 } }, + { { -1, -1, -1 }, { -1, -1, +1 }, { -1, +1, +1 }, { -1, +1, -1 } }, + { { +1, -1, +1 }, { -1, -1, +1 }, { -1, -1, -1 }, { +1, -1, -1 } }, + { { -1, -1, +1 }, { +1, -1, +1 }, { +1, +1, +1 }, { -1, +1, +1 } } + }; + + for (int i = 0; i < 6; ++i) { + glColor3ub( i*20, 100+i*10, i*42 ); + glBegin(GL_QUADS); + for (int j = 0; j < 4; ++j) { + glVertex3d(0.2 * coords[i][j][0], 0.2 * coords[i][j][1], 0.2 * coords[i][j][2]); + } + glEnd(); + } + } +@endcode + */ CV_EXPORTS void setOpenGlDrawCallback(const String& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0); +/** @brief Sets the specified window as current OpenGL context. + +@param winname Window name + */ CV_EXPORTS void setOpenGlContext(const String& winname); +/** @brief Force window to redraw its context and call draw callback ( setOpenGlDrawCallback ). + +@param winname Window name + */ CV_EXPORTS void updateWindow(const String& winname); +//! @} highgui_opengl +//! @addtogroup highgui_qt +//! @{ // Only for Qt struct QtFont @@ -182,27 +543,138 @@ struct QtFont int line_type; // Qt: PointSize }; +/** @brief Creates the font to draw a text on an image. + +@param nameFont Name of the font. The name should match the name of a system font (such as +*Times*). If the font is not found, a default one is used. +@param pointSize Size of the font. If not specified, equal zero or negative, the point size of the +font is set to a system-dependent default value. Generally, this is 12 points. 
+@param color Color of the font in BGRA where A = 255 is fully transparent. Use the macro CV_RGB +for simplicity. +@param weight Font weight. The following operation flags are available: + - **CV_FONT_LIGHT** Weight of 25 + - **CV_FONT_NORMAL** Weight of 50 + - **CV_FONT_DEMIBOLD** Weight of 63 + - **CV_FONT_BOLD** Weight of 75 + - **CV_FONT_BLACK** Weight of 87 + + You can also specify a positive integer for better control. +@param style Font style. The following operation flags are available: + - **CV_STYLE_NORMAL** Normal font + - **CV_STYLE_ITALIC** Italic font + - **CV_STYLE_OBLIQUE** Oblique font +@param spacing Spacing between characters. It can be negative or positive. + +The function fontQt creates a QtFont object. This QtFont is not compatible with putText . + +A basic usage of this function is the following: : +@code + QtFont font = fontQt("Times"); + addText( img1, "Hello World !", Point(50,50), font); +@endcode + */ CV_EXPORTS QtFont fontQt(const String& nameFont, int pointSize = -1, Scalar color = Scalar::all(0), int weight = QT_FONT_NORMAL, int style = QT_STYLE_NORMAL, int spacing = 0); +/** @brief Draws a text on the image. + +@param img 8-bit 3-channel image where the text should be drawn. +@param text Text to write on an image. +@param org Point(x,y) where the text should start on an image. +@param font Font to use to draw a text. + +The function addText draws *text* on an image *img* using a specific font *font* (see example fontQt +) + */ CV_EXPORTS void addText( const Mat& img, const String& text, Point org, const QtFont& font); +/** @brief Displays a text on a window image as an overlay for a specified duration. + +@param winname Name of the window. +@param text Overlay text to write on a window image. +@param delayms The period (in milliseconds), during which the overlay text is displayed. If this +function is called before the previous overlay text timed out, the timer is restarted and the text +is updated.
If this value is zero, the text never disappears. + +The function displayOverlay displays useful information/tips on top of the window for a certain +amount of time *delayms*. The function does not modify the image, displayed in the window, that is, +after the specified delay the original content of the window is restored. + */ CV_EXPORTS void displayOverlay(const String& winname, const String& text, int delayms = 0); +/** @brief Displays a text on the window statusbar during the specified period of time. + +@param winname Name of the window. +@param text Text to write on the window statusbar. +@param delayms Duration (in milliseconds) to display the text. If this function is called before +the previous text timed out, the timer is restarted and the text is updated. If this value is +zero, the text never disappears. + +The function displayStatusBar displays useful information/tips on top of the window for a certain +amount of time *delayms* . This information is displayed on the window statusbar (the window must be +created with the CV_GUI_EXPANDED flags). + */ CV_EXPORTS void displayStatusBar(const String& winname, const String& text, int delayms = 0); +/** @brief Saves parameters of the specified window. + +@param windowName Name of the window. + +The function saveWindowParameters saves size, location, flags, trackbars value, zoom and panning +location of the window windowName . + */ CV_EXPORTS void saveWindowParameters(const String& windowName); +/** @brief Loads parameters of the specified window. + +@param windowName Name of the window. + +The function loadWindowParameters loads size, location, flags, trackbars value, zoom and panning +location of the window windowName . + */ CV_EXPORTS void loadWindowParameters(const String& windowName); CV_EXPORTS int startLoop(int (*pt2Func)(int argc, char *argv[]), int argc, char* argv[]); CV_EXPORTS void stopLoop(); +/** @brief Attaches a button to the control panel. + +@param bar_name + Name of the button.
+@param on_change Pointer to the function to be called every time the button changes its state. +This function should be prototyped as void Foo(int state, void\*); . *state* is the current state +of the button. It could be -1 for a push button, 0 or 1 for a check/radio box button. +@param userdata Pointer passed to the callback function. +@param type Optional type of the button. + - **CV_PUSH_BUTTON** Push button + - **CV_CHECKBOX** Checkbox button + - **CV_RADIOBOX** Radiobox button. The radioboxes on the same buttonbar (same line) are + exclusive, that is only one can be selected at a time. +@param initial_button_state Default state of the button. Use for checkbox and radiobox. Its +value could be 0 or 1. *(Optional)* + +The function createButton attaches a button to the control panel. Each button is added to a +buttonbar to the right of the last button. A new buttonbar is created if nothing was attached to the +control panel before, or if the last element attached to the control panel was a trackbar. + +See below various examples of the createButton function call: : +@code + createButton(NULL,callbackButton);//create a push button "button 0", that will call callbackButton. + createButton("button2",callbackButton,NULL,CV_CHECKBOX,0); + createButton("button3",callbackButton,&value); + createButton("button5",callbackButton1,NULL,CV_RADIOBOX); + createButton("button6",callbackButton2,NULL,CV_PUSH_BUTTON,1); +@endcode +*/ CV_EXPORTS int createButton( const String& bar_name, ButtonCallback on_change, void* userdata = 0, int type = QT_PUSH_BUTTON, bool initial_button_state = false); +//! @} highgui_qt + +//!
@} highgui + } // cv #endif diff --git a/modules/highgui/include/opencv2/highgui/highgui_c.h b/modules/highgui/include/opencv2/highgui/highgui_c.h index 13849e2540..a8780ade06 100644 --- a/modules/highgui/include/opencv2/highgui/highgui_c.h +++ b/modules/highgui/include/opencv2/highgui/highgui_c.h @@ -51,6 +51,10 @@ extern "C" { #endif /* __cplusplus */ +/** @addtogroup highgui_c + @{ + */ + /****************************************************************************************\ * Basic GUI functions * \****************************************************************************************/ @@ -237,6 +241,8 @@ CVAPI(void) cvSetPostprocessFuncWin32_(const void* callback); #endif +/** @} highgui_c */ + #ifdef __cplusplus } #endif diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index fd5c08a933..f8c6900b26 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -45,10 +45,21 @@ #include "opencv2/core.hpp" +/** + @defgroup imgcodecs Image file reading and writing + @{ + @defgroup imgcodecs_c C API + @defgroup imgcodecs_ios iOS glue + @} +*/ + //////////////////////////////// image codec //////////////////////////////// namespace cv { +//! @addtogroup imgcodecs +//! @{ + enum { IMREAD_UNCHANGED = -1, // 8bit, color or not IMREAD_GRAYSCALE = 0, // 8bit, gray IMREAD_COLOR = 1, // ?, color @@ -77,19 +88,166 @@ enum { IMWRITE_PNG_STRATEGY_DEFAULT = 0, IMWRITE_PNG_STRATEGY_FIXED = 4 }; +/** @brief Loads an image from a file. + +@param filename Name of file to be loaded. +@param flags Flags specifying the color type of a loaded image: +- CV_LOAD_IMAGE_ANYDEPTH - If set, return 16-bit/32-bit image when the input has the + corresponding depth, otherwise convert it to 8-bit. 
+- CV_LOAD_IMAGE_COLOR - If set, always convert image to the color one +- CV_LOAD_IMAGE_GRAYSCALE - If set, always convert image to the grayscale one +- **\>0** Return a 3-channel color image. + +@note In the current implementation the alpha channel, if any, is stripped from the output image. +Use negative value if you need the alpha channel. + +- **=0** Return a grayscale image. +- **\<0** Return the loaded image as is (with alpha channel). + +The function imread loads an image from the specified file and returns it. If the image cannot be +read (because of missing file, improper permissions, unsupported or invalid format), the function +returns an empty matrix ( Mat::data==NULL ). Currently, the following file formats are supported: + +- Windows bitmaps - \*.bmp, \*.dib (always supported) +- JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Notes* section) +- JPEG 2000 files - \*.jp2 (see the *Notes* section) +- Portable Network Graphics - \*.png (see the *Notes* section) +- WebP - \*.webp (see the *Notes* section) +- Portable image format - \*.pbm, \*.pgm, \*.ppm (always supported) +- Sun rasters - \*.sr, \*.ras (always supported) +- TIFF files - \*.tiff, \*.tif (see the *Notes* section) + +@note + +- The function determines the type of an image by the content, not by the file extension. +- On Microsoft Windows\* OS and MacOSX\*, the codecs shipped with an OpenCV image (libjpeg, + libpng, libtiff, and libjasper) are used by default. So, OpenCV can always read JPEGs, PNGs, + and TIFFs. On MacOSX, there is also an option to use native MacOSX image readers. But beware + that currently these native image loaders give images with different pixel values because of + the color management embedded into MacOSX. +- On Linux\*, BSD flavors and other Unix-like open-source operating systems, OpenCV looks for + codecs supplied with an OS image. 
Install the relevant packages (do not forget the development + files, for example, "libjpeg-dev", in Debian\* and Ubuntu\*) to get the codec support or turn + on the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake. + +@note In the case of color images, the decoded images will have the channels stored in B G R order. + */ CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR ); +/** @brief Saves an image to a specified file. + +@param filename Name of the file. +@param img Image to be saved. +@param params Format-specific save parameters encoded as pairs +paramId_1, paramValue_1, paramId_2, paramValue_2, ... . The following parameters are currently +supported: +- For JPEG, it can be a quality ( CV_IMWRITE_JPEG_QUALITY ) from 0 to 100 (the higher is + the better). Default value is 95. +- For WEBP, it can be a quality ( CV_IMWRITE_WEBP_QUALITY ) from 1 to 100 (the higher is + the better). By default (without any parameter) and for quality above 100 the lossless + compression is used. +- For PNG, it can be the compression level ( CV_IMWRITE_PNG_COMPRESSION ) from 0 to 9. A + higher value means a smaller size and longer compression time. Default value is 3. +- For PPM, PGM, or PBM, it can be a binary format flag ( CV_IMWRITE_PXM_BINARY ), 0 or 1. + Default value is 1. + +The function imwrite saves the image to the specified file. The image format is chosen based on the +filename extension (see imread for the list of extensions). Only 8-bit (or 16-bit unsigned (CV_16U) +in case of PNG, JPEG 2000, and TIFF) single-channel or 3-channel (with 'BGR' channel order) images +can be saved using this function. If the format, depth or channel order is different, use +Mat::convertTo , and cvtColor to convert it before saving. Or, use the universal FileStorage I/O +functions to save the image to XML or YAML format. + +It is possible to store PNG images with an alpha channel using this function. 
To do this, create +8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels +should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535. The sample below +shows how to create such a BGRA image and store to PNG file. It also demonstrates how to set custom +compression parameters : +@code + #include + #include + #include + + using namespace cv; + using namespace std; + + void createAlphaMat(Mat &mat) + { + for (int i = 0; i < mat.rows; ++i) { + for (int j = 0; j < mat.cols; ++j) { + Vec4b& rgba = mat.at(i, j); + rgba[0] = UCHAR_MAX; + rgba[1] = saturate_cast((float (mat.cols - j)) / ((float)mat.cols) * UCHAR_MAX); + rgba[2] = saturate_cast((float (mat.rows - i)) / ((float)mat.rows) * UCHAR_MAX); + rgba[3] = saturate_cast(0.5 * (rgba[1] + rgba[2])); + } + } + } + + int main(int argv, char **argc) + { + // Create mat with alpha channel + Mat mat(480, 640, CV_8UC4); + createAlphaMat(mat); + + vector compression_params; + compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION); + compression_params.push_back(9); + + try { + imwrite("alpha.png", mat, compression_params); + } + catch (runtime_error& ex) { + fprintf(stderr, "Exception converting image to PNG format: %s\n", ex.what()); + return 1; + } + + fprintf(stdout, "Saved PNG file with alpha data.\n"); + return 0; + } +@endcode + */ CV_EXPORTS_W bool imwrite( const String& filename, InputArray img, const std::vector& params = std::vector()); +/** @overload */ CV_EXPORTS_W Mat imdecode( InputArray buf, int flags ); +/** @brief Reads an image from a buffer in memory. + +@param buf Input array or vector of bytes. +@param flags The same flags as in imread . +@param dst The optional output placeholder for the decoded matrix. It can save the image +reallocations when the function is called repeatedly for images of the same size. + +The function reads an image from the specified buffer in the memory. 
If the buffer is too short or +contains invalid data, the empty matrix/image is returned. + +See imread for the list of supported formats and flags description. + +@note In the case of color images, the decoded images will have the channels stored in B G R order. + */ CV_EXPORTS Mat imdecode( InputArray buf, int flags, Mat* dst); +/** @brief Encodes an image into a memory buffer. + +@param ext File extension that defines the output format. +@param img Image to be written. +@param buf Output buffer resized to fit the compressed image. +@param params Format-specific parameters. See imwrite . + +The function compresses the image and stores it in the memory buffer that is resized to fit the +result. See imwrite for the list of supported formats and flags description. + +@note cvEncodeImage returns single-row matrix of type CV_8UC1 that contains encoded image as array +of bytes. + */ CV_EXPORTS_W bool imencode( const String& ext, InputArray img, CV_OUT std::vector& buf, const std::vector& params = std::vector()); +//! 
@} imgcodecs + } // cv #endif //__OPENCV_IMGCODECS_HPP__ diff --git a/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h b/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h index ccd29a7c1c..ad793cc94a 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h +++ b/modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h @@ -48,6 +48,10 @@ extern "C" { #endif /* __cplusplus */ +/** @addtogroup imgcodecs_c + @{ + */ + enum { /* 8bit, color or not */ @@ -124,6 +128,7 @@ CVAPI(int) cvHaveImageWriter(const char* filename); #define cvvSaveImage cvSaveImage #define cvvConvertImage cvConvertImage +/** @} imgcodecs_c */ #ifdef __cplusplus } diff --git a/modules/imgcodecs/include/opencv2/imgcodecs/ios.h b/modules/imgcodecs/include/opencv2/imgcodecs/ios.h index 8ec1356053..fbd6371e58 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs/ios.h +++ b/modules/imgcodecs/include/opencv2/imgcodecs/ios.h @@ -47,6 +47,11 @@ #import #include "opencv2/core/core.hpp" +//! @addtogroup imgcodecs_ios +//! @{ + UIImage* MatToUIImage(const cv::Mat& image); void UIImageToMat(const UIImage* image, cv::Mat& m, bool alphaExist = false); + +//! @} diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index f416c99e4d..1d02506e92 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -969,7 +969,7 @@ An example using the LineSegmentDetector /** @brief Line segment detector class -following the algorithm described at @cite Rafael12. +following the algorithm described at @cite Rafael12 . */ class CV_EXPORTS_W LineSegmentDetector : public Algorithm { @@ -1361,7 +1361,7 @@ call is equivalent to -\f[\texttt{Sobel(src, dst, ddepth, dx, dy, CV_SCHARR, scale, delta, borderType)} .\f] +\f[\texttt{Sobel(src, dst, ddepth, dx, dy, CV\_SCHARR, scale, delta, borderType)} .\f] @param src input image. 
@param dst output image of the same size and the same number of channels as src. @@ -1418,7 +1418,7 @@ CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth, An example on using the canny edge detector */ -/** @brief Finds edges in an image using the Canny algorithm @cite Canny86. +/** @brief Finds edges in an image using the Canny algorithm @cite Canny86 . The function finds edges in the input image image and marks them in the output map edges using the Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The @@ -2940,7 +2940,7 @@ An example using the watershed algorithm /** @brief Performs a marker-based image segmentation using the watershed algorithm. The function implements one of the variants of watershed, non-parametric marker-based segmentation -algorithm, described in @cite Meyer92. +algorithm, described in @cite Meyer92 . Before passing the image to the function, you have to roughly outline the desired regions in the image markers with positive (\>0) indices. So, every region is represented as one or more connected @@ -3050,7 +3050,7 @@ The functions distanceTransform calculate the approximate or precise distance fr image pixel to the nearest zero pixel. For zero image pixels, the distance will obviously be zero. When maskSize == DIST_MASK_PRECISE and distanceType == DIST_L2 , the function runs the -algorithm described in @cite Felzenszwalb04. This algorithm is parallelized with the TBB library. +algorithm described in @cite Felzenszwalb04 . This algorithm is parallelized with the TBB library. In other cases, the algorithm @cite Borgefors86 is used. This means that for a pixel the function finds the shortest path to the nearest zero pixel consisting of basic shifts: horizontal, vertical, @@ -3371,7 +3371,7 @@ CV_EXPORTS_W int connectedComponentsWithStats(InputArray image, OutputArray labe /** @brief Finds contours in a binary image. 
-The function retrieves contours from the binary image using the algorithm @cite Suzuki85. The contours +The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . The contours are a useful tool for shape analysis and object detection and recognition. See squares.c in the OpenCV sample directory. diff --git a/modules/ml/include/opencv2/ml.hpp b/modules/ml/include/opencv2/ml.hpp index 07623cb86e..5e633c4d0a 100644 --- a/modules/ml/include/opencv2/ml.hpp +++ b/modules/ml/include/opencv2/ml.hpp @@ -54,12 +54,468 @@ #include #include +/** + @defgroup ml Machine Learning + @{ +@defgroup ml_stat Statistical Models +@defgroup ml_bayes Normal Bayes Classifier + +This simple classification model assumes that feature vectors from each class are normally +distributed (though, not necessarily independently distributed). So, the whole data distribution +function is assumed to be a Gaussian mixture, one component per class. Using the training data the +algorithm estimates mean vectors and covariance matrices for every class, and then it uses them for +prediction. + +@defgroup ml_knearest K-Nearest Neighbors + +The algorithm caches all training samples and predicts the response for a new sample by analyzing a +certain number (**K**) of the nearest neighbors of the sample using voting, calculating weighted +sum, and so on. The method is sometimes referred to as "learning by example" because for prediction +it looks for the feature vector with a known response that is closest to the given vector. + +@defgroup ml_svm Support Vector Machines + +Originally, support vector machines (SVM) was a technique for building an optimal binary (2-class) +classifier. Later the technique was extended to regression and clustering problems. SVM is a partial +case of kernel-based methods. 
It maps feature vectors into a higher-dimensional space using a kernel +function and builds an optimal linear discriminating function in this space or an optimal +hyper-plane that fits into the training data. In case of SVM, the kernel is not defined explicitly. +Instead, a distance between any 2 points in the hyper-space needs to be defined. + +The solution is optimal, which means that the margin between the separating hyper-plane and the +nearest feature vectors from both classes (in case of 2-class classifier) is maximal. The feature +vectors that are the closest to the hyper-plane are called *support vectors*, which means that the +position of other vectors does not affect the hyper-plane (the decision function). + +SVM implementation in OpenCV is based on @cite LibSVM . + +Prediction with SVM +------------------- + +StatModel::predict(samples, results, flags) should be used. Pass flags=StatModel::RAW_OUTPUT to get +the raw response from SVM (in the case of regression, 1-class or 2-class classification problem). + +@defgroup ml_decsiontrees Decision Trees + +The ML classes discussed in this section implement Classification and Regression Tree algorithms +described in @cite Breiman84 . + +The class cv::ml::DTrees represents a single decision tree or a collection of decision trees. It's +also a base class for RTrees and Boost. + +A decision tree is a binary tree (tree where each non-leaf node has two child nodes). It can be used +either for classification or for regression. For classification, each tree leaf is marked with a +class label; multiple leaves may have the same label. For regression, a constant is also assigned to +each tree leaf, so the approximation function is piecewise constant. + +Predicting with Decision Trees +------------------------------ + +To reach a leaf node and to obtain a response for the input feature vector, the prediction procedure +starts with the root node. 
From each non-leaf node the procedure goes to the left (selects the left +child node as the next observed node) or to the right based on the value of a certain variable whose +index is stored in the observed node. The following variables are possible: + +- **Ordered variables.** The variable value is compared with a threshold that is also stored in + the node. If the value is less than the threshold, the procedure goes to the left. Otherwise, it + goes to the right. For example, if the weight is less than 1 kilogram, the procedure goes to the + left, else to the right. + +- **Categorical variables.** A discrete variable value is tested to see whether it belongs to a + certain subset of values (also stored in the node) from a limited set of values the variable + could take. If it does, the procedure goes to the left. Otherwise, it goes to the right. For + example, if the color is green or red, go to the left, else to the right. + +So, in each node, a pair of entities (variable_index , `decision_rule (threshold/subset)` ) is +used. This pair is called a *split* (split on the variable variable_index ). Once a leaf node is +reached, the value assigned to this node is used as the output of the prediction procedure. + +Sometimes, certain features of the input vector are missed (for example, in the darkness it is +difficult to determine the object color), and the prediction procedure may get stuck in the certain +node (in the mentioned example, if the node is split by color). To avoid such situations, decision +trees use so-called *surrogate splits*. That is, in addition to the best "primary" split, every tree +node may also be split to one or more other variables with nearly the same results. + +Training Decision Trees +----------------------- + +The tree is built recursively, starting from the root node. All training data (feature vectors and +responses) is used to split the root node. 
In each node the optimum decision rule (the best +"primary" split) is found based on some criteria. In machine learning, gini "purity" criteria are +used for classification, and sum of squared errors is used for regression. Then, if necessary, the +surrogate splits are found. They resemble the results of the primary split on the training data. All +the data is divided using the primary and the surrogate splits (like it is done in the prediction +procedure) between the left and the right child node. Then, the procedure recursively splits both +left and right nodes. At each node the recursive procedure may stop (that is, stop splitting the +node further) in one of the following cases: + +- Depth of the constructed tree branch has reached the specified maximum value. +- Number of training samples in the node is less than the specified threshold when it is not + statistically representative to split the node further. +- All the samples in the node belong to the same class or, in case of regression, the variation is + too small. +- The best found split does not give any noticeable improvement compared to a random choice. + +When the tree is built, it may be pruned using a cross-validation procedure, if necessary. That is, +some branches of the tree that may lead to the model overfitting are cut off. Normally, this +procedure is only applied to standalone decision trees. Usually tree ensembles build trees that are +small enough and use their own protection schemes against overfitting. + +Variable Importance +------------------- + +Besides the prediction that is an obvious use of decision trees, the tree can be also used for +various data analyses. One of the key properties of the constructed decision tree algorithms is an +ability to compute the importance (relative decisive power) of each variable. 
For example, in a spam +filter that uses a set of words occurring in the message as a feature vector, the variable importance +rating can be used to determine the most "spam-indicating" words and thus help keep the dictionary +size reasonable. + +Importance of each variable is computed over all the splits on this variable in the tree, primary +and surrogate ones. Thus, to compute variable importance correctly, the surrogate splits must be +enabled in the training parameters, even if there is no missing data. + +@defgroup ml_boost Boosting + +A common machine learning task is supervised learning. In supervised learning, the goal is to learn +the functional relationship \f$F: y = F(x)\f$ between the input \f$x\f$ and the output \f$y\f$ . Predicting the +qualitative output is called *classification*, while predicting the quantitative output is called +*regression*. + +Boosting is a powerful learning concept that provides a solution to the supervised classification +learning task. It combines the performance of many "weak" classifiers to produce a powerful +committee @cite HTF01 . A weak classifier is only required to be better than chance, and thus can be +very simple and computationally inexpensive. However, many of them smartly combine results to a +strong classifier that often outperforms most "monolithic" strong classifiers such as SVMs and +Neural Networks. + +Decision trees are the most popular weak classifiers used in boosting schemes. Often the simplest +decision trees with only a single split node per tree (called stumps ) are sufficient. + +The boosted model is based on \f$N\f$ training examples \f$\{(x_i,y_i)\}_{1}^{N}\f$ with \f$x_i \in R^K\f$ and +\f$y_i \in \{-1, +1\}\f$ . \f$x_i\f$ is a \f$K\f$ -component vector. Each component encodes a feature relevant to +the learning task at hand. The desired two-class output is encoded as -1 and +1. + +Different variants of boosting are known as Discrete Adaboost, Real AdaBoost, LogitBoost, and Gentle +AdaBoost @cite FHT98 . 
All of them are very similar in their overall structure. Therefore, this chapter +focuses only on the standard two-class Discrete AdaBoost algorithm, outlined below. Initially the +same weight is assigned to each sample (step 2). Then, a weak classifier \f$f_m(x)\f$ is trained on +the weighted training data (step 3.1). Its weighted training error and scaling factor \f$c_m\f$ are +computed (step 3.2). The weights are increased for training samples that have been misclassified +(step 3.3). All weights are then normalized, and the process of finding the next weak classifier +continues for another \f$M-1\f$ times. The final classifier \f$F(x)\f$ is the sign of the weighted sum over +the individual weak classifiers (step 4). + +**Two-class Discrete AdaBoost Algorithm** + +1. Set \f$N\f$ examples \f$\{(x_i,y_i)\}_{1}^{N}\f$ with \f$x_i \in R^K, y_i \in \{-1, +1\}\f$ . + +2. Assign weights as \f$w_i = 1/N, i = 1,...,N\f$ . + +3. Repeat for \f$m = 1,2,...,M\f$ : + + 3.1. Fit the classifier \f$f_m(x) \in \{-1,1\}\f$, using weights \f$w_i\f$ on the training data. + + 3.2. Compute \f$err_m = E_w [1_{(y \neq f_m(x))}], c_m = \log((1 - err_m)/err_m)\f$ . + + 3.3. Set \f$w_i \Leftarrow w_i \exp[c_m 1_{(y_i \neq f_m(x_i))}], i = 1,2,...,N,\f$ and renormalize + so that \f$\sum_i w_i = 1\f$ . + +4. Classify new samples *x* using the formula: \f$\textrm{sign} (\sum_{m=1}^{M} c_m f_m(x))\f$ . + +@note Similar to the classical boosting methods, the current implementation supports two-class +classifiers only. For M \> 2 classes, there is the **AdaBoost.MH** algorithm (described in +@cite FHT98 ) that reduces the problem to the two-class problem, yet with a much larger training set. +To reduce computation time for boosted models without substantially losing accuracy, the influence +trimming technique can be employed. 
As the training algorithm proceeds and the number of trees in +the ensemble is increased, a larger number of the training samples are classified correctly and with +increasing confidence, thereby those samples receive smaller weights on the subsequent iterations. +Examples with a very low relative weight have a small impact on the weak classifier training. Thus, +such examples may be excluded during the weak classifier training without having much effect on the +induced classifier. This process is controlled with the weight_trim_rate parameter. Only examples +with the summary fraction weight_trim_rate of the total weight mass are used in the weak +classifier training. Note that the weights for **all** training examples are recomputed at each +training iteration. Examples deleted at a particular iteration may be used again for learning some +of the weak classifiers further @cite FHT98 . + +Prediction with Boost +--------------------- +StatModel::predict(samples, results, flags) should be used. Pass flags=StatModel::RAW_OUTPUT to get +the raw sum from Boost classifier. + +@defgroup ml_randomtrees Random Trees + +Random trees have been introduced by Leo Breiman and Adele Cutler: + . The algorithm can deal with both +classification and regression problems. Random trees is a collection (ensemble) of tree predictors +that is called *forest* further in this section (the term has been also introduced by L. Breiman). +The classification works as follows: the random trees classifier takes the input feature vector, +classifies it with every tree in the forest, and outputs the class label that received the majority +of "votes". In case of a regression, the classifier response is the average of the responses over +all the trees in the forest. + +All the trees are trained with the same parameters but on different training sets. 
These sets are +generated from the original training set using the bootstrap procedure: for each training set, you +randomly select the same number of vectors as in the original set ( =N ). The vectors are chosen +with replacement. That is, some vectors will occur more than once and some will be absent. At each +node of each trained tree, not all the variables are used to find the best split, but a random +subset of them. With each node a new subset is generated. However, its size is fixed for all the +nodes and all the trees. It is a training parameter set to \f$\sqrt{number_of_variables}\f$ by +default. None of the built trees are pruned. + +In random trees there is no need for any accuracy estimation procedures, such as cross-validation or +bootstrap, or a separate test set to get an estimate of the training error. The error is estimated +internally during the training. When the training set for the current tree is drawn by sampling with +replacement, some vectors are left out (so-called *oob (out-of-bag) data* ). The size of oob data is +about N/3 . The classification error is estimated by using this oob-data as follows: + +- Get a prediction for each vector, which is oob relative to the i-th tree, using the very i-th + tree. + +- After all the trees have been trained, for each vector that has ever been oob, find the + class-*winner* for it (the class that has got the majority of votes in the trees where the + vector was oob) and compare it to the ground-truth response. + +- Compute the classification error estimate as a ratio of the number of misclassified oob vectors + to all the vectors in the original data. In case of regression, the oob-error is computed as the + squared error for oob vectors difference divided by the total number of vectors. + +For the random trees usage example, please, see letter_recog.cpp sample in OpenCV distribution. + +**References:** + +- *Machine Learning*, Wald I, July 2002. 
+ +- *Looking Inside the Black Box*, Wald II, July 2002. + +- *Software for the Masses*, Wald III, July 2002. + +- And other articles from the web site + + +@defgroup ml_em Expectation Maximization + +The Expectation Maximization(EM) algorithm estimates the parameters of the multivariate probability +density function in the form of a Gaussian mixture distribution with a specified number of mixtures. + +Consider the set of the N feature vectors { \f$x_1, x_2,...,x_{N}\f$ } from a d-dimensional Euclidean +space drawn from a Gaussian mixture: + +\f[p(x;a_k,S_k, \pi _k) = \sum _{k=1}^{m} \pi _kp_k(x), \quad \pi _k \geq 0, \quad \sum _{k=1}^{m} \pi _k=1,\f] + +\f[p_k(x)= \varphi (x;a_k,S_k)= \frac{1}{(2\pi)^{d/2}\mid{S_k}\mid^{1/2}} exp \left \{ - \frac{1}{2} (x-a_k)^TS_k^{-1}(x-a_k) \right \} ,\f] + +where \f$m\f$ is the number of mixtures, \f$p_k\f$ is the normal distribution density with the mean \f$a_k\f$ +and covariance matrix \f$S_k\f$, \f$\pi_k\f$ is the weight of the k-th mixture. Given the number of mixtures +\f$M\f$ and the samples \f$x_i\f$, \f$i=1..N\f$ the algorithm finds the maximum-likelihood estimates (MLE) of +all the mixture parameters, that is, \f$a_k\f$, \f$S_k\f$ and \f$\pi_k\f$ : + +\f[L(x, \theta )=logp(x, \theta )= \sum _{i=1}^{N}log \left ( \sum _{k=1}^{m} \pi _kp_k(x) \right ) \to \max _{ \theta \in \Theta },\f] + +\f[\Theta = \left \{ (a_k,S_k, \pi _k): a_k \in \mathbbm{R} ^d,S_k=S_k^T>0,S_k \in \mathbbm{R} ^{d \times d}, \pi _k \geq 0, \sum _{k=1}^{m} \pi _k=1 \right \} .\f] + +The EM algorithm is an iterative procedure. Each iteration includes two steps. 
At the first step +(Expectation step or E-step), you find a probability \f$p_{i,k}\f$ (denoted \f$\alpha_{i,k}\f$ in the +formula below) of sample i to belong to mixture k using the currently available mixture parameter +estimates: + +\f[\alpha _{ki} = \frac{\pi_k\varphi(x;a_k,S_k)}{\sum\limits_{j=1}^{m}\pi_j\varphi(x;a_j,S_j)} .\f] + +At the second step (Maximization step or M-step), the mixture parameter estimates are refined using +the computed probabilities: + +\f[\pi _k= \frac{1}{N} \sum _{i=1}^{N} \alpha _{ki}, \quad a_k= \frac{\sum\limits_{i=1}^{N}\alpha_{ki}x_i}{\sum\limits_{i=1}^{N}\alpha_{ki}} , \quad S_k= \frac{\sum\limits_{i=1}^{N}\alpha_{ki}(x_i-a_k)(x_i-a_k)^T}{\sum\limits_{i=1}^{N}\alpha_{ki}}\f] + +Alternatively, the algorithm may start with the M-step when the initial values for \f$p_{i,k}\f$ can be +provided. Another alternative when \f$p_{i,k}\f$ are unknown is to use a simpler clustering algorithm to +pre-cluster the input samples and thus obtain initial \f$p_{i,k}\f$ . Often (including machine learning) +the k-means algorithm is used for that purpose. + +One of the main problems of the EM algorithm is a large number of parameters to estimate. The +majority of the parameters reside in covariance matrices, which are \f$d \times d\f$ elements each where +\f$d\f$ is the feature space dimensionality. However, in many practical problems, the covariance +matrices are close to diagonal or even to \f$\mu_k*I\f$ , where \f$I\f$ is an identity matrix and \f$\mu_k\f$ is +a mixture-dependent "scale" parameter. So, a robust computation scheme could start with harder +constraints on the covariance matrices and then use the estimated parameters as an input for a less +constrained optimization problem (often a diagonal covariance matrix is already a good enough +approximation). + +References: +- Bilmes98 J. A. Bilmes. *A Gentle Tutorial of the EM Algorithm and its Application to Parameter + Estimation for Gaussian Mixture and Hidden Markov Models*. 
Technical Report TR-97-021, + International Computer Science Institute and Computer Science Division, University of California + at Berkeley, April 1998. + +@defgroup ml_neural Neural Networks + +ML implements feed-forward artificial neural networks or, more particularly, multi-layer perceptrons +(MLP), the most commonly used type of neural networks. MLP consists of the input layer, output +layer, and one or more hidden layers. Each layer of MLP includes one or more neurons directionally +linked with the neurons from the previous and the next layer. The example below represents a 3-layer +perceptron with three inputs, two outputs, and the hidden layer including five neurons: + +![image](pics/mlp.png) + +All the neurons in MLP are similar. Each of them has several input links (it takes the output values +from several neurons in the previous layer as input) and several output links (it passes the +response to several neurons in the next layer). The values retrieved from the previous layer are +summed up with certain weights, individual for each neuron, plus the bias term. The sum is +transformed using the activation function \f$f\f$ that may be also different for different neurons. + +![image](pics/neuron_model.png) + +In other words, given the outputs \f$x_j\f$ of the layer \f$n\f$ , the outputs \f$y_i\f$ of the layer \f$n+1\f$ are +computed as: + +\f[u_i = \sum _j (w^{n+1}_{i,j}*x_j) + w^{n+1}_{i,bias}\f] + +\f[y_i = f(u_i)\f] + +Different activation functions may be used. ML implements three standard functions: + +- Identity function ( ANN_MLP::IDENTITY ): \f$f(x)=x\f$ + +- Symmetrical sigmoid ( ANN_MLP::SIGMOID_SYM ): \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$, + which is the default choice for MLP. The standard sigmoid with \f$\beta =1, \alpha =1\f$ is shown + below: + + ![image](pics/sigmoid_bipolar.png) + +- Gaussian function ( ANN_MLP::GAUSSIAN ): \f$f(x)=\beta e^{-\alpha x*x}\f$ , which is not completely + supported at the moment. 
+ +In ML, all the neurons have the same activation functions, with the same free parameters ( +\f$\alpha, \beta\f$ ) that are specified by user and are not altered by the training algorithms. + +So, the whole trained network works as follows: + +1. Take the feature vector as input. The vector size is equal to the size of the input layer. +2. Pass values as input to the first hidden layer. +3. Compute outputs of the hidden layer using the weights and the activation functions. +4. Pass outputs further downstream until you compute the output layer. + +So, to compute the network, you need to know all the weights \f$w^{n+1}_{i,j}\f$ . The weights are +computed by the training algorithm. The algorithm takes a training set, multiple input vectors with +the corresponding output vectors, and iteratively adjusts the weights to enable the network to give +the desired response to the provided input vectors. + +The larger the network size (the number of hidden layers and their sizes) is, the more the potential +network flexibility is. The error on the training set could be made arbitrarily small. But at the +same time the learned network also "learns" the noise present in the training set, so the error on +the test set usually starts increasing after the network size reaches a limit. Besides, the larger +networks are trained much longer than the smaller ones, so it is reasonable to pre-process the data, +using PCA::operator() or similar technique, and train a smaller network on only essential features. + +Another MLP feature is an inability to handle categorical data as is. However, there is a +workaround. If a certain feature in the input or output (in case of n -class classifier for \f$n>2\f$ ) +layer is categorical and can take \f$M>2\f$ different values, it makes sense to represent it as a binary +tuple of M elements, where the i -th element is 1 if and only if the feature is equal to the i -th +value out of M possible. 
It increases the size of the input/output layer but speeds up the training +algorithm convergence and at the same time enables "fuzzy" values of such variables, that is, a +tuple of probabilities instead of a fixed value. + +ML implements two algorithms for training MLP's. The first algorithm is a classical random +sequential back-propagation algorithm. The second (default) one is a batch RPROP algorithm. + +@defgroup ml_lr Logistic Regression + +ML implements logistic regression, which is a probabilistic classification technique. Logistic +Regression is a binary classification algorithm which is closely related to Support Vector Machines +(SVM). Like SVM, Logistic Regression can be extended to work on multi-class classification problems +like digit recognition (i.e. recognizing digits like 0, 1, 2, 3,... from the given images). This +version of Logistic Regression supports both binary and multi-class classifications (for multi-class +it creates multiple 2-class classifiers). In order to train the logistic regression classifier, +Batch Gradient Descent and Mini-Batch Gradient Descent algorithms are used (see ). +Logistic Regression is a discriminative classifier (see for more details). +Logistic Regression is implemented as a C++ class in LogisticRegression. + +In Logistic Regression, we try to optimize the training parameter \f$\theta\f$ such that the hypothesis +\f$0 \leq h_\theta(x) \leq 1\f$ is achieved. We have \f$h_\theta(x) = g(\theta^T x)\f$ and +\f$g(z) = \frac{1}{1+e^{-z}}\f$ as the logistic or sigmoid function. The term "Logistic" in Logistic +Regression refers to this function. For given data of a binary classification problem of classes 0 +and 1, one can determine that the given data instance belongs to class 1 if \f$h_\theta(x) \geq 0.5\f$ +or class 0 if \f$h_\theta(x) < 0.5\f$ . + +In Logistic Regression, choosing the right parameters is of utmost importance for reducing the +training error and ensuring high training accuracy. 
LogisticRegression::Params is the structure that +defines parameters that are required to train a Logistic Regression classifier. The learning rate is +determined by LogisticRegression::Params.alpha. It determines how fast we approach the solution. +It is a positive real number. Optimization algorithms like Batch Gradient Descent and Mini-Batch +Gradient Descent are supported in LogisticRegression. It is important that we specify the number of +iterations these optimization algorithms have to run. The number of iterations is specified by +LogisticRegression::Params.num_iters. The number of iterations can be thought of as the number of steps +taken and learning rate specifies if it is a long step or a short step. These two parameters define +how fast we arrive at a possible solution. In order to compensate for overfitting, regularization is +performed, which can be enabled by setting LogisticRegression::Params.regularized to a positive +integer (greater than zero). One can specify what kind of regularization has to be performed by +setting LogisticRegression::Params.norm to LogisticRegression::REG_L1 or +LogisticRegression::REG_L2 values. LogisticRegression provides a choice of 2 training methods with +Batch Gradient Descent or the Mini-Batch Gradient Descent. To specify this, set +LogisticRegression::Params.train_method to either LogisticRegression::BATCH or +LogisticRegression::MINI_BATCH. If LogisticRegression::Params.train_method is set to +LogisticRegression::MINI_BATCH, the size of the mini batch has to be set to a positive integer using +LogisticRegression::Params.mini_batch_size. 
+ +A sample set of training parameters for the Logistic Regression classifier can be initialized as +follows: +@code + LogisticRegression::Params params; + params.alpha = 0.5; + params.num_iters = 10000; + params.norm = LogisticRegression::REG_L2; + params.regularized = 1; + params.train_method = LogisticRegression::MINI_BATCH; + params.mini_batch_size = 10; +@endcode + +@defgroup ml_data Training Data + +In machine learning algorithms there is a notion of training data. Training data includes several +components: + +- A set of training samples. Each training sample is a vector of values (in Computer Vision it's + sometimes referred to as feature vector). Usually all the vectors have the same number of + components (features); OpenCV ml module assumes that. Each feature can be ordered (i.e. its + values are floating-point numbers that can be compared with each other and strictly ordered, + i.e. sorted) or categorical (i.e. its value belongs to a fixed set of values that can be + integers, strings etc.). +- Optional set of responses corresponding to the samples. Training data with no responses is used + in unsupervised learning algorithms that learn structure of the supplied data based on distances + between different samples. Training data with responses is used in supervised learning + algorithms, which learn the function mapping samples to responses. Usually the responses are + scalar values, ordered (when we deal with regression problem) or categorical (when we deal with + classification problem; in this case the responses are often called "labels"). Some algorithms, + most notably Neural networks, can handle not only scalar, but also multi-dimensional or + vector responses. +- Another optional component is the mask of missing measurements. Most algorithms require all the + components in all the training samples be valid, but some other algorithms, such as decision + trees, can handle the cases of missing measurements. 
+- In the case of a classification problem the user may want to give different weights to different + classes. This is useful, for example, when + - user wants to shift prediction accuracy towards lower false-alarm rate or higher hit-rate. + - user wants to compensate for significantly different amounts of training samples from + different classes. +- In addition to that, each training sample may be given a weight, if user wants the algorithm to + pay special attention to certain training samples and adjust the training model accordingly. +- Also, user may wish not to use the whole training data at once, but rather use parts of it, e.g. + to do parameter optimization via cross-validation procedure. + +As you can see, training data can have rather complex structure; besides, it may be very big and/or +not entirely available, so there is a need to make an abstraction for this concept. In OpenCV ml there is +cv::ml::TrainData class for that. + + @} + */ + namespace cv { namespace ml { +//! @addtogroup ml +//! @{ + /* Variable type */ enum { @@ -80,9 +536,44 @@ enum COL_SAMPLE = 1 }; +//! @addtogroup ml_svm +//! @{ + +/** @brief The structure represents the logarithmic grid range of statmodel parameters. + +It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate +being computed by cross-validation. +- member double ParamGrid::minVal +Minimum value of the statmodel parameter. +- member double ParamGrid::maxVal +Maximum value of the statmodel parameter. +- member double ParamGrid::logStep +Logarithmic step for iterating the statmodel parameter. +The grid determines the following iteration sequence of the statmodel parameter values: + +\f[(minVal, minVal*logStep, minVal*{logStep}^2, \dots, minVal*{logStep}^n),\f] + +where \f$n\f$ is the maximal index satisfying + +\f[\texttt{minVal} * \texttt{logStep} ^n < \texttt{maxVal}\f] + +The grid is logarithmic, so logStep must always be greater than 1. 
+ */ class CV_EXPORTS_W_MAP ParamGrid { public: + /** @brief The constructors. + + The full constructor initializes corresponding members. The default constructor creates a dummy + grid: + @code + ParamGrid::ParamGrid() + { + minVal = maxVal = 0; + logStep = 1; + } + @endcode + */ ParamGrid(); ParamGrid(double _minVal, double _maxVal, double _logStep); @@ -91,6 +582,18 @@ public: CV_PROP_RW double logStep; }; +//! @} ml_svm + +//! @addtogroup ml_data +//! @{ + +/** @brief Class encapsulating training data. + +Please note that the class only specifies the interface of training data, but not implementation. +All the statistical model classes in ml take Ptr\<TrainData\>. In other words, you can create your +own class derived from TrainData and supply a smart pointer to the instance of this class into +StatModel::train. + */ class CV_EXPORTS TrainData { public: @@ -107,10 +610,36 @@ public: virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0; virtual Mat getSamples() const = 0; virtual Mat getMissing() const = 0; + + /** @brief Returns matrix of train samples + + @param layout The requested layout. If it's different from the initial one, the matrix is + transposed. + @param compressSamples if true, the function returns only the training samples (specified by + sampleIdx) + @param compressVars if true, the function returns the shorter training samples, containing only + the active variables. + + In current implementation the function tries to avoid physical data copying and returns the matrix + stored inside TrainData (unless the transposition or compression is needed). + */ virtual Mat getTrainSamples(int layout=ROW_SAMPLE, bool compressSamples=true, bool compressVars=true) const = 0; + + /** @brief Returns the vector of responses + + The function returns ordered or the original categorical responses. Usually it's used in regression + algorithms.
+ */ virtual Mat getTrainResponses() const = 0; + + /** @brief Returns the vector of normalized categorical responses + + The function returns vector of responses. Each response is integer from 0 to \<number of classes\>-1. The actual label value can be retrieved then from the class label vector, see + TrainData::getClassLabels. + */ virtual Mat getTrainNormCatResponses() const = 0; virtual Mat getTestResponses() const = 0; virtual Mat getTestNormCatResponses() const = 0; @@ -129,16 +658,52 @@ public: virtual Mat getDefaultSubstValues() const = 0; virtual int getCatCount(int vi) const = 0; + + /** @brief Returns the vector of class labels + + The function returns vector of unique labels that occur in the responses. + */ virtual Mat getClassLabels() const = 0; virtual Mat getCatOfs() const = 0; virtual Mat getCatMap() const = 0; virtual void setTrainTestSplit(int count, bool shuffle=true) = 0; + + /** @brief Splits the training data into the training and test parts + + The function selects a subset of specified relative size and then returns it as the training set. If + the function is not called, all the data is used for training. Please note that for each of + TrainData::getTrain\* there is corresponding TrainData::getTest\*, so that the test subset can be + retrieved and processed as well. + */ virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0; virtual void shuffleTrainTest() = 0; static Mat getSubVector(const Mat& vec, const Mat& idx); + + /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data. + + @param filename The input file name + @param headerLineCount The number of lines in the beginning to skip; besides the header, the + function also skips empty lines and lines starting with '\#' + @param responseStartIdx Index of the first output variable. If -1, the function considers the last + variable as the response + @param responseEndIdx Index of the last output variable + 1.
If -1, then there is a single response + variable at responseStartIdx. + @param varTypeSpec The optional text string that specifies the variables' types. It has the format ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]. That is, variables from n1 to n2 (inclusive range), n3, n4 to n5 ... are considered ordered and n6, n7 to n8 ... are considered as categorical. The range [n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8] should cover all the variables. If varTypeSpec is not specified, then the algorithm uses the following rules: + # all input variables are considered ordered by default. If some column has + non-numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding + variable is considered categorical. + # if there are several output variables, they are all considered as ordered. Error is + reported when non-numerical values are used. + # if there is a single output variable, then if its values are non-numerical or are all + integers, then it's considered categorical. Otherwise, it's considered ordered. + @param delimiter The character used to separate values in each line. + @param missch The character used to specify missing measurements. It should not be a digit. + Although it's a non-numerical value, it surely does not affect the decision of whether the + variable is ordered or categorical. + */ static Ptr loadFromCSV(const String& filename, int headerLineCount, int responseStartIdx=-1, @@ -146,28 +711,114 @@ public: const String& varTypeSpec=String(), char delimiter=',', char missch='?'); + /** @brief Creates training data from in-memory arrays. + + @param samples matrix of samples. It should have CV_32F type. + @param layout it's either ROW_SAMPLE, which means that each training sample is a row of samples, + or COL_SAMPLE, which means that each training sample occupies a column of samples. + @param responses matrix of responses. If the responses are scalar, they should be stored as a + single row or as a single column.
The matrix should have type CV_32F or CV_32S (in the former + case the responses are considered as ordered by default; in the latter case - as categorical) + @param varIdx vector specifying which variables to use for training. It can be an integer vector + (CV_32S) containing 0-based variable indices or byte vector (CV_8U) containing a mask of active + variables. + @param sampleIdx vector specifying which samples to use for training. It can be an integer vector + (CV_32S) containing 0-based sample indices or byte vector (CV_8U) containing a mask of training + samples. + @param sampleWeights optional vector with weights for each sample. It should have CV_32F type. + @param varType optional vector of type CV_8U and size \<number_of_variables_in_samples\> + + \<number_of_variables_in_responses\>, containing types of each input and output variable. The + ordered variables are denoted by value VAR_ORDERED, and categorical - by VAR_CATEGORICAL. + */ static Ptr create(InputArray samples, int layout, InputArray responses, InputArray varIdx=noArray(), InputArray sampleIdx=noArray(), InputArray sampleWeights=noArray(), InputArray varType=noArray()); }; +//! @} ml_data +//! @addtogroup ml_stat +//! @{ + +/** @brief Base class for statistical models in OpenCV ML. + */ class CV_EXPORTS_W StatModel : public Algorithm { public: enum { UPDATE_MODEL = 1, RAW_OUTPUT=1, COMPRESSED_INPUT=2, PREPROCESSED_INPUT=4 }; virtual void clear(); + /** @brief Returns the number of variables in training samples + + The method must be overridden in the derived classes. + */ virtual int getVarCount() const = 0; + /** @brief Returns true if the model is trained + + The method must be overridden in the derived classes. + */ virtual bool isTrained() const = 0; + /** @brief Returns true if the model is a classifier + + The method must be overridden in the derived classes.
+ */ virtual bool isClassifier() const = 0; + /** @brief Trains the statistical model + + @param trainData training data that can be loaded from file using TrainData::loadFromCSV or + created with TrainData::create. + @param flags optional flags, depending on the model. Some of the models can be updated with the + new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP). + + There are 2 instance methods and 2 static (class) template methods. The first two train the already + created model (the very first method must be overridden in the derived classes). And the latter two + variants are convenience methods that construct an empty model and then call its train method. + */ virtual bool train( const Ptr& trainData, int flags=0 ); + /** @overload + @param samples training samples + @param layout ROW_SAMPLE (training samples are the matrix rows) or COL_SAMPLE (training samples + are the matrix columns) + @param responses vector of responses associated with the training samples. + */ virtual bool train( InputArray samples, int layout, InputArray responses ); + + /** @brief Computes error on the training or test dataset + + @param data the training data + @param test if true, the error is computed over the test subset of the data, otherwise it's + computed over the training subset of the data. Please note that if you loaded a completely + different dataset to evaluate an already trained classifier, you will probably want not to set the + test subset at all with TrainData::setTrainTestSplitRatio and specify test=false, so that the + error is computed for the whole new set. Yes, this sounds a bit confusing. + @param resp the optional output responses. + + The method uses StatModel::predict to compute the error. For regression models the error is computed + as RMS, for classifiers - as a percent of misclassified samples (0%-100%).
+ */ virtual float calcError( const Ptr& data, bool test, OutputArray resp ) const; + + /** @brief Predicts response(s) for the provided sample(s) + + @param samples The input samples, floating-point matrix + @param results The optional output matrix of results. + @param flags The optional flags, model-dependent. Some models, such as Boost, SVM recognize + StatModel::RAW_OUTPUT flag, which makes the method return the raw results (the sum), not the + class label. + */ virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0; + /** @brief Loads model from the file + + This is a static template method of StatModel. Its usage is as follows (in the case of SVM): + + Ptr\<SVM\> svm = StatModel::load\<SVM\>("my_svm_model.xml"); + + In order to make this method work, the derived class must override + Algorithm::read(const FileNode& fn). + */ template static Ptr<_Tp> load(const String& filename) { FileStorage fs(filename, FileStorage::READ); @@ -189,19 +840,26 @@ public: return !model.empty() && model->train(TrainData::create(samples, layout, responses), flags) ? model : Ptr<_Tp>(); } + /** @brief Saves the model to a file. + + In order to make this method work, the derived class must override + Algorithm::write(FileStorage& fs). + */ virtual void save(const String& filename) const; virtual String getDefaultModelName() const = 0; }; +//! @} ml_stat + /****************************************************************************************\ * Normal Bayes Classifier * \****************************************************************************************/ -/* The structure, representing the grid range of statmodel parameters. - It is used for optimizing statmodel accuracy by varying model parameters, - the accuracy estimate being computed by cross-validation. - The grid is logarithmic, so must be greater then 1. */ +//! @addtogroup ml_bayes +//! @{ +/** @brief Bayes classifier for normally distributed data.
+ */ class CV_EXPORTS_W NormalBayesClassifier : public StatModel { public: @@ -210,19 +868,49 @@ public: public: Params(); }; + /** @brief Predicts the response for sample(s). + + The method estimates the most probable classes for input vectors. Input vectors (one or more) are + stored as rows of the matrix inputs. In case of multiple input vectors, there should be one output + vector outputs. The predicted class for a single input vector is returned by the method. The vector + outputProbs contains the output probabilities corresponding to each element of result. + */ virtual float predictProb( InputArray inputs, OutputArray outputs, OutputArray outputProbs, int flags=0 ) const = 0; virtual void setParams(const Params& params) = 0; virtual Params getParams() const = 0; + /** @brief Creates empty model + + @param params The model parameters. There is none so far, the structure is used as a placeholder + for possible extensions. + + Use StatModel::train to train the model, + StatModel::train\(traindata, params) to create and train the model, + StatModel::load\(filename) to load the pre-trained model. + */ static Ptr create(const Params& params=Params()); }; +//! @} ml_bayes + /****************************************************************************************\ * K-Nearest Neighbour Classifier * \****************************************************************************************/ -// k Nearest Neighbors +//! @addtogroup ml_knearest +//! @{ + +/** @brief The class implements K-Nearest Neighbors model as described in the beginning of this section. 
+ +@note + - (Python) An example of digit recognition using KNearest can be found at + opencv_source/samples/python2/digits.py + - (Python) An example of grid search digit recognition using KNearest can be found at + opencv_source/samples/python2/digits_adjust.py + - (Python) An example of video digit recognition using KNearest can be found at + opencv_source/samples/python2/digits_video.py + */ class CV_EXPORTS_W KNearest : public StatModel { public: @@ -238,6 +926,33 @@ public: }; virtual void setParams(const Params& p) = 0; virtual Params getParams() const = 0; + + /** @brief Finds the neighbors and predicts responses for input vectors. + + @param samples Input samples stored by rows. It is a single-precision floating-point matrix of + \ \* k size. + @param k Number of used nearest neighbors. Should be greater than 1. + @param results Vector with results of prediction (regression or classification) for each input + sample. It is a single-precision floating-point vector with \ elements. + @param neighborResponses Optional output values for corresponding neighbors. It is a + single-precision floating-point matrix of \ \* k size. + @param dist Optional output distances from the input vectors to the corresponding neighbors. It is + a single-precision floating-point matrix of \ \* k size. + + For each input vector (a row of the matrix samples), the method finds the k nearest neighbors. In + case of regression, the predicted result is a mean value of the particular vector's neighbor + responses. In case of classification, the class is determined by voting. + + For each input vector, the neighbors are sorted by their distances to the vector. + + In case of C++ interface you can use output pointers to empty matrices and the function will + allocate memory itself. + + If only a single input vector is passed, all output matrices are optional and the predicted value is + returned by the method. + + The function is parallelized with the TBB library. 
+ */ virtual float findNearest( InputArray samples, int k, OutputArray results, OutputArray neighborResponses=noArray(), @@ -245,21 +960,106 @@ public: enum { BRUTE_FORCE=1, KDTREE=2 }; + /** @brief Creates the empty model + + @param params The model parameters: default number of neighbors to use in predict method (in + KNearest::findNearest this number must be passed explicitly) and the flag on whether + classification or regression model should be trained. + + The static method creates an empty KNearest classifier. It should be then trained using train method + (see StatModel::train). Alternatively, you can load the model from file using + StatModel::load\<KNearest\>(filename). + */ static Ptr create(const Params& params=Params()); }; +//! @} ml_knearest + /****************************************************************************************\ * Support Vector Machines * \****************************************************************************************/ -// SVM model +//! @addtogroup ml_svm +//! @{ + +/** @brief Support Vector Machines. + +@note + - (Python) An example of digit recognition using SVM can be found at + opencv_source/samples/python2/digits.py + - (Python) An example of grid search digit recognition using SVM can be found at + opencv_source/samples/python2/digits_adjust.py + - (Python) An example of video digit recognition using SVM can be found at + opencv_source/samples/python2/digits_video.py + */ class CV_EXPORTS_W SVM : public StatModel { public: + /** @brief SVM training parameters. + + The structure must be initialized and passed to the training method of SVM. + */ class CV_EXPORTS_W_MAP Params { public: Params(); + /** @brief The constructors + + @param svm_type Type of a SVM formulation. Possible values are: + - **SVM::C_SVC** C-Support Vector Classification. n-class classification (n \f$\geq\f$ 2), allows + imperfect separation of classes with penalty multiplier C for outliers. + - **SVM::NU_SVC** \f$\nu\f$-Support Vector Classification.
n-class classification with possible + imperfect separation. Parameter \f$\nu\f$ (in the range 0..1, the larger the value, the smoother + the decision boundary) is used instead of C. + - **SVM::ONE_CLASS** Distribution Estimation (One-class SVM). All the training data are from + the same class, SVM builds a boundary that separates the class from the rest of the feature + space. + - **SVM::EPS_SVR** \f$\epsilon\f$-Support Vector Regression. The distance between feature vectors + from the training set and the fitting hyper-plane must be less than p. For outliers the + penalty multiplier C is used. + - **SVM::NU_SVR** \f$\nu\f$-Support Vector Regression. \f$\nu\f$ is used instead of p. + See @cite LibSVM for details. + @param kernel_type Type of a SVM kernel. Possible values are: + - **SVM::LINEAR** Linear kernel. No mapping is done, linear discrimination (or regression) is + done in the original feature space. It is the fastest option. \f$K(x_i, x_j) = x_i^T x_j\f$. + - **SVM::POLY** Polynomial kernel: + \f$K(x_i, x_j) = (\gamma x_i^T x_j + coef0)^{degree}, \gamma > 0\f$. + - **SVM::RBF** Radial basis function (RBF), a good choice in most cases. + \f$K(x_i, x_j) = e^{-\gamma ||x_i - x_j||^2}, \gamma > 0\f$. + - **SVM::SIGMOID** Sigmoid kernel: \f$K(x_i, x_j) = \tanh(\gamma x_i^T x_j + coef0)\f$. + - **SVM::CHI2** Exponential Chi2 kernel, similar to the RBF kernel: + \f$K(x_i, x_j) = e^{-\gamma \chi^2(x_i,x_j)}, \chi^2(x_i,x_j) = (x_i-x_j)^2/(x_i+x_j), \gamma > 0\f$. + - **SVM::INTER** Histogram intersection kernel. A fast kernel. \f$K(x_i, x_j) = min(x_i,x_j)\f$. + @param degree Parameter degree of a kernel function (POLY). + @param gamma Parameter \f$\gamma\f$ of a kernel function (POLY / RBF / SIGMOID / CHI2). + @param coef0 Parameter coef0 of a kernel function (POLY / SIGMOID). + @param Cvalue Parameter C of a SVM optimization problem (C_SVC / EPS_SVR / NU_SVR). + @param nu Parameter \f$\nu\f$ of a SVM optimization problem (NU_SVC / ONE_CLASS / NU_SVR). 
+ @param p Parameter \f$\epsilon\f$ of a SVM optimization problem (EPS_SVR). + @param classWeights Optional weights in the C_SVC problem, assigned to particular classes. They + are multiplied by C so the parameter C of class \#i becomes classWeights(i) \* C. Thus these + weights affect the misclassification penalty for different classes. The larger the weight, the larger + the penalty on misclassification of data from the corresponding class. + @param termCrit Termination criteria of the iterative SVM training procedure which solves a + partial case of constrained quadratic optimization problem. You can specify tolerance and/or the + maximum number of iterations. + + The default constructor initializes the structure with the following values: + @code + SVMParams::SVMParams() : + svmType(SVM::C_SVC), kernelType(SVM::RBF), degree(0), + gamma(1), coef0(0), C(1), nu(0), p(0), classWeights(0) + { + termCrit = TermCriteria( TermCriteria::MAX_ITER+TermCriteria::EPS, 1000, FLT_EPSILON ); + } + @endcode + A comparison of different kernels on the following 2D test case with four classes. Four C_SVC SVMs + have been trained (one against rest) with auto_train. Evaluation on three different kernels (CHI2, + INTER, RBF). The color depicts the class with max score. Bright means max-score \> 0, dark means + max-score \< 0. + + ![image](pics/SVM_Comparison.png) + */ Params( int svm_type, int kernel_type, double degree, double gamma, double coef0, double Cvalue, double nu, double p, @@ -292,6 +1092,41 @@ public: // SVM params type enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 }; + /** @brief Trains an SVM with optimal parameters. + + @param data the training data that can be constructed using TrainData::create or + TrainData::loadFromCSV. + @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One + subset is used to test the model, the others form the train set. So, the SVM algorithm is executed + kFold times.
+ @param Cgrid + @param gammaGrid + @param pGrid + @param nuGrid + @param coeffGrid + @param degreeGrid Iteration grid for the corresponding SVM parameter. + @param balanced If true and the problem is 2-class classification then the method creates more + balanced cross-validation subsets, that is, proportions between classes in subsets are close to such + proportion in the whole train dataset. + + The method trains the SVM model automatically by choosing the optimal parameters C, gamma, p, nu, + coef0, degree from SVM::Params. Parameters are considered optimal when the cross-validation estimate + of the test set error is minimal. + + If there is no need to optimize a parameter, the corresponding grid step should be set to any value + less than or equal to 1. For example, to avoid optimization in gamma, set gammaGrid.logStep = 0, + gammaGrid.minVal, gammaGrid.maxVal as arbitrary numbers. In this case, the value params.gamma is + taken for gamma. + + And, finally, if the optimization in a parameter is required but the corresponding grid is unknown, + you may call the function SVM::getDefaultGrid. To generate a grid, for example, for gamma, call + SVM::getDefaultGrid(SVM::GAMMA). + + This function works for the classification (params.svmType=SVM::C_SVC or + params.svmType=SVM::NU_SVC) as well as for the regression (params.svmType=SVM::EPS_SVR or + params.svmType=SVM::NU_SVR). If params.svmType=SVM::ONE_CLASS, no optimization is made and the + usual SVM with parameters specified in params is executed. + */ virtual bool trainAuto( const Ptr& data, int kFold = 10, ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C), ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA), @@ -301,20 +1136,80 @@ public: ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE), bool balanced=false) = 0; + /** @brief Retrieves all the support vectors + + The method returns all the support vectors as a floating-point matrix, where support vectors are stored + as matrix rows.
+ */ CV_WRAP virtual Mat getSupportVectors() const = 0; virtual void setParams(const Params& p, const Ptr& customKernel=Ptr()) = 0; + + /** @brief Returns the current SVM parameters. + + This function may be used to get the optimal parameters obtained while automatically training + SVM::trainAuto. + */ virtual Params getParams() const = 0; virtual Ptr getKernel() const = 0; + + /** @brief Retrieves the decision function + + @param i the index of the decision function. If the problem solved is regression, 1-class or + 2-class classification, then there will be just one decision function and the index should always + be 0. Otherwise, in the case of N-class classification, there will be N\*(N-1)/2 decision + functions. + @param alpha the optional output vector for weights, corresponding to different support vectors. + In the case of linear SVM all the alpha's will be 1's. + @param svidx the optional output vector of indices of support vectors within the matrix of support + vectors (which can be retrieved by SVM::getSupportVectors). In the case of linear SVM each + decision function consists of a single "compressed" support vector. + + The method returns rho parameter of the decision function, a scalar subtracted from the weighted sum + of kernel responses. + */ virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0; + /** @brief Generates a grid for SVM parameters. + + @param param_id SVM parameters IDs that must be one of the following: + - **SVM::C** + - **SVM::GAMMA** + - **SVM::P** + - **SVM::NU** + - **SVM::COEF** + - **SVM::DEGREE** + The grid is generated for the parameter with this ID. + + The function generates a grid for the specified parameter of the SVM algorithm. The grid may be + passed to the function SVM::trainAuto. + */ static ParamGrid getDefaultGrid( int param_id ); + + /** @brief Creates empty model + + @param p SVM parameters + @param customKernel the optional custom kernel to use. 
It must implement SVM::Kernel interface. + + Use StatModel::train to train the model, StatModel::train\(traindata, params) to create and + train the model, StatModel::load\(filename) to load the pre-trained model. Since SVM has + several parameters, you may want to find the best parameters for your problem. It can be done with + SVM::trainAuto. + */ static Ptr create(const Params& p=Params(), const Ptr& customKernel=Ptr()); }; +//! @} ml_svm + /****************************************************************************************\ * Expectation - Maximization * \****************************************************************************************/ + +//! @addtogroup ml_em +//! @{ + +/** @brief The class implements the EM algorithm as described in the beginning of this section. + */ class CV_EXPORTS_W EM : public StatModel { public: @@ -327,9 +1222,36 @@ public: // The initial step enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0}; + /** @brief The class describes EM training parameters. + */ class CV_EXPORTS_W_MAP Params { public: + /** @brief The constructor + + @param nclusters The number of mixture components in the Gaussian mixture model. Default value of + the parameter is EM::DEFAULT_NCLUSTERS=5. Some of EM implementation could determine the optimal + number of mixtures within a specified value range, but that is not the case in ML yet. + @param covMatType Constraint on covariance matrices which defines type of matrices. Possible + values are: + - **EM::COV_MAT_SPHERICAL** A scaled identity matrix \f$\mu_k * I\f$. There is the only + parameter \f$\mu_k\f$ to be estimated for each matrix. The option may be used in special cases, + when the constraint is relevant, or as a first step in the optimization (for example in case + when the data is preprocessed with PCA). The results of such preliminary estimation may be + passed again to the optimization procedure, this time with + covMatType=EM::COV_MAT_DIAGONAL. 
+ - **EM::COV_MAT_DIAGONAL** A diagonal matrix with positive diagonal elements. The number of + free parameters is d for each matrix. This is most commonly used option yielding good + estimation results. + - **EM::COV_MAT_GENERIC** A symmetric positively defined matrix. The number of free + parameters in each matrix is about \f$d^2/2\f$. It is not recommended to use this option, unless + there is pretty accurate initial estimation of the parameters and/or a huge number of + training samples. + @param termCrit The termination criteria of the EM algorithm. The EM algorithm can be terminated + by the number of iterations termCrit.maxCount (number of M-steps) or when relative change of + likelihood logarithm is less than termCrit.epsilon. Default maximum number of iterations is + EM::DEFAULT_MAX_ITERS=100. + */ explicit Params(int nclusters=DEFAULT_NCLUSTERS, int covMatType=EM::COV_MAT_DIAGONAL, const TermCriteria& termCrit=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, EM::DEFAULT_MAX_ITERS, 1e-6)); @@ -340,20 +1262,102 @@ public: virtual void setParams(const Params& p) = 0; virtual Params getParams() const = 0; + /** @brief Returns weights of the mixtures + + Returns vector with the number of elements equal to the number of mixtures. + */ virtual Mat getWeights() const = 0; + /** @brief Returns the cluster centers (means of the Gaussian mixture) + + Returns matrix with the number of rows equal to the number of mixtures and number of columns equal + to the space dimensionality. + */ virtual Mat getMeans() const = 0; + /** @brief Returns covariation matrices + + Returns vector of covariation matrices. Number of matrices is the number of gaussian mixtures, each + matrix is a square floating-point matrix NxN, where N is the space dimensionality. + */ virtual void getCovs(std::vector& covs) const = 0; + /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component for the + given sample. 
+ + @param sample A sample for classification. It should be a one-channel matrix of \f$1 \times dims\f$ or + \f$dims \times 1\f$ size. + @param probs Optional output matrix that contains posterior probabilities of each component given + the sample. It has \f$1 \times nclusters\f$ size and CV_64FC1 type. + + The method returns a two-element double vector. Zero element is a likelihood logarithm value for the + sample. First element is an index of the most probable mixture component for the given sample. + */ CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0; virtual bool train( const Ptr& trainData, int flags=0 ) = 0; + /** @brief Static methods that estimate the Gaussian mixture parameters from a samples set + + @param samples Samples from which the Gaussian mixture model will be estimated. It should be a + one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it + will be converted to the inner matrix of such type for the further computing. + @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for + each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type. + @param labels The optional output "class label" for each sample: + \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable mixture + component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type. + @param probs The optional output matrix that contains posterior probabilities of each Gaussian + mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and CV_64FC1 + type. + @param params The Gaussian mixture params, see EM::Params description + @return true if the Gaussian mixture model was trained successfully, otherwise it returns + false. + + Starts with Expectation step. Initial values of the model parameters will be estimated by the + k-means algorithm. 
+ + Unlike many of the ML models, EM is an unsupervised learning algorithm and it does not take + responses (class labels or function values) as input. Instead, it computes the *Maximum Likelihood + Estimate* of the Gaussian mixture parameters from an input sample set, stores all the parameters + inside the structure: \f$p_{i,k}\f$ in probs, \f$a_k\f$ in means , \f$S_k\f$ in covs[k], \f$\pi_k\f$ in weights , + and optionally computes the output "class label" for each sample: + \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable mixture + component for each sample). + + The trained model can be used further for prediction, just like any other classifier. The trained + model is similar to the NormalBayesClassifier. + */ static Ptr train(InputArray samples, OutputArray logLikelihoods=noArray(), OutputArray labels=noArray(), OutputArray probs=noArray(), const Params& params=Params()); + /** Starts with Expectation step. You need to provide initial means \f$a_k\f$ of mixture + components. Optionally you can pass initial weights \f$\pi_k\f$ and covariance matrices + \f$S_k\f$ of mixture components. + + @param samples Samples from which the Gaussian mixture model will be estimated. It should be a + one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it + will be converted to the inner matrix of such type for the further computing. + @param means0 Initial means \f$a_k\f$ of mixture components. It is a one-channel matrix of + \f$nclusters \times dims\f$ size. If the matrix does not have CV_64F type it will be converted to the + inner matrix of such type for the further computing. + @param covs0 The vector of initial covariance matrices \f$S_k\f$ of mixture components. Each of + covariance matrices is a one-channel matrix of \f$dims \times dims\f$ size. If the matrices do not + have CV_64F type they will be converted to the inner matrices of such type for the further + computing. 
+ @param weights0 Initial weights \f$\pi_k\f$ of mixture components. It should be a one-channel + floating-point matrix with \f$1 \times nclusters\f$ or \f$nclusters \times 1\f$ size. + @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for + each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type. + @param labels The optional output "class label" for each sample: + \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable mixture + component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type. + @param probs The optional output matrix that contains posterior probabilities of each Gaussian + mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and CV_64FC1 + type. + @param params The Gaussian mixture params, see EM::Params description + */ static Ptr train_startWithE(InputArray samples, InputArray means0, InputArray covs0=noArray(), InputArray weights0=noArray(), @@ -362,28 +1366,133 @@ public: OutputArray probs=noArray(), const Params& params=Params()); + /** Starts with Maximization step. You need to provide initial probabilities \f$p_{i,k}\f$ to + use this option. + + @param samples Samples from which the Gaussian mixture model will be estimated. It should be a + one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it + will be converted to the inner matrix of such type for the further computing. + @param probs0 Initial probabilities \f$p_{i,k}\f$. + @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for + each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type. + @param labels The optional output "class label" for each sample: + \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable mixture + component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
+ @param probs The optional output matrix that contains posterior probabilities of each Gaussian + mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and CV_64FC1 + type. + @param params The Gaussian mixture params, see EM::Params description + */ static Ptr train_startWithM(InputArray samples, InputArray probs0, OutputArray logLikelihoods=noArray(), OutputArray labels=noArray(), OutputArray probs=noArray(), const Params& params=Params()); + + /** @brief Creates empty EM model + + @param params EM parameters + + The model should then be trained using StatModel::train(traindata, flags) method. Alternatively, you + can use one of the EM::train\* methods or load it from file using StatModel::load\(filename). + */ static Ptr create(const Params& params=Params()); }; +//! @} ml_em /****************************************************************************************\ * Decision Tree * \****************************************************************************************/ +//! @addtogroup ml_decsiontrees +//! @{ + +/** @brief The class represents a single decision tree or a collection of decision trees. The current public +interface of the class allows the user to train only a single decision tree, however the class is +capable of storing multiple decision trees and using them for prediction (by summing responses or +using a voting scheme), and the classes derived from DTrees (such as RTrees and Boost) use this +capability to implement decision tree ensembles. + */ class CV_EXPORTS_W DTrees : public StatModel { public: enum { PREDICT_AUTO=0, PREDICT_SUM=(1<<8), PREDICT_MAX_VOTE=(2<<8), PREDICT_MASK=(3<<8) }; + /** @brief The structure contains all the decision tree training parameters. You can initialize it with the default + constructor and then override any parameters directly before training, or the structure may be fully + initialized using the advanced variant of the constructor.
+ */ class CV_EXPORTS_W_MAP Params { public: Params(); + /** @brief The constructors + + @param maxDepth The maximum possible depth of the tree. That is, the training algorithm attempts + to split a node while its depth is less than maxDepth. The root node has zero depth. The actual + depth may be smaller if the other termination criteria are met (see the outline of the training + procedure in the beginning of the section), and/or if the tree is pruned. + @param minSampleCount If the number of samples in a node is less than this parameter then the node + will not be split. + @param regressionAccuracy Termination criteria for regression trees. If all absolute differences + between an estimated value in a node and values of train samples in this node are less than this + parameter then the node will not be split further. + @param useSurrogates If true then surrogate splits will be built. These splits allow working with + missing data and compute variable importance correctly. + + @note currently it's not implemented. + + @param maxCategories Cluster possible values of a categorical variable into K\<=maxCategories + clusters to find a suboptimal split. If a discrete variable, on which the training procedure + tries to make a split, takes more than maxCategories values, the precise best subset estimation + may take a very long time because the algorithm is exponential. Instead, many decision tree + engines (including our implementation) try to find a sub-optimal split in this case by clustering + all the samples into maxCategories clusters, that is, some categories are merged together. The + clustering is applied only in n \> 2-class classification problems for categorical variables + with N \> maxCategories possible values. In case of regression and 2-class classification the + optimal split can be found efficiently without employing clustering, thus the parameter is not + used in these cases.
+ + @param CVFolds If CVFolds \> 1 then the algorithm prunes the built decision tree using a K-fold + cross-validation procedure where K is equal to CVFolds. + + @param use1SERule If true then the pruning will be harsher. This will make a tree more compact and + more resistant to the training data noise but a bit less accurate. + + @param truncatePrunedTree If true then pruned branches are physically removed from the tree. + Otherwise they are retained and it is possible to get results from the original unpruned (or + pruned less aggressively) tree. + + @param priors The array of a priori class probabilities, sorted by the class label value. The + parameter can be used to tune the decision tree preferences toward a certain class. For example, + if you want to detect some rare anomaly occurrence, the training base will likely contain much + more normal cases than anomalies, so a very good classification performance will be achieved + just by considering every case as normal. To avoid this, the priors can be specified, where the + anomaly probability is artificially increased (up to 0.5 or even greater), so the weight of the + misclassified anomalies becomes much bigger, and the tree is adjusted properly. You can also + think about this parameter as weights of prediction categories which determine relative weights + that you give to misclassification. That is, if the weight of the first category is 1 and the + weight of the second category is 10, then each mistake in predicting the second category is + equivalent to making 10 mistakes in predicting the first category.
+ + The default constructor initializes all the parameters with the default values tuned for the + standalone classification tree: + @code + DTrees::Params::Params() + { + maxDepth = INT_MAX; + minSampleCount = 10; + regressionAccuracy = 0.01f; + useSurrogates = false; + maxCategories = 10; + CVFolds = 10; + use1SERule = true; + truncatePrunedTree = true; + priors = Mat(); + } + @endcode + */ Params( int maxDepth, int minSampleCount, double regressionAccuracy, bool useSurrogates, int maxCategories, int CVFolds, @@ -401,6 +1510,24 @@ public: CV_PROP_RW Mat priors; }; + /** @brief The class represents a decision tree node. It has public members: + - member double value + Value at the node: a class label in case of classification or estimated function value in case + of regression. + - member int classIdx + Class index normalized to 0..class_count-1 range and assigned to the node. It is used + internally in classification trees and tree ensembles. + - member int parent + Index of the parent node + - member int left + Index of the left child node + - member int right + Index of right child node. + - member int defaultDir + Default direction where to go (-1: left or +1: right). It helps in the case of missing values. + - member int split + Index of the first split + */ class CV_EXPORTS Node { public: @@ -416,6 +1543,27 @@ public: int split; }; + /** @brief The class represents split in a decision tree. It has public members: + - member int varIdx + Index of variable on which the split is created. + - member bool inversed + If true, then the inverse split rule is used (i.e. left and right branches are exchanged in + the rule expressions below). + - member float quality + The split quality, a positive number. It is used to choose the best split. + - member int next + Index of the next split in the list of splits for the node + - member float c + The threshold value in case of split on an ordered variable. 
The rule is: : + if var_value < c + then next_node<-left + else next_node<-right + - member int subsetOfs + Offset of the bitset used by the split on a categorical variable. The rule is: : + if bitset[var_value] == 1 + then next_node <- left + else next_node <- right + */ class CV_EXPORTS Split { public: @@ -428,28 +1576,112 @@ public: int subsetOfs; }; + /** @brief Sets the training parameters + + @param p Training parameters of type DTrees::Params. + + The method sets the training parameters. + */ virtual void setDParams(const Params& p); + /** @brief Returns the training parameters + + The method returns the training parameters. + */ virtual Params getDParams() const; + /** @brief Returns indices of root nodes + */ virtual const std::vector& getRoots() const = 0; + /** @brief Returns all the nodes + + all the node indices, mentioned above (left, right, parent, root indices) are indices in the + returned vector + */ virtual const std::vector& getNodes() const = 0; + /** @brief Returns all the splits + + all the split indices, mentioned above (split, next etc.) are indices in the returned vector + */ virtual const std::vector& getSplits() const = 0; + /** @brief Returns all the bitsets for categorical splits + + Split::subsetOfs is an offset in the returned vector + */ virtual const std::vector& getSubsets() const = 0; + /** @brief Creates the empty model + + The static method creates empty decision tree with the specified parameters. It should be then + trained using train method (see StatModel::train). Alternatively, you can load the model from file + using StatModel::load\(filename). + */ static Ptr create(const Params& params=Params()); }; +//! @} ml_decsiontrees + /****************************************************************************************\ * Random Trees Classifier * \****************************************************************************************/ +//! @addtogroup ml_randomtrees +//! 
@{ + +/** @brief The class implements the random forest predictor as described in the beginning of this section. + */ class CV_EXPORTS_W RTrees : public DTrees { public: + /** @brief The set of training parameters for the forest is a superset of the training + parameters for a single tree. + + However, random trees do not need all the functionality/features of decision trees. Most + noticeably, the trees are not pruned, so the cross-validation parameters are not used. + */ class CV_EXPORTS_W_MAP Params : public DTrees::Params { public: Params(); + /** @brief The constructors + + @param maxDepth the depth of the tree. A low value will likely underfit and conversely a high + value will likely overfit. The optimal value can be obtained using cross validation or other + suitable methods. + @param minSampleCount minimum samples required at a leaf node for it to be split. A reasonable + value is a small percentage of the total data e.g. 1%. + @param regressionAccuracy + @param useSurrogates + @param maxCategories Cluster possible values of a categorical variable into K \<= maxCategories + clusters to find a suboptimal split. If a discrete variable, on which the training procedure tries + to make a split, takes more than max_categories values, the precise best subset estimation may + take a very long time because the algorithm is exponential. Instead, many decision trees engines + (including ML) try to find sub-optimal split in this case by clustering all the samples into + maxCategories clusters that is some categories are merged together. The clustering is applied only + in n\>2-class classification problems for categorical variables with N \> max_categories possible + values. In case of regression and 2-class classification the optimal split can be found + efficiently without employing clustering, thus the parameter is not used in these cases. 
+ @param priors + @param calcVarImportance If true then variable importance will be calculated and then it can be + retrieved by RTrees::getVarImportance. + @param nactiveVars The size of the randomly selected subset of features at each tree node that + is used to find the best split(s). If you set it to 0 then the size will be set to the square + root of the total number of features. + @param termCrit The termination criteria that specifies when the training algorithm stops - either + when the specified number of trees is trained and added to the ensemble or when sufficient + accuracy (measured as OOB error) is achieved. Typically the more trees you have the better the + accuracy. However, the improvement in accuracy generally diminishes and asymptotes past a certain + number of trees. Also keep in mind that the number of trees increases the prediction time linearly. + + The default constructor sets all parameters to default values which are different from default + values of `DTrees::Params`: + @code + RTrees::Params::Params() : DTrees::Params( 5, 10, 0, false, 10, 0, false, false, Mat() ), + calcVarImportance(false), nactiveVars(0) + { + termCrit = cvTermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 50, 0.1 ); + } + @endcode + */ Params( int maxDepth, int minSampleCount, double regressionAccuracy, bool useSurrogates, int maxCategories, const Mat& priors, @@ -464,18 +1696,42 @@ public: virtual void setRParams(const Params& p) = 0; virtual Params getRParams() const = 0; + /** @brief Returns the variable importance array. + + The method returns the variable importance vector, computed at the training stage when + RTrees::Params::calcVarImportance is set to true. If this flag was set to false, the empty matrix is + returned. + */ virtual Mat getVarImportance() const = 0; + /** @brief Creates the empty model + + Use StatModel::train to train the model, StatModel::train to create and + train the model, StatModel::load to load the pre-trained model.
+ */ static Ptr create(const Params& params=Params()); }; +//! @} ml_randomtrees + /****************************************************************************************\ * Boosted tree classifier * \****************************************************************************************/ +//! @addtogroup ml_boost +//! @{ + +/** @brief Boosted tree classifier derived from DTrees + */ class CV_EXPORTS_W Boost : public DTrees { public: + /** @brief The structure is derived from DTrees::Params but not all of the decision tree parameters are + supported. In particular, cross-validation is not supported. + + All parameters are public. You can initialize them by a constructor and then override some of them + directly if you want. + */ class CV_EXPORTS_W_MAP Params : public DTrees::Params { public: @@ -484,6 +1740,38 @@ public: CV_PROP_RW double weightTrimRate; Params(); + /** @brief The constructors. + + @param boostType Type of the boosting algorithm. Possible values are: + - **Boost::DISCRETE** Discrete AdaBoost. + - **Boost::REAL** Real AdaBoost. It is a technique that utilizes confidence-rated predictions + and works well with categorical data. + - **Boost::LOGIT** LogitBoost. It can produce good regression fits. + - **Boost::GENTLE** Gentle AdaBoost. It puts less weight on outlier data points and for that + reason is often good with regression data. + Gentle AdaBoost and Real AdaBoost are often the preferable choices. + @param weakCount The number of weak classifiers. + @param weightTrimRate A threshold between 0 and 1 used to save computational time. Samples + with summary weight \f$\leq 1 - weight_trim_rate\f$ do not participate in the *next* iteration of + training. Set this parameter to 0 to turn off this functionality. + @param maxDepth + @param useSurrogates + @param priors + + See DTrees::Params for description of other parameters. 
+ + Default parameters are: + @code + Boost::Params::Params() + { + boostType = Boost::REAL; + weakCount = 100; + weightTrimRate = 0.95; + CVFolds = 0; + maxDepth = 1; + } + @endcode + */ Params( int boostType, int weakCount, double weightTrimRate, int maxDepth, bool useSurrogates, const Mat& priors ); }; @@ -491,12 +1779,29 @@ public: // Boosting type enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 }; + /** @brief Returns the boosting parameters + + The method returns the training parameters. + */ virtual Params getBParams() const = 0; + /** @brief Sets the boosting parameters + + @param p Training parameters of type Boost::Params. + + The method sets the training parameters. + */ virtual void setBParams(const Params& p) = 0; + /** @brief Creates the empty model + + Use StatModel::train to train the model, StatModel::train\(traindata, params) to create and + train the model, StatModel::load\(filename) to load the pre-trained model. + */ static Ptr create(const Params& params=Params()); }; +//! @} ml_boost + /****************************************************************************************\ * Gradient Boosted Trees * \****************************************************************************************/ @@ -532,12 +1837,99 @@ public: /////////////////////////////////// Multi-Layer Perceptrons ////////////////////////////// +//! @addtogroup ml_neural +//! @{ + +/** @brief MLP model. + +Unlike many other models in ML that are constructed and trained at once, in the MLP model these +steps are separated. First, a network with the specified topology is created using the non-default +constructor or the method ANN_MLP::create. All the weights are set to zeros. Then, the network is +trained using a set of input and output vectors. The training procedure can be repeated more than +once, that is, the weights can be adjusted based on the new training data. 
+ */ class CV_EXPORTS_W ANN_MLP : public StatModel { public: + /** @brief Parameters of the MLP and of the training algorithm. + + You can initialize the structure by a constructor or the individual parameters can be adjusted + after the structure is created. + The network structure: + - member Mat layerSizes + The number of elements in each layer of the network. The very first element specifies the number + of elements in the input layer. The last element - number of elements in the output layer. + - member int activateFunc + The activation function. Currently the only fully supported activation function is + ANN_MLP::SIGMOID_SYM. + - member double fparam1 + The first parameter of the activation function, 0 by default. + - member double fparam2 + The second parameter of the activation function, 0 by default. + @note + If you are using the default ANN_MLP::SIGMOID_SYM activation function with the default + parameter values fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), + so the output will range from [-1.7159, 1.7159], instead of [0,1]. + + The back-propagation algorithm parameters: + - member double bpDWScale + Strength of the weight gradient term. The recommended value is about 0.1. + - member double bpMomentScale + Strength of the momentum term (the difference between weights on the 2 previous iterations). + This parameter provides some inertia to smooth the random fluctuations of the weights. It + can vary from 0 (the feature is disabled) to 1 and beyond. The value 0.1 or so is good + enough. + The RPROP algorithm parameters (see @cite RPROP93 for details): + - member double rpDW0 + Initial value \f$\Delta_0\f$ of update-values \f$\Delta_{ij}\f$. + - member double rpDWPlus + Increase factor \f$\eta^+\f$. It must be \>1. + - member double rpDWMinus + Decrease factor \f$\eta^-\f$. It must be \<1. + - member double rpDWMin + Update-values lower limit \f$\Delta_{min}\f$. It must be positive.
+ - member double rpDWMax + Update-values upper limit \f$\Delta_{max}\f$. It must be \>1. + */ struct CV_EXPORTS_W_MAP Params { Params(); + /** @brief Construct the parameter structure + + @param layerSizes Integer vector specifying the number of neurons in each layer including the + input and output layers. + @param activateFunc Parameter specifying the activation function for each neuron: one of + ANN_MLP::IDENTITY, ANN_MLP::SIGMOID_SYM, and ANN_MLP::GAUSSIAN. + @param fparam1 The first parameter of the activation function, \f$\alpha\f$. See the formulas in the + introduction section. + @param fparam2 The second parameter of the activation function, \f$\beta\f$. See the formulas in the + introduction section. + @param termCrit Termination criteria of the training algorithm. You can specify the maximum number + of iterations (maxCount) and/or how much the error could change between the iterations to make the + algorithm continue (epsilon). + @param trainMethod Training method of the MLP. Possible values are: + - **ANN_MLP_TrainParams::BACKPROP** The back-propagation algorithm. + - **ANN_MLP_TrainParams::RPROP** The RPROP algorithm. + @param param1 Parameter of the training method. It is rp_dw0 for RPROP and bp_dw_scale for + BACKPROP. + @param param2 Parameter of the training method. It is rp_dw_min for RPROP and bp_moment_scale + for BACKPROP. 
+ + By default the RPROP algorithm is used: + @code + ANN_MLP_TrainParams::ANN_MLP_TrainParams() + { + layerSizes = Mat(); + activateFun = SIGMOID_SYM; + fparam1 = fparam2 = 0; + term_crit = TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01 ); + train_method = RPROP; + bpDWScale = bpMomentScale = 0.1; + rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5; + rpDWMin = FLT_EPSILON; rpDWMax = 50.; + } + @endcode + */ Params( const Mat& layerSizes, int activateFunc, double fparam1, double fparam2, TermCriteria termCrit, int trainMethod, double param1, double param2=0 ); @@ -565,26 +1957,81 @@ public: enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 }; virtual Mat getWeights(int layerIdx) const = 0; + + /** @brief Sets the new network parameters + + @param p The new parameters + + The existing network, if any, will be destroyed and new empty one will be created. It should be + re-trained after that. + */ virtual void setParams(const Params& p) = 0; + + /** @brief Retrieves the current network parameters + */ virtual Params getParams() const = 0; + /** @brief Creates empty model + + Use StatModel::train to train the model, StatModel::train\(traindata, params) to create + and train the model, StatModel::load\(filename) to load the pre-trained model. Note that + the train method has optional flags, and the following flags are handled by \`ANN_MLP\`: + + - **UPDATE_WEIGHTS** Algorithm updates the network weights, rather than computes them from + scratch. In the latter case the weights are initialized using the Nguyen-Widrow algorithm. + - **NO_INPUT_SCALE** Algorithm does not normalize the input vectors. If this flag is not set, + the training algorithm normalizes each input feature independently, shifting its mean value to + 0 and making the standard deviation equal to 1. If the network is assumed to be updated + frequently, the new training data could be much different from original one. 
In this case, you + should take care of proper normalization. + - **NO_OUTPUT_SCALE** Algorithm does not normalize the output vectors. If the flag is not set, + the training algorithm normalizes each output feature independently, by transforming it to the + certain range depending on the used activation function. + */ static Ptr create(const Params& params=Params()); }; +//! @} ml_neural + /****************************************************************************************\ * Logistic Regression * \****************************************************************************************/ +//! @addtogroup ml_lr +//! @{ + +/** @brief Implements Logistic Regression classifier. + */ class CV_EXPORTS LogisticRegression : public StatModel { public: class CV_EXPORTS Params { public: + /** @brief The constructors + + @param learning_rate Specifies the learning rate. + @param iters Specifies the number of iterations. + @param method Specifies the kind of training method used. It should be set to either + LogisticRegression::BATCH or LogisticRegression::MINI_BATCH. If using + LogisticRegression::MINI_BATCH, set LogisticRegression::Params.mini_batch_size to a positive + integer. + @param normalization Specifies the kind of regularization to be applied. + LogisticRegression::REG_L1 or LogisticRegression::REG_L2 (L1 norm or L2 norm). To use this, set + LogisticRegression::Params.regularized to a integer greater than zero. + @param reg To enable or disable regularization. Set to positive integer (greater than zero) to + enable and to 0 to disable. + @param batch_size Specifies the number of training samples taken in each step of Mini-Batch + Gradient Descent. Will only be used if using LogisticRegression::MINI_BATCH training algorithm. + It has to take values less than the total number of training samples. + + By initializing this structure, one can set all the parameters required for Logistic Regression + classifier. 
+ */ Params(double learning_rate = 0.001, int iters = 1000, int method = LogisticRegression::BATCH, - int normlization = LogisticRegression::REG_L2, + int normalization = LogisticRegression::REG_L2, int reg = 1, int batch_size = 1); double alpha; @@ -599,36 +2046,65 @@ public: enum { REG_L1 = 0, REG_L2 = 1}; enum { BATCH = 0, MINI_BATCH = 1}; - // Algorithm interface + /** @brief This function writes the trained LogisticRegression classifier to disk. + */ virtual void write( FileStorage &fs ) const = 0; + /** @brief This function reads the trained LogisticRegression classifier from disk. + */ virtual void read( const FileNode &fn ) = 0; - // StatModel interface + /** @brief Trains the Logistic Regression classifier and returns true if successful. + + @param trainData Instance of ml::TrainData class holding learning data. + @param flags Not used. + */ virtual bool train( const Ptr& trainData, int flags=0 ) = 0; + /** @brief Predicts responses for input samples and returns a float type. + + @param samples The input data for the prediction algorithm. Matrix [m x n], where each row + contains variables (features) of one object being classified. Should have data type CV_32F. + @param results Predicted labels as a column matrix of type CV_32S. + @param flags Not used. + */ virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0; virtual void clear() = 0; + /** @brief This function returns the trained parameters arranged across rows. + + For a two-class classification problem, it returns a row matrix. + It returns learnt parameters of the Logistic Regression as a matrix of type CV_32F. + */ virtual Mat get_learnt_thetas() const = 0; + /** @brief Creates empty model. + + @param params The training parameters for the classifier of type LogisticRegression::Params. + + Creates a Logistic Regression model with the given parameters. + */ static Ptr create( const Params& params = Params() ); }; +//!
@} ml_lr + /****************************************************************************************\ * Auxilary functions declarations * \****************************************************************************************/ -/* Generates from multivariate normal distribution, where - is an - average row vector, - symmetric covariation matrix */ +/** Generates `sample` from multivariate normal distribution, where `mean` - is an + average row vector, `cov` - symmetric covariation matrix */ CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, OutputArray samples); -/* Generates sample from gaussian mixture distribution */ +/** Generates sample from gaussian mixture distribution */ CV_EXPORTS void randGaussMixture( InputArray means, InputArray covs, InputArray weights, int nsamples, OutputArray samples, OutputArray sampClasses ); -/* creates test set */ +/** creates test set */ CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses, OutputArray samples, OutputArray responses); +//! @} ml + } } diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index 4ccb810703..a35d206b98 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -46,15 +46,78 @@ #include "opencv2/core.hpp" +/** +@defgroup objdetect Object Detection + +Haar Feature-based Cascade Classifier for Object Detection +---------------------------------------------------------- + +The object detector described below has been initially proposed by Paul Viola @cite Viola01 and +improved by Rainer Lienhart @cite Lienhart02 . 
+ +First, a classifier (namely a *cascade of boosted classifiers working with haar-like features*) is +trained with a few hundred sample views of a particular object (i.e., a face or a car), called +positive examples, that are scaled to the same size (say, 20x20), and negative examples - arbitrary +images of the same size. + +After a classifier is trained, it can be applied to a region of interest (of the same size as used +during the training) in an input image. The classifier outputs a "1" if the region is likely to show +the object (i.e., face/car), and "0" otherwise. To search for the object in the whole image one can +move the search window across the image and check every location using the classifier. The +classifier is designed so that it can be easily "resized" in order to be able to find the objects of +interest at different sizes, which is more efficient than resizing the image itself. So, to find an +object of an unknown size in the image the scan procedure should be done several times at different +scales. + +The word "cascade" in the classifier name means that the resultant classifier consists of several +simpler classifiers (*stages*) that are applied subsequently to a region of interest until at some +stage the candidate is rejected or all the stages are passed. The word "boosted" means that the +classifiers at every stage of the cascade are complex themselves and they are built out of basic +classifiers using one of four different boosting techniques (weighted voting). Currently Discrete +Adaboost, Real Adaboost, Gentle Adaboost and Logitboost are supported. The basic classifiers are +decision-tree classifiers with at least 2 leaves. Haar-like features are the input to the basic +classifiers, and are calculated as described below. 
The current algorithm uses the following +Haar-like features: + +![image](pics/haarfeatures.png) + +The feature used in a particular classifier is specified by its shape (1a, 2b etc.), position within +the region of interest and the scale (this scale is not the same as the scale used at the detection +stage, though these two scales are multiplied). For example, in the case of the third line feature +(2c) the response is calculated as the difference between the sum of image pixels under the +rectangle covering the whole feature (including the two white stripes and the black stripe in the +middle) and the sum of the image pixels under the black stripe multiplied by 3 in order to +compensate for the differences in the size of areas. The sums of pixel values over a rectangular +regions are calculated rapidly using integral images (see below and the integral description). + +To see the object detector at work, have a look at the facedetect demo: + + +The following reference is for the detection part only. There is a separate application called +opencv_traincascade that can train a cascade of boosted classifiers from a set of samples. + +@note In the new C++ interface it is also possible to use LBP (local binary pattern) features in +addition to Haar-like features. .. [Viola01] Paul Viola and Michael J. Jones. Rapid Object Detection +using a Boosted Cascade of Simple Features. IEEE CVPR, 2001. The paper is available online at + + +@{ + @defgroup objdetect_c C API +@} + */ + typedef struct CvHaarClassifierCascade CvHaarClassifierCascade; namespace cv { +//! @addtogroup objdetect +//! @{ + ///////////////////////////// Object Detection //////////////////////////// -// class for grouping object candidates, detected by Cascade Classifier, HOG etc. -// instance of the class is to be passed to cv::partition (see cxoperations.hpp) +//! class for grouping object candidates, detected by Cascade Classifier, HOG etc. +//! 
instance of the class is to be passed to cv::partition (see cxoperations.hpp) class CV_EXPORTS SimilarRects { public: @@ -70,13 +133,32 @@ public: double eps; }; +/** @brief Groups the object candidate rectangles. + +@param rectList Input/output vector of rectangles. Output vector includes retained and grouped +rectangles. (The Python list is not modified in place.) +@param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a +group of rectangles to retain it. +@param eps Relative difference between sides of the rectangles to merge them into a group. + +The function is a wrapper for the generic function partition . It clusters all the input rectangles +using the rectangle equivalence criteria that combines rectangles with similar sizes and similar +locations. The similarity is defined by eps. When eps=0 , no clustering is done at all. If +\f$\texttt{eps}\rightarrow +\infty\f$ , all the rectangles are put in one cluster. Then, the small +clusters containing less than or equal to groupThreshold rectangles are rejected. In each other +cluster, the average rectangle is computed and put into the output rectangle list.
+ */ CV_EXPORTS void groupRectangles(std::vector& rectList, int groupThreshold, double eps = 0.2); +/** @overload */ CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector& rectList, CV_OUT std::vector& weights, int groupThreshold, double eps = 0.2); +/** @overload */ CV_EXPORTS void groupRectangles(std::vector& rectList, int groupThreshold, double eps, std::vector* weights, std::vector* levelWeights ); +/** @overload */ CV_EXPORTS void groupRectangles(std::vector& rectList, std::vector& rejectLevels, std::vector& levelWeights, int groupThreshold, double eps = 0.2); +/** @overload */ CV_EXPORTS void groupRectangles_meanshift(std::vector& rectList, std::vector& foundWeights, std::vector& foundScales, double detectThreshold = 0.0, Size winDetSize = Size(64, 128)); @@ -133,15 +215,54 @@ public: virtual Ptr getMaskGenerator() = 0; }; +/** @brief Cascade classifier class for object detection. + */ class CV_EXPORTS_W CascadeClassifier { public: CV_WRAP CascadeClassifier(); + /** @brief Loads a classifier from a file. + + @param filename Name of the file from which the classifier is loaded. + */ CV_WRAP CascadeClassifier(const String& filename); ~CascadeClassifier(); + /** @brief Checks whether the classifier has been loaded. + */ CV_WRAP bool empty() const; + /** @brief Loads a classifier from a file. + + @param filename Name of the file from which the classifier is loaded. The file may contain an old + HAAR classifier trained by the haartraining application or a new cascade classifier trained by the + traincascade application. + */ CV_WRAP bool load( const String& filename ); + /** @brief Reads a classifier from a FileStorage node. + + @note The file may contain a new cascade classifier (trained traincascade application) only. + */ CV_WRAP bool read( const FileNode& node ); + + /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list + of rectangles. 
+ + @param image Matrix of the type CV_8U containing an image where objects are detected. + @param objects Vector of rectangles where each rectangle contains the detected object, the + rectangles may be partially outside the original image. + @param scaleFactor Parameter specifying how much the image size is reduced at each image scale. + @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have + to retain it. + @param flags Parameter with the same meaning for an old cascade as in the function + cvHaarDetectObjects. It is not used for a new cascade. + @param minSize Minimum possible object size. Objects smaller than that are ignored. + @param maxSize Maximum possible object size. Objects larger than that are ignored. + + The function is parallelized with the TBB library. + + @note + - (Python) A face detection example using cascade classifiers can be found at + opencv_source_code/samples/python2/facedetect.py + */ CV_WRAP void detectMultiScale( InputArray image, CV_OUT std::vector& objects, double scaleFactor = 1.1, @@ -149,6 +270,21 @@ public: Size minSize = Size(), Size maxSize = Size() ); + /** @overload + @param image Matrix of the type CV_8U containing an image where objects are detected. + @param objects Vector of rectangles where each rectangle contains the detected object, the + rectangles may be partially outside the original image. + @param numDetections Vector of detection numbers for the corresponding objects. An object's number + of detections is the number of neighboring positively classified rectangles that were joined + together to form the object. + @param scaleFactor Parameter specifying how much the image size is reduced at each image scale. + @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have + to retain it. + @param flags Parameter with the same meaning for an old cascade as in the function + cvHaarDetectObjects. It is not used for a new cascade. 
+ @param minSize Minimum possible object size. Objects smaller than that are ignored. + @param maxSize Maximum possible object size. Objects larger than that are ignored. + */ CV_WRAP_AS(detectMultiScale2) void detectMultiScale( InputArray image, CV_OUT std::vector& objects, CV_OUT std::vector& numDetections, @@ -157,6 +293,9 @@ public: Size minSize=Size(), Size maxSize=Size() ); + /** @overload + if `outputRejectLevels` is `true` returns `rejectLevels` and `levelWeights` + */ CV_WRAP_AS(detectMultiScale3) void detectMultiScale( InputArray image, CV_OUT std::vector& objects, CV_OUT std::vector& rejectLevels, @@ -184,14 +323,14 @@ CV_EXPORTS Ptr createFaceDetectionMaskGene //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// -// struct for detection region of interest (ROI) +//! struct for detection region of interest (ROI) struct DetectionROI { - // scale(size) of the bounding box + //! scale(size) of the bounding box double scale; - // set of requrested locations to be evaluated + //! set of requested locations to be evaluated std::vector locations; - // vector that will contain confidence values for each location + //! vector that will contain confidence values for each location std::vector confidences; }; @@ -250,24 +389,24 @@ public: Size winStride = Size(), Size padding = Size(), const std::vector& locations = std::vector()) const; - //with found weights output + //! with found weights output CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector& foundLocations, CV_OUT std::vector& weights, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), const std::vector& searchLocations = std::vector()) const; - //without found weights output + //!
without found weights output virtual void detect(const Mat& img, CV_OUT std::vector& foundLocations, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), const std::vector& searchLocations=std::vector()) const; - //with result weights output + //! with result weights output CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, CV_OUT std::vector& foundWeights, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), double scale = 1.05, double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const; - //without found weights output + //! without found weights output virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), double scale = 1.05, @@ -295,24 +434,26 @@ public: CV_PROP int nlevels; - // evaluate specified ROI and return confidence value for each location + //! evaluate specified ROI and return confidence value for each location virtual void detectROI(const cv::Mat& img, const std::vector &locations, CV_OUT std::vector& foundLocations, CV_OUT std::vector& confidences, double hitThreshold = 0, cv::Size winStride = Size(), cv::Size padding = Size()) const; - // evaluate specified ROI and return confidence value for each location in multiple scales + //! evaluate specified ROI and return confidence value for each location in multiple scales virtual void detectMultiScaleROI(const cv::Mat& img, CV_OUT std::vector& foundLocations, std::vector& locations, double hitThreshold = 0, int groupThreshold = 0) const; - // read/parse Dalal's alt model file + //! read/parse Dalal's alt model file void readALTModel(String modelfile); void groupRectangles(std::vector& rectList, std::vector& weights, int groupThreshold, double eps) const; }; +//! 
@} objdetect + } #include "opencv2/objdetect/detection_based_tracker.hpp" diff --git a/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp b/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp index 0c4d130f0a..54117fdb91 100644 --- a/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp +++ b/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp @@ -51,6 +51,10 @@ namespace cv { + +//! @addtogroup objdetect +//! @{ + class CV_EXPORTS DetectionBasedTracker { public: @@ -211,6 +215,9 @@ class CV_EXPORTS DetectionBasedTracker cv::Rect calcTrackedObjectPositionToShow(int i, ObjectStatus& status) const; void detectInRegion(const cv::Mat& img, const cv::Rect& r, std::vector& detectedObjectsInRegions); }; + +//! @} objdetect + } //end of cv namespace #endif diff --git a/modules/objdetect/include/opencv2/objdetect/objdetect_c.h b/modules/objdetect/include/opencv2/objdetect/objdetect_c.h index 807efd7b0d..632a438625 100644 --- a/modules/objdetect/include/opencv2/objdetect/objdetect_c.h +++ b/modules/objdetect/include/opencv2/objdetect/objdetect_c.h @@ -53,6 +53,10 @@ extern "C" { #endif +/** @addtogroup objdetect_c + @{ + */ + /****************************************************************************************\ * Haar-like Object Detection functions * \****************************************************************************************/ @@ -143,6 +147,7 @@ CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascad CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade, CvPoint pt, int start_stage CV_DEFAULT(0)); +/** @} objdetect_c */ #ifdef __cplusplus } diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp index ee6b12e9dc..0cc0cf619a 100644 --- a/modules/photo/include/opencv2/photo.hpp +++ b/modules/photo/include/opencv2/photo.hpp @@ -46,12 +46,27 @@ #include "opencv2/core.hpp" #include 
"opencv2/imgproc.hpp" -/*! \namespace cv - Namespace where all the C++ OpenCV functionality resides - */ +/** +@defgroup photo Computational Photography +@{ + @defgroup photo_denoise Denoising + @defgroup photo_hdr HDR imaging + +This section describes high dynamic range imaging algorithms namely tonemapping, exposure alignment, +camera calibration with multiple exposures and exposure fusion. + + @defgroup photo_clone Seamless Cloning + @defgroup photo_render Non-Photorealistic Rendering + @defgroup photo_c C API +@} + */ + namespace cv { +//! @addtogroup photo +//! @{ + //! the inpainting algorithm enum { @@ -72,44 +87,213 @@ enum NORMCONV_FILTER = 2 }; -//! restores the damaged image areas using one of the available intpainting algorithms +/** @brief Restores the selected region in an image using the region neighborhood. + +@param src Input 8-bit 1-channel or 3-channel image. +@param inpaintMask Inpainting mask, 8-bit 1-channel image. Non-zero pixels indicate the area that +needs to be inpainted. +@param dst Output image with the same size and type as src . +@param inpaintRadius Radius of a circular neighborhood of each point inpainted that is considered +by the algorithm. +@param flags Inpainting method that could be one of the following: +- **INPAINT_NS** Navier-Stokes based method [Navier01] +- **INPAINT_TELEA** Method by Alexandru Telea @cite Telea04 . + +The function reconstructs the selected image area from the pixel near the area boundary. The +function may be used to remove dust and scratches from a scanned photo, or to remove undesirable +objects from still images or video. See for more details. + +@note + - An example using the inpainting technique can be found at + opencv_source_code/samples/cpp/inpaint.cpp + - (Python) An example using the inpainting technique can be found at + opencv_source_code/samples/python2/inpaint.py + */ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask, OutputArray dst, double inpaintRadius, int flags ); - +//! 
@addtogroup photo_denoise +//! @{ + +/** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational +optimizations. Noise expected to be a gaussian white noise + +@param src Input 8-bit 1-channel, 2-channel or 3-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength. Big h value perfectly removes noise but also +removes image details, smaller h value preserves details but also preserves some noise + +This function expected to be applied to grayscale images. For colored images look at +fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored +image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting +image to CIELAB colorspace and then separately denoise L and AB components with different h +parameter. + */ CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Modification of fastNlMeansDenoising function for colored images + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src . +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater +denoising time. 
Recommended value 21 pixels +@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly +removes noise but also removes image details, smaller h value preserves details but also preserves +some noise +@param hColor The same as h but for color components. For most images a value of 10 +will be enough to remove colored noise without distorting colors + +The function converts image to CIELAB colorspace and then separately denoise L and AB components +with given h parameters using fastNlMeansDenoising function. + */ CV_EXPORTS_W void fastNlMeansDenoisingColored( InputArray src, OutputArray dst, float h = 3, float hColor = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been +captured in small period of time. For example video. This version of the function is for grayscale +images or for manual manipulation with colorspaces. For more details see + + +@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should +have the same type and size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowSize - greater +denoising time.
Recommended value 21 pixels +@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly +removes noise but also removes image details, smaller h value preserves details but also preserves +some noise + */ CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences + +@param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and +size. +@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence +@param temporalWindowSize Number of surrounding images to use for target image denoising. Should +be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to +imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise +srcImgs[imgToDenoiseIndex] image. +@param dst Output image with the same size and type as srcImgs images. +@param templateWindowSize Size in pixels of the template patch that is used to compute weights. +Should be odd. Recommended value 7 pixels +@param searchWindowSize Size in pixels of the window that is used to compute weighted average for +given pixel. Should be odd. Affect performance linearly: greater searchWindowSize - greater +denoising time. Recommended value 21 pixels +@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly +removes noise but also removes image details, smaller h value preserves details but also preserves +some noise. +@param hColor The same as h but for color components. + +The function converts images to CIELAB colorspace and then separately denoise L and AB components +with given h parameters using fastNlMeansDenoisingMulti function.
+ */ CV_EXPORTS_W void fastNlMeansDenoisingColoredMulti( InputArrayOfArrays srcImgs, OutputArray dst, int imgToDenoiseIndex, int temporalWindowSize, float h = 3, float hColor = 3, int templateWindowSize = 7, int searchWindowSize = 21); +/** @brief Primal-dual algorithm is an algorithm for solving special types of variational problems (that is, +finding a function to minimize some functional). As the image denoising, in particular, may be seen +as the variational problem, primal-dual algorithm then can be used to perform denoising and this is +exactly what is implemented. + +It should be noted, that this implementation was taken from the July 2013 blog entry +@cite MA13 , which also contained (slightly more general) ready-to-use source code on Python. +Subsequently, that code was rewritten on C++ with the usage of openCV by Vadim Pisarevsky at the end +of July 2013 and finally it was slightly adapted by later authors. + +Although the thorough discussion and justification of the algorithm involved may be found in +@cite ChambolleEtAl, it might make sense to skim over it here, following @cite MA13 . To begin +with, we consider the 1-byte gray-level images as the functions from the rectangular domain of +pixels (it may be seen as set +\f$\left\{(x,y)\in\mathbb{N}\times\mathbb{N}\mid 1\leq x\leq n,\;1\leq y\leq m\right\}\f$ for some +\f$m,\;n\in\mathbb{N}\f$) into \f$\{0,1,\dots,255\}\f$. We shall denote the noised images as \f$f_i\f$ and with +this view, given some image \f$x\f$ of the same size, we may measure how bad it is by the formula + +\f[\left\|\left\|\nabla x\right\|\right\| + \lambda\sum_i\left\|\left\|x-f_i\right\|\right\|\f] + +\f$\|\|\cdot\|\|\f$ here denotes \f$L_2\f$-norm and as you see, the first addend states that we want our +image to be smooth (ideally, having zero gradient, thus being constant) and the second states that +we want our result to be close to the observations we've got. 
If we treat \f$x\f$ as a function, this is +exactly the functional that we seek to minimize and here the Primal-Dual algorithm comes into play. + +@param observations This array should contain one or more noised versions of the image that is to +be restored. +@param result Here the denoised image will be stored. There is no need to do pre-allocation of +storage space, as it will be automatically allocated, if necessary. +@param lambda Corresponds to \f$\lambda\f$ in the formulas above. As it is enlarged, the smooth +(blurred) images are treated less favorably than detailed (but maybe more noised) ones. Roughly +speaking, as it becomes smaller, the result will be more blurred but more severe outliers will be +removed. +@param niters Number of iterations that the algorithm will run. Of course, the more iterations the +better, but it is hard to quantitatively refine this statement, so just use the default and +increase it if the results are poor. + */ CV_EXPORTS_W void denoise_TVL1(const std::vector& observations,Mat& result, double lambda=1.0, int niters=30); +//! @} photo_denoise + +//! @addtogroup photo_hdr +//! @{ + enum { LDR_SIZE = 256 }; +/** @brief Base class for tonemapping algorithms - tools that are used to map HDR image to 8-bit range. + */ class CV_EXPORTS_W Tonemap : public Algorithm { public: + /** @brief Tonemaps image + + @param src source image - 32-bit 3-channel Mat + @param dst destination image - 32-bit 3-channel Mat with values in [0, 1] range + */ CV_WRAP virtual void process(InputArray src, OutputArray dst) = 0; CV_WRAP virtual float getGamma() const = 0; CV_WRAP virtual void setGamma(float gamma) = 0; }; +/** @brief Creates simple linear mapper with gamma correction + +@param gamma positive value for gamma correction. Gamma value of 1.0 implies no correction, gamma +equal to 2.2f is suitable for most displays. +Generally gamma \> 1 brightens the image and gamma \< 1 darkens it.
+ */ CV_EXPORTS_W Ptr createTonemap(float gamma = 1.0f); -// "Adaptive Logarithmic Mapping For Displaying HighContrast Scenes", Drago et al., 2003 +/** @brief Adaptive logarithmic mapping is a fast global tonemapping algorithm that scales the image in +logarithmic domain. +Since it's a global operator the same function is applied to all the pixels, it is controlled by the +bias parameter. + +Optional saturation enhancement is possible as described in @cite FL02 . + +For more information see @cite DM03 . + */ class CV_EXPORTS_W TonemapDrago : public Tonemap { public: @@ -121,10 +305,25 @@ public: CV_WRAP virtual void setBias(float bias) = 0; }; +/** @brief Creates TonemapDrago object + +@param gamma gamma value for gamma correction. See createTonemap +@param saturation positive saturation enhancement value. 1.0 preserves saturation, values greater +than 1 increase saturation and values less than 1 decrease it. +@param bias value for bias function in [0, 1] range. Values from 0.7 to 0.9 usually give best +results, default value is 0.85. + */ CV_EXPORTS_W Ptr createTonemapDrago(float gamma = 1.0f, float saturation = 1.0f, float bias = 0.85f); -// "Fast Bilateral Filtering for the Display of High-Dynamic-Range Images", Durand, Dorsey, 2002 +/** @brief This algorithm decomposes image into two layers: base layer and detail layer using bilateral filter +and compresses contrast of the base layer thus preserving all the details. +This implementation uses regular bilateral filter from OpenCV. + +Saturation enhancement is possible as in TonemapDrago. + +For more information see @cite DD02 . + */ class CV_EXPORTS_W TonemapDurand : public Tonemap { public: @@ -142,11 +341,25 @@ public: CV_WRAP virtual void setSigmaColor(float sigma_color) = 0; }; +/** @brief Creates TonemapDurand object + +@param gamma gamma value for gamma correction. See createTonemap +@param contrast resulting contrast on logarithmic scale, i. e.
log(max / min), where max and min +are maximum and minimum luminance values of the resulting image. +@param saturation saturation enhancement value. See createTonemapDrago +@param sigma_space bilateral filter sigma in coordinate space +@param sigma_color bilateral filter sigma in color space + */ CV_EXPORTS_W Ptr createTonemapDurand(float gamma = 1.0f, float contrast = 4.0f, float saturation = 1.0f, float sigma_space = 2.0f, float sigma_color = 2.0f); -// "Dynamic Range Reduction Inspired by Photoreceptor Physiology", Reinhard, Devlin, 2005 +/** @brief This is a global tonemapping operator that models human visual system. + +Mapping function is controlled by adaptation parameter, that is computed using light adaptation and +color adaptation. +For more information see @cite RD05 . + */ class CV_EXPORTS_W TonemapReinhard : public Tonemap { public: @@ -160,11 +373,24 @@ public: CV_WRAP virtual void setColorAdaptation(float color_adapt) = 0; }; +/** @brief Creates TonemapReinhard object + +@param gamma gamma value for gamma correction. See createTonemap +@param intensity result intensity in [-8, 8] range. Greater intensity produces brighter results. +@param light_adapt light adaptation in [0, 1] range. If 1 adaptation is based only on pixel +value, if 0 it's global, otherwise it's a weighted mean of these two cases. +@param color_adapt chromatic adaptation in [0, 1] range. If 1 channels are treated independently, +if 0 adaptation level is the same for each channel. + */ CV_EXPORTS_W Ptr createTonemapReinhard(float gamma = 1.0f, float intensity = 0.0f, float light_adapt = 1.0f, float color_adapt = 0.0f); -// "Perceptual Framework for Contrast Processing of High Dynamic Range Images", Mantiuk et al., 2006 +/** @brief This algorithm transforms image to contrast using gradients on all levels of gaussian pyramid, +transforms contrast values to HVS response and scales the response. After this the image is +reconstructed from new contrast values.
+For more information see @cite MM06 . + */ class CV_EXPORTS_W TonemapMantiuk : public Tonemap { public: @@ -175,28 +401,75 @@ public: CV_WRAP virtual void setSaturation(float saturation) = 0; }; +/** @brief Creates TonemapMantiuk object + +@param gamma gamma value for gamma correction. See createTonemap +@param scale contrast scale factor. HVS response is multiplied by this parameter, thus compressing +dynamic range. Values from 0.6 to 0.9 produce best results. +@param saturation saturation enhancement value. See createTonemapDrago + */ CV_EXPORTS_W Ptr createTonemapMantiuk(float gamma = 1.0f, float scale = 0.7f, float saturation = 1.0f); +/** @brief The base class for algorithms that align images of the same scene with different exposures + */ class CV_EXPORTS_W AlignExposures : public Algorithm { public: + /** @brief Aligns images + + @param src vector of input images + @param dst vector of aligned images + @param times vector of exposure time values for each image + @param response 256x1 matrix with inverse camera response function for each pixel value, it should + have the same number of channels as images. + */ CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst, InputArray times, InputArray response) = 0; }; -// "Fast, Robust Image Registration for Compositing High Dynamic Range Photographs from Handheld Exposures", Ward, 2003 +/** @brief This algorithm converts images to median threshold bitmaps (1 for pixels brighter than median +luminance and 0 otherwise) and then aligns the resulting bitmaps using bit operations. + +It is invariant to exposure, so exposure values and camera response are not necessary. + +In this implementation new image regions are filled with zeros. +For more information see @cite GW03 .
+ */ class CV_EXPORTS_W AlignMTB : public AlignExposures { public: CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst, InputArray times, InputArray response) = 0; + /** @brief Short version of process, that doesn't take extra arguments. + + @param src vector of input images + @param dst vector of aligned images + */ CV_WRAP virtual void process(InputArrayOfArrays src, std::vector& dst) = 0; + /** @brief Calculates shift between two images, i. e. how to shift the second image to correspond it with the + first. + + @param img0 first image + @param img1 second image + */ CV_WRAP virtual Point calculateShift(InputArray img0, InputArray img1) = 0; + /** @brief Helper function, that shift Mat filling new regions with zeros. + + @param src input image + @param dst result image + @param shift shift value + */ CV_WRAP virtual void shiftMat(InputArray src, OutputArray dst, const Point shift) = 0; + /** @brief Computes median threshold and exclude bitmaps of given image. + + @param img input image + @param tb median threshold bitmap + @param eb exclude bitmap + */ CV_WRAP virtual void computeBitmaps(InputArray img, OutputArray tb, OutputArray eb) = 0; CV_WRAP virtual int getMaxBits() const = 0; @@ -209,16 +482,36 @@ public: CV_WRAP virtual void setCut(bool value) = 0; }; +/** @brief Creates AlignMTB object + +@param max_bits logarithm to the base 2 of maximal shift in each dimension. Values of 5 and 6 are +usually good enough (31 and 63 pixels shift respectively). +@param exclude_range range for exclusion bitmap that is constructed to suppress noise around the +median value. +@param cut if true cuts images, otherwise fills the new regions with zeros. + */ CV_EXPORTS_W Ptr createAlignMTB(int max_bits = 6, int exclude_range = 4, bool cut = true); +/** @brief The base class for camera response calibration algorithms. + */ class CV_EXPORTS_W CalibrateCRF : public Algorithm { public: + /** @brief Recovers inverse camera response. 
+ + @param src vector of input images + @param dst 256x1 matrix with inverse camera response function + @param times vector of exposure time values for each image + */ CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; }; -// "Recovering High Dynamic Range Radiance Maps from Photographs", Debevec, Malik, 1997 +/** @brief Inverse camera response function is extracted for each brightness value by minimizing an objective +function as linear system. Objective function is constructed using pixel values on the same position +in all images, extra term is added to make the result smoother. +For more information see @cite DM97 . + */ class CV_EXPORTS_W CalibrateDebevec : public CalibrateCRF { public: @@ -232,10 +525,21 @@ public: CV_WRAP virtual void setRandom(bool random) = 0; }; +/** @brief Creates CalibrateDebevec object + +@param samples number of pixel locations to use +@param lambda smoothness term weight. Greater values produce smoother results, but can alter the +response. +@param random if true sample pixel locations are chosen at random, otherwise they form a +rectangular grid. + */ CV_EXPORTS_W Ptr createCalibrateDebevec(int samples = 70, float lambda = 10.0f, bool random = false); -// "Dynamic range improvement through multiple exposures", Robertson et al., 1999 +/** @brief Inverse camera response function is extracted for each brightness value by minimizing an objective +function as linear system. This algorithm uses all image pixels. +For more information see @cite RB99 . + */ class CV_EXPORTS_W CalibrateRobertson : public CalibrateCRF { public: @@ -248,17 +552,35 @@ public: CV_WRAP virtual Mat getRadiance() const = 0; }; +/** @brief Creates CalibrateRobertson object + +@param max_iter maximal number of Gauss-Seidel solver iterations. +@param threshold target difference between results of two successive steps of the minimization.
+ */ CV_EXPORTS_W Ptr createCalibrateRobertson(int max_iter = 30, float threshold = 0.01f); +/** @brief The base class for algorithms that can merge exposure sequence to a single image. + */ class CV_EXPORTS_W MergeExposures : public Algorithm { public: + /** @brief Merges images. + + @param src vector of input images + @param dst result image + @param times vector of exposure time values for each image + @param response 256x1 matrix with inverse camera response function for each pixel value, it should + have the same number of channels as images. + */ CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times, InputArray response) = 0; }; -// "Recovering High Dynamic Range Radiance Maps from Photographs", Debevec, Malik, 1997 +/** @brief The resulting HDR image is calculated as weighted average of the exposures considering exposure +values and camera response. +For more information see @cite DM97 . + */ class CV_EXPORTS_W MergeDebevec : public MergeExposures { public: @@ -267,15 +589,31 @@ public: CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; }; +/** @brief Creates MergeDebevec object + */ CV_EXPORTS_W Ptr createMergeDebevec(); -// "Exposure Fusion", Mertens et al., 2007 +/** @brief Pixels are weighted using contrast, saturation and well-exposedness measures, then images are +combined using laplacian pyramids. + +The resulting image weight is constructed as weighted average of contrast, saturation and +well-exposedness measures. + +The resulting image doesn't require tonemapping and can be converted to 8-bit image by multiplying +by 255, but it's recommended to apply gamma correction and/or linear tonemapping. +For more information see @cite MK07 .
+ */ class CV_EXPORTS_W MergeMertens : public MergeExposures { public: CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times, InputArray response) = 0; + /** @brief Short version of process, that doesn't take extra arguments. + + @param src vector of input images + @param dst result image + */ CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst) = 0; CV_WRAP virtual float getContrastWeight() const = 0; @@ -288,11 +626,20 @@ public: CV_WRAP virtual void setExposureWeight(float exposure_weight) = 0; }; +/** @brief Creates MergeMertens object + +@param contrast_weight contrast measure weight. See MergeMertens. +@param saturation_weight saturation measure weight +@param exposure_weight well-exposedness measure weight + */ CV_EXPORTS_W Ptr createMergeMertens(float contrast_weight = 1.0f, float saturation_weight = 1.0f, float exposure_weight = 0.0f); -// "Dynamic range improvement through multiple exposures", Robertson et al., 1999 +/** @brief The resulting HDR image is calculated as weighted average of the exposures considering exposure +values and camera response. +For more information see @cite RB99 . + */ class CV_EXPORTS_W MergeRobertson : public MergeExposures { public: @@ -301,35 +648,158 @@ public: CV_WRAP virtual void process(InputArrayOfArrays src, OutputArray dst, InputArray times) = 0; }; +/** @brief Creates MergeRobertson object + */ CV_EXPORTS_W Ptr createMergeRobertson(); +//! @} photo_hdr + +/** @brief Transforms a color image to a grayscale image. It is a basic tool in digital printing, stylized +black-and-white photograph rendering, and in many single channel image processing applications +@cite CL12 . + +@param src Input 8-bit 3-channel image. +@param grayscale Output 8-bit 1-channel image. +@param color_boost Output 8-bit 3-channel image. + +This function is to be applied on color images. + */ CV_EXPORTS_W void decolor( InputArray src, OutputArray grayscale, OutputArray color_boost); +//! 
@addtogroup photo_clone +//! @{ + +/** @brief Image editing tasks concern either global changes (color/intensity corrections, filters, +deformations) or local changes concerned to a selection. Here we are interested in achieving local +changes, ones that are restricted to a region manually selected (ROI), in a seamless and effortless +manner. The extent of the changes ranges from slight distortions to complete replacement by novel +content @cite PM03 . + +@param src Input 8-bit 3-channel image. +@param dst Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param p Point in dst image where object is placed. +@param blend Output image with the same size and type as dst. +@param flags Cloning method that could be one of the following: +- **NORMAL_CLONE** The power of the method is fully expressed when inserting objects with +complex outlines into a new background +- **MIXED_CLONE** The classic method, color-based selection and alpha masking might be time +consuming and often leaves an undesirable halo. Seamless cloning, even averaged with the +original image, is not effective. Mixed seamless cloning based on a loose selection proves +effective. +- **FEATURE_EXCHANGE** Feature exchange allows the user to easily replace certain features of +one object by alternative features. + */ CV_EXPORTS_W void seamlessClone( InputArray src, InputArray dst, InputArray mask, Point p, OutputArray blend, int flags); +/** @brief Given an original color image, two differently colored versions of this image can be mixed +seamlessly. + +@param src Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param dst Output image with the same size and type as src . +@param red_mul R-channel multiply factor. +@param green_mul G-channel multiply factor. +@param blue_mul B-channel multiply factor. + +Multiplication factor is between .5 to 2.5. 
+ */ CV_EXPORTS_W void colorChange(InputArray src, InputArray mask, OutputArray dst, float red_mul = 1.0f, float green_mul = 1.0f, float blue_mul = 1.0f); +/** @brief Applying an appropriate non-linear transformation to the gradient field inside the selection and +then integrating back with a Poisson solver, modifies locally the apparent illumination of an image. + +@param src Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param dst Output image with the same size and type as src. +@param alpha Value ranges between 0-2. +@param beta Value ranges between 0-2. + +This is useful to highlight under-exposed foreground objects or to reduce specular reflections. + */ CV_EXPORTS_W void illuminationChange(InputArray src, InputArray mask, OutputArray dst, float alpha = 0.2f, float beta = 0.4f); +/** @brief By retaining only the gradients at edge locations, before integrating with the Poisson solver, one +washes out the texture of the selected region, giving its contents a flat aspect. Here Canny Edge +Detector is used. + +@param src Input 8-bit 3-channel image. +@param mask Input 8-bit 1 or 3-channel image. +@param dst Output image with the same size and type as src. +@param low_threshold Range from 0 to 100. +@param high_threshold Value \> 100. +@param kernel_size The size of the Sobel kernel to be used. + +**NOTE:** + +The algorithm assumes that the color of the source image is close to that of the destination. This +assumption means that when the colors don't match, the source image color gets tinted toward the +color of the destination image. + */ CV_EXPORTS_W void textureFlattening(InputArray src, InputArray mask, OutputArray dst, float low_threshold = 30, float high_threshold = 45, int kernel_size = 3); +//! @} photo_clone + +//! @addtogroup photo_render +//! @{ + +/** @brief Filtering is the fundamental operation in image and video processing. Edge-preserving smoothing +filters are used in many different applications @cite EM11 . 
+ +@param src Input 8-bit 3-channel image. +@param dst Output 8-bit 3-channel image. +@param flags Edge preserving filters: +- **RECURS_FILTER** = 1 +- **NORMCONV_FILTER** = 2 +@param sigma_s Range between 0 to 200. +@param sigma_r Range between 0 to 1. + */ CV_EXPORTS_W void edgePreservingFilter(InputArray src, OutputArray dst, int flags = 1, float sigma_s = 60, float sigma_r = 0.4f); +/** @brief This filter enhances the details of a particular image. + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src. +@param sigma_s Range between 0 to 200. +@param sigma_r Range between 0 to 1. + */ CV_EXPORTS_W void detailEnhance(InputArray src, OutputArray dst, float sigma_s = 10, float sigma_r = 0.15f); +/** @brief Pencil-like non-photorealistic line drawing + +@param src Input 8-bit 3-channel image. +@param dst1 Output 8-bit 1-channel image. +@param dst2 Output image with the same size and type as src. +@param sigma_s Range between 0 to 200. +@param sigma_r Range between 0 to 1. +@param shade_factor Range between 0 to 0.1. + */ CV_EXPORTS_W void pencilSketch(InputArray src, OutputArray dst1, OutputArray dst2, float sigma_s = 60, float sigma_r = 0.07f, float shade_factor = 0.02f); +/** @brief Stylization aims to produce digital imagery with a wide variety of effects not focused on +photorealism. Edge-aware filters are ideal for stylization, as they can abstract regions of low +contrast while preserving, or enhancing, high-contrast features. + +@param src Input 8-bit 3-channel image. +@param dst Output image with the same size and type as src. +@param sigma_s Range between 0 to 200. +@param sigma_r Range between 0 to 1. + */ CV_EXPORTS_W void stylization(InputArray src, OutputArray dst, float sigma_s = 60, float sigma_r = 0.45f); +//! @} photo_render + +//! 
@} photo + } // cv #endif diff --git a/modules/photo/include/opencv2/photo/cuda.hpp b/modules/photo/include/opencv2/photo/cuda.hpp index c26a041761..4b69afa7be 100644 --- a/modules/photo/include/opencv2/photo/cuda.hpp +++ b/modules/photo/include/opencv2/photo/cuda.hpp @@ -47,18 +47,75 @@ namespace cv { namespace cuda { -//! Brute force non-local means algorith (slow but universal) +//! @addtogroup photo_denoise +//! @{ + +/** @brief Performs pure non local means denoising without any simplification, and thus it is not fast. + +@param src Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3. +@param dst Destination image. +@param h Filter sigma regulating filter strength for color. +@param search_window Size of search window. +@param block_size Size of block used for computing weights. +@param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param s Stream for the asynchronous version. + +@sa + fastNlMeansDenoising + */ CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null()); -//! Fast (but approximate)version of non-local means algorith similar to CPU function (running sums technique) +/** @brief The class implements fast approximate Non Local Means Denoising algorithm. + */ class CV_EXPORTS FastNonLocalMeansDenoising { public: - //! Simple method, recommended for grayscale images (though it supports multichannel images) + /** @brief Perform image denoising using Non-local Means Denoising algorithm + with several computational + optimizations. Noise expected to be a gaussian white noise + + @param src Input 8-bit 1-channel, 2-channel or 3-channel image. + @param dst Output image with the same size and type as src . + @param h Parameter regulating filter strength. 
Big h value perfectly removes noise but also + removes image details, smaller h value preserves details but also preserves some noise + @param search_window Size in pixels of the window that is used to compute weighted average for + given pixel. Should be odd. Affect performance linearly: greater search_window - greater + denoising time. Recommended value 21 pixels + @param block_size Size in pixels of the template patch that is used to compute weights. Should be + odd. Recommended value 7 pixels + @param s Stream for the asynchronous invocations. + + This function is expected to be applied to grayscale images. For colored images look at + FastNonLocalMeansDenoising::labMethod. + + @sa + fastNlMeansDenoising + */ void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); - //! Processes luminance and color components separatelly - void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); + /** @brief Modification of FastNonLocalMeansDenoising::simpleMethod for color images + + @param src Input 8-bit 3-channel image. + @param dst Output image with the same size and type as src . + @param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but + also removes image details, smaller h value preserves details but also preserves some noise + @param photo_render The same as h but for color components. For most images value equals 10 will be + enough to remove colored noise and do not distort colors + @param search_window Size in pixels of the window that is used to compute weighted average for + given pixel. Should be odd. Affect performance linearly: greater search_window - greater + denoising time. Recommended value 21 pixels + @param block_size Size in pixels of the template patch that is used to compute weights. Should be + odd.
Recommended value 7 pixels + @param s Stream for the asynchronous invocations. + + The function converts image to CIELAB colorspace and then separately denoise L and AB components + with given h parameters using FastNonLocalMeansDenoising::simpleMethod function. + + @sa + fastNlMeansDenoisingColored + */ + void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float photo_render, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); private: @@ -66,6 +123,8 @@ private: GpuMat lab, l, ab; }; +//! @} photo + }} // namespace cv { namespace cuda { #endif /* __OPENCV_PHOTO_CUDA_HPP__ */ diff --git a/modules/photo/include/opencv2/photo/photo_c.h b/modules/photo/include/opencv2/photo/photo_c.h index 4ca05f2538..908e0a14ac 100644 --- a/modules/photo/include/opencv2/photo/photo_c.h +++ b/modules/photo/include/opencv2/photo/photo_c.h @@ -49,6 +49,10 @@ extern "C" { #endif +/** @addtogroup photo_c + @{ + */ + /* Inpainting algorithms */ enum { @@ -61,6 +65,7 @@ enum CVAPI(void) cvInpaint( const CvArr* src, const CvArr* inpaint_mask, CvArr* dst, double inpaintRange, int flags ); +/** @} */ #ifdef __cplusplus } //extern "C" diff --git a/modules/shape/include/opencv2/shape.hpp b/modules/shape/include/opencv2/shape.hpp index d07bf5e453..093d8575d3 100644 --- a/modules/shape/include/opencv2/shape.hpp +++ b/modules/shape/include/opencv2/shape.hpp @@ -48,6 +48,10 @@ #include "opencv2/shape/hist_cost.hpp" #include "opencv2/shape/shape_distance.hpp" +/** + @defgroup shape Shape Distance and Matching + */ + namespace cv { CV_EXPORTS bool initModule_shape(); diff --git a/modules/shape/include/opencv2/shape/emdL1.hpp b/modules/shape/include/opencv2/shape/emdL1.hpp index 74c734a519..1dfa7581a6 100644 --- a/modules/shape/include/opencv2/shape/emdL1.hpp +++ b/modules/shape/include/opencv2/shape/emdL1.hpp @@ -51,8 +51,22 @@ namespace cv * EMDL1 Function * \****************************************************************************************/ +//! 
@addtogroup shape +//! @{ + +/** @brief Computes the "minimal work" distance between two weighted point configurations based on the papers +"EMD-L1: An efficient and Robust Algorithm for comparing histogram-based descriptors", by Haibin +Ling and Kazunori Okada; and "The Earth Mover's Distance is the Mallows Distance: Some Insights from +Statistics", by Elizaveta Levina and Peter Bickel. + +@param signature1 First signature, a single column floating-point matrix. Each row is the value of +the histogram in each bin. +@param signature2 Second signature of the same format and size as signature1. + */ CV_EXPORTS float EMDL1(InputArray signature1, InputArray signature2); +//! @} + }//namespace cv #endif diff --git a/modules/shape/include/opencv2/shape/hist_cost.hpp b/modules/shape/include/opencv2/shape/hist_cost.hpp index 0ff3573eea..15c0a87c7b 100644 --- a/modules/shape/include/opencv2/shape/hist_cost.hpp +++ b/modules/shape/include/opencv2/shape/hist_cost.hpp @@ -49,8 +49,10 @@ namespace cv { -/*! - * The base class for HistogramCostExtractor. +//! @addtogroup shape +//! @{ + +/** @brief Abstract base class for histogram cost algorithms. */ class CV_EXPORTS_W HistogramCostExtractor : public Algorithm { @@ -64,7 +66,8 @@ public: CV_WRAP virtual float getDefaultCost() const = 0; }; -/*! */ +/** @brief A norm based cost extraction. : + */ class CV_EXPORTS_W NormHistogramCostExtractor : public HistogramCostExtractor { public: @@ -75,7 +78,8 @@ public: CV_EXPORTS_W Ptr createNormHistogramCostExtractor(int flag=DIST_L2, int nDummies=25, float defaultCost=0.2f); -/*! */ +/** @brief An EMD based cost extraction. : + */ class CV_EXPORTS_W EMDHistogramCostExtractor : public HistogramCostExtractor { public: @@ -86,18 +90,22 @@ public: CV_EXPORTS_W Ptr createEMDHistogramCostExtractor(int flag=DIST_L2, int nDummies=25, float defaultCost=0.2f); -/*! */ +/** @brief A Chi based cost extraction.
: + */ class CV_EXPORTS_W ChiHistogramCostExtractor : public HistogramCostExtractor {}; CV_EXPORTS_W Ptr createChiHistogramCostExtractor(int nDummies=25, float defaultCost=0.2f); -/*! */ +/** @brief An EMD-L1 based cost extraction. : + */ class CV_EXPORTS_W EMDL1HistogramCostExtractor : public HistogramCostExtractor {}; CV_EXPORTS_W Ptr createEMDL1HistogramCostExtractor(int nDummies=25, float defaultCost=0.2f); +//! @} + } // cv #endif diff --git a/modules/shape/include/opencv2/shape/shape_distance.hpp b/modules/shape/include/opencv2/shape/shape_distance.hpp index acdb6e5f6e..4b0c3b5f6b 100644 --- a/modules/shape/include/opencv2/shape/shape_distance.hpp +++ b/modules/shape/include/opencv2/shape/shape_distance.hpp @@ -50,65 +50,131 @@ namespace cv { -/*! - * The base class for ShapeDistanceExtractor. - * This is just to define the common interface for - * shape comparisson techniques. +//! @addtogroup shape +//! @{ + +/** @brief Abstract base class for shape distance algorithms. */ class CV_EXPORTS_W ShapeDistanceExtractor : public Algorithm { public: + /** @brief Compute the shape distance between two shapes defined by its contours. + + @param contour1 Contour defining first shape. + @param contour2 Contour defining second shape. + */ CV_WRAP virtual float computeDistance(InputArray contour1, InputArray contour2) = 0; }; /***********************************************************************************/ /***********************************************************************************/ /***********************************************************************************/ -/*! - * Shape Context implementation. - * The SCD class implements SCD algorithm proposed by Belongie et al.in - * "Shape Matching and Object Recognition Using Shape Contexts". - * Implemented by Juan M. Perez for the GSOC 2013. - */ +/** @brief Implementation of the Shape Context descriptor and matching algorithm + +proposed by Belongie et al. 
in "Shape Matching and Object Recognition Using Shape Contexts" (PAMI +2002). This implementation is packaged in a generic scheme, in order to allow you the +implementation of the common variations of the original pipeline. +*/ class CV_EXPORTS_W ShapeContextDistanceExtractor : public ShapeDistanceExtractor { public: + /** @brief Establish the number of angular bins for the Shape Context Descriptor used in the shape matching + pipeline. + + @param nAngularBins The number of angular bins in the shape context descriptor. + */ CV_WRAP virtual void setAngularBins(int nAngularBins) = 0; CV_WRAP virtual int getAngularBins() const = 0; + /** @brief Establish the number of radial bins for the Shape Context Descriptor used in the shape matching + pipeline. + + @param nRadialBins The number of radial bins in the shape context descriptor. + */ CV_WRAP virtual void setRadialBins(int nRadialBins) = 0; CV_WRAP virtual int getRadialBins() const = 0; + /** @brief Set the inner radius of the shape context descriptor. + + @param innerRadius The value of the inner radius. + */ CV_WRAP virtual void setInnerRadius(float innerRadius) = 0; CV_WRAP virtual float getInnerRadius() const = 0; + /** @brief Set the outer radius of the shape context descriptor. + + @param outerRadius The value of the outer radius. + */ CV_WRAP virtual void setOuterRadius(float outerRadius) = 0; CV_WRAP virtual float getOuterRadius() const = 0; CV_WRAP virtual void setRotationInvariant(bool rotationInvariant) = 0; CV_WRAP virtual bool getRotationInvariant() const = 0; + /** @brief Set the weight of the shape context distance in the final value of the shape distance. The shape + context distance between two shapes is defined as the symmetric sum of shape context matching costs + over best matching points. The final value of the shape distance is a user-defined linear + combination of the shape context distance, an image appearance distance, and a bending energy. 
+ + @param shapeContextWeight The weight of the shape context distance in the final distance value. + */ CV_WRAP virtual void setShapeContextWeight(float shapeContextWeight) = 0; CV_WRAP virtual float getShapeContextWeight() const = 0; + /** @brief Set the weight of the Image Appearance cost in the final value of the shape distance. The image + appearance cost is defined as the sum of squared brightness differences in Gaussian windows around + corresponding image points. The final value of the shape distance is a user-defined linear + combination of the shape context distance, an image appearance distance, and a bending energy. If + this value is set to a number different from 0, is mandatory to set the images that correspond to + each shape. + + @param imageAppearanceWeight The weight of the appearance cost in the final distance value. + */ CV_WRAP virtual void setImageAppearanceWeight(float imageAppearanceWeight) = 0; CV_WRAP virtual float getImageAppearanceWeight() const = 0; + /** @brief Set the weight of the Bending Energy in the final value of the shape distance. The bending energy + definition depends on what transformation is being used to align the shapes. The final value of the + shape distance is a user-defined linear combination of the shape context distance, an image + appearance distance, and a bending energy. + + @param bendingEnergyWeight The weight of the Bending Energy in the final distance value. + */ CV_WRAP virtual void setBendingEnergyWeight(float bendingEnergyWeight) = 0; CV_WRAP virtual float getBendingEnergyWeight() const = 0; + /** @brief Set the images that correspond to each shape. This images are used in the calculation of the Image + Appearance cost. + + @param image1 Image corresponding to the shape defined by contours1. + @param image2 Image corresponding to the shape defined by contours2. 
+ */ CV_WRAP virtual void setImages(InputArray image1, InputArray image2) = 0; CV_WRAP virtual void getImages(OutputArray image1, OutputArray image2) const = 0; CV_WRAP virtual void setIterations(int iterations) = 0; CV_WRAP virtual int getIterations() const = 0; + /** @brief Set the algorithm used for building the shape context descriptor cost matrix. + + @param comparer Smart pointer to a HistogramCostExtractor, an algorithm that defines the cost + matrix between descriptors. + */ CV_WRAP virtual void setCostExtractor(Ptr comparer) = 0; CV_WRAP virtual Ptr getCostExtractor() const = 0; + /** @brief Set the value of the standard deviation for the Gaussian window for the image appearance cost. + + @param sigma Standard Deviation. + */ CV_WRAP virtual void setStdDev(float sigma) = 0; CV_WRAP virtual float getStdDev() const = 0; + /** @brief Set the algorithm used for aligning the shapes. + + @param transformer Smart pointer to a ShapeTransformer, an algorithm that defines the aligning + transformation. + */ CV_WRAP virtual void setTransformAlgorithm(Ptr transformer) = 0; CV_WRAP virtual Ptr getTransformAlgorithm() const = 0; }; @@ -123,15 +189,28 @@ CV_EXPORTS_W Ptr /***********************************************************************************/ /***********************************************************************************/ /***********************************************************************************/ -/*! - * Hausdorff distace implementation based on +/** @brief A simple Hausdorff distance measure between shapes defined by contours + +according to the paper "Comparing Images using the Hausdorff distance." by D.P. Huttenlocher, G.A. +Klanderman, and W.J. Rucklidge. (PAMI 1993). : */ class CV_EXPORTS_W HausdorffDistanceExtractor : public ShapeDistanceExtractor { public: + /** @brief Set the norm used to compute the Hausdorff value between two shapes. It can be L1 or L2 norm. 
+ + @param distanceFlag Flag indicating which norm is used to compute the Hausdorff distance + (NORM_L1, NORM_L2). + */ CV_WRAP virtual void setDistanceFlag(int distanceFlag) = 0; CV_WRAP virtual int getDistanceFlag() const = 0; + /** @brief This method sets the rank proportion (or fractional value) that establish the Kth ranked value of + the partial Hausdorff distance. Experimentally had been shown that 0.6 is a good value to compare + shapes. + + @param rankProportion fractional value (between 0 and 1). + */ CV_WRAP virtual void setRankProportion(float rankProportion) = 0; CV_WRAP virtual float getRankProportion() const = 0; }; @@ -139,5 +218,7 @@ public: /* Constructor */ CV_EXPORTS_W Ptr createHausdorffDistanceExtractor(int distanceFlag=cv::NORM_L2, float rankProp=0.6f); +//! @} + } // cv #endif diff --git a/modules/shape/include/opencv2/shape/shape_transformer.hpp b/modules/shape/include/opencv2/shape/shape_transformer.hpp index cdabf971c7..2180613670 100644 --- a/modules/shape/include/opencv2/shape/shape_transformer.hpp +++ b/modules/shape/include/opencv2/shape/shape_transformer.hpp @@ -50,20 +50,38 @@ namespace cv { -/*! - * The base class for ShapeTransformer. - * This is just to define the common interface for - * shape transformation techniques. +//! @addtogroup shape +//! @{ + +/** @brief Abstract base class for shape transformation algorithms. */ class CV_EXPORTS_W ShapeTransformer : public Algorithm { public: - /* Estimate, Apply Transformation and return Transforming cost*/ + /** @brief Estimate the transformation parameters of the current transformer algorithm, based on point matches. + + @param transformingShape Contour defining first shape. + @param targetShape Contour defining second shape (Target). + @param matches Standard vector of Matches between points. 
+ */ CV_WRAP virtual void estimateTransformation(InputArray transformingShape, InputArray targetShape, std::vector& matches) = 0; + /** @brief Apply a transformation, given pre-estimated transformation parameters. + + @param input Contour (set of points) to apply the transformation. + @param output Output contour. + */ CV_WRAP virtual float applyTransformation(InputArray input, OutputArray output=noArray()) = 0; + /** @brief Apply a transformation, given pre-estimated transformation parameters, to an Image. + + @param transformingImage Input image. + @param output Output image. + @param flags Image interpolation method. + @param borderMode border style. + @param borderValue border value. + */ CV_WRAP virtual void warpImage(InputArray transformingImage, OutputArray output, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, const Scalar& borderValue=Scalar()) const = 0; @@ -71,30 +89,33 @@ public: /***********************************************************************************/ /***********************************************************************************/ -/*! - * Thin Plate Spline Transformation - * Implementation of the TPS transformation - * according to "Principal Warps: Thin-Plate Splines and the - * Decomposition of Deformations" by Juan Manuel Perez for the GSOC 2013 - */ +/** @brief Definition of the transformation + +used in the paper "Principal Warps: Thin-Plate Splines and the Decomposition of Deformations", by +F.L. Bookstein (PAMI 1989). : + */ class CV_EXPORTS_W ThinPlateSplineShapeTransformer : public ShapeTransformer { public: + /** @brief Set the regularization parameter for relaxing the exact interpolation requirements of the TPS + algorithm. + + @param beta value of the regularization parameter.
+ */ CV_WRAP virtual void setRegularizationParameter(double beta) = 0; CV_WRAP virtual double getRegularizationParameter() const = 0; }; -/* Complete constructor */ +/** Complete constructor */ CV_EXPORTS_W Ptr createThinPlateSplineShapeTransformer(double regularizationParameter=0); /***********************************************************************************/ /***********************************************************************************/ -/*! - * Affine Transformation as a derivated from ShapeTransformer - */ +/** @brief Wrapper class for the OpenCV Affine Transformation algorithm. : + */ class CV_EXPORTS_W AffineTransformer : public ShapeTransformer { public: @@ -102,8 +123,10 @@ public: CV_WRAP virtual bool getFullAffine() const = 0; }; -/* Complete constructor */ +/** Complete constructor */ CV_EXPORTS_W Ptr createAffineTransformer(bool fullAffine); +//! @} + } // cv #endif diff --git a/modules/stitching/include/opencv2/stitching.hpp b/modules/stitching/include/opencv2/stitching.hpp index 15e9479e29..96cde14010 100644 --- a/modules/stitching/include/opencv2/stitching.hpp +++ b/modules/stitching/include/opencv2/stitching.hpp @@ -53,8 +53,46 @@ #include "opencv2/stitching/detail/blenders.hpp" #include "opencv2/stitching/detail/camera.hpp" +/** +@defgroup stitching Images stitching + +This figure illustrates the stitching module pipeline implemented in the Stitcher class. Using that +class it's possible to configure/remove some steps, i.e. adjust the stitching pipeline according to +the particular needs. All building blocks from the pipeline are available in the detail namespace, +one can combine and use them separately. + +The implemented stitching pipeline is very similar to the one proposed in @cite BL07 . 
+ +![image](StitchingPipeline.jpg) + +@{ + @defgroup stitching_match Features Finding and Images Matching + @defgroup stitching_rotation Rotation Estimation + @defgroup stitching_autocalib Autocalibration + @defgroup stitching_warp Images Warping + @defgroup stitching_seam Seam Estimation + @defgroup stitching_exposure Exposure Compensation + @defgroup stitching_blend Image Blenders +@} + */ + namespace cv { +//! @addtogroup stitching +//! @{ + +/** @brief High level image stitcher. + +It's possible to use this class without being aware of the entire stitching pipeline. However, to +be able to achieve higher stitching stability and quality of the final images at least being +familiar with the theory is recommended. + +@note + - A basic example on image stitching can be found at + opencv_source_code/samples/cpp/stitching.cpp + - A detailed example on image stitching can be found at + opencv_source_code/samples/cpp/stitching_detailed.cpp + */ class CV_EXPORTS_W Stitcher { public: @@ -68,7 +106,11 @@ public: }; // Stitcher() {} - // Creates stitcher with default parameters + /** @brief Creates a stitcher with the default parameters. + + @param try_use_gpu Flag indicating whether GPU should be used whenever it's possible. + @return Stitcher class instance. + */ static Stitcher createDefault(bool try_use_gpu = false); CV_WRAP double registrationResol() const { return registr_resol_; } @@ -128,13 +170,43 @@ public: const Ptr blender() const { return blender_; } void setBlender(Ptr b) { blender_ = b; } + /** @overload */ CV_WRAP Status estimateTransform(InputArrayOfArrays images); + /** @brief These functions try to match the given images and to estimate rotations of each camera. + + @note Use the functions only if you're aware of the stitching pipeline, otherwise use + Stitcher::stitch. + + @param images Input images. + @param rois Region of interest rectangles. + @return Status code. 
+ */ Status estimateTransform(InputArrayOfArrays images, const std::vector > &rois); + /** @overload */ CV_WRAP Status composePanorama(OutputArray pano); + /** @brief These functions try to compose the given images (or images stored internally from the other function + calls) into the final pano under the assumption that the image transformations were estimated + before. + + @note Use the functions only if you're aware of the stitching pipeline, otherwise use + Stitcher::stitch. + + @param images Input images. + @param pano Final pano. + @return Status code. + */ Status composePanorama(InputArrayOfArrays images, OutputArray pano); + /** @overload */ CV_WRAP Status stitch(InputArrayOfArrays images, OutputArray pano); + /** @brief These functions try to stitch the given images. + + @param images Input images. + @param rois Region of interest rectangles. + @param pano Final pano. + @return Status code. + */ Status stitch(InputArrayOfArrays images, const std::vector > &rois, OutputArray pano); std::vector component() const { return indices_; } @@ -178,6 +250,8 @@ private: CV_EXPORTS_W Ptr createStitcher(bool try_use_gpu = false); +//! @} stitching + } // namespace cv #endif // __OPENCV_STITCHING_STITCHER_HPP__ diff --git a/modules/stitching/include/opencv2/stitching/detail/autocalib.hpp b/modules/stitching/include/opencv2/stitching/detail/autocalib.hpp index 519ed804d8..ccc0aa179e 100644 --- a/modules/stitching/include/opencv2/stitching/detail/autocalib.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/autocalib.hpp @@ -49,16 +49,37 @@ namespace cv { namespace detail { -// See "Construction of Panoramic Image Mosaics with Global and Local Alignment" -// by Heung-Yeung Shum and Richard Szeliski. +//! @addtogroup stitching_autocalib +//! @{ + +/** @brief Tries to estimate focal lengths from the given homography under the assumption that the camera +undergoes rotations around its centre only. + +@param H Homography. 
+@param f0 Estimated focal length along X axis. +@param f1 Estimated focal length along Y axis. +@param f0_ok True, if f0 was estimated successfully, false otherwise. +@param f1_ok True, if f1 was estimated successfully, false otherwise. + +See "Construction of Panoramic Image Mosaics with Global and Local Alignment" +by Heung-Yeung Shum and Richard Szeliski. + */ void CV_EXPORTS focalsFromHomography(const Mat &H, double &f0, double &f1, bool &f0_ok, bool &f1_ok); +/** @brief Estimates focal lengths for each given camera. + +@param features Features of images. +@param pairwise_matches Matches between all image pairs. +@param focals Estimated focal lengths for each camera. + */ void CV_EXPORTS estimateFocal(const std::vector &features, const std::vector &pairwise_matches, std::vector &focals); bool CV_EXPORTS calibrateRotatingCamera(const std::vector &Hs, Mat &K); +//! @} stitching_autocalib + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp index f91a0eae6e..0e607258a0 100644 --- a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp @@ -48,8 +48,13 @@ namespace cv { namespace detail { +//! @addtogroup stitching_blend +//! @{ -// Simple blender which puts one image over another +/** @brief Base class for all blenders. + +Simple blender which puts one image over another +*/ class CV_EXPORTS Blender { public: @@ -58,9 +63,26 @@ public: enum { NO, FEATHER, MULTI_BAND }; static Ptr createDefault(int type, bool try_gpu = false); + /** @brief Prepares the blender for blending. + + @param corners Source images top-left corners + @param sizes Source image sizes + */ void prepare(const std::vector &corners, const std::vector &sizes); + /** @overload */ virtual void prepare(Rect dst_roi); + /** @brief Processes the image. 
+ + @param img Source image + @param mask Source image mask + @param tl Source image top-left corners + */ virtual void feed(InputArray img, InputArray mask, Point tl); + /** @brief Blends and returns the final pano. + + @param dst Final pano + @param dst_mask Final pano mask + */ virtual void blend(InputOutputArray dst, InputOutputArray dst_mask); protected: @@ -68,7 +90,8 @@ protected: Rect dst_roi_; }; - +/** @brief Simple blender which mixes images at its borders. + */ class CV_EXPORTS FeatherBlender : public Blender { public: @@ -81,8 +104,8 @@ public: void feed(InputArray img, InputArray mask, Point tl); void blend(InputOutputArray dst, InputOutputArray dst_mask); - // Creates weight maps for fixed set of source images by their masks and top-left corners. - // Final image can be obtained by simple weighting of the source images. + //! Creates weight maps for fixed set of source images by their masks and top-left corners. + //! Final image can be obtained by simple weighting of the source images. Rect createWeightMaps(const std::vector &masks, const std::vector &corners, std::vector &weight_maps); @@ -94,7 +117,8 @@ private: inline FeatherBlender::FeatherBlender(float _sharpness) { setSharpness(_sharpness); } - +/** @brief Blender which uses multi-band blending algorithm (see @cite BA83). + */ class CV_EXPORTS MultiBandBlender : public Blender { public: @@ -131,6 +155,8 @@ void CV_EXPORTS createLaplacePyrGpu(InputArray img, int num_levels, std::vector< void CV_EXPORTS restoreImageFromLaplacePyr(std::vector& pyr); void CV_EXPORTS restoreImageFromLaplacePyrGpu(std::vector& pyr); +//! 
@} + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/camera.hpp b/modules/stitching/include/opencv2/stitching/detail/camera.hpp index 00ae4eb918..c231ba5ed6 100644 --- a/modules/stitching/include/opencv2/stitching/detail/camera.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/camera.hpp @@ -48,6 +48,13 @@ namespace cv { namespace detail { +//! @addtogroup stitching +//! @{ + +/** @brief Describes camera parameters. + +@note Translation is assumed to be zero during the whole stitching pipeline. : + */ struct CV_EXPORTS CameraParams { CameraParams(); @@ -63,6 +70,8 @@ struct CV_EXPORTS CameraParams Mat t; // Translation }; +//! @} + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/exposure_compensate.hpp b/modules/stitching/include/opencv2/stitching/detail/exposure_compensate.hpp index 9cd8b32f4c..ef64e12448 100644 --- a/modules/stitching/include/opencv2/stitching/detail/exposure_compensate.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/exposure_compensate.hpp @@ -48,6 +48,11 @@ namespace cv { namespace detail { +//! @addtogroup stitching_exposure +//! @{ + +/** @brief Base class for all exposure compensators. + */ class CV_EXPORTS ExposureCompensator { public: @@ -56,14 +61,29 @@ public: enum { NO, GAIN, GAIN_BLOCKS }; static Ptr createDefault(int type); + /** + @param corners Source image top-left corners + @param images Source images + @param masks Image masks to update (second value in pair specifies the value which should be used + to detect where image is) + */ void feed(const std::vector &corners, const std::vector &images, const std::vector &masks); + /** @overload */ virtual void feed(const std::vector &corners, const std::vector &images, const std::vector > &masks) = 0; + /** @brief Compensate exposure in the specified image. 
+ + @param index Image index + @param corner Image top-left corner + @param image Image to process + @param mask Image mask + */ virtual void apply(int index, Point corner, InputOutputArray image, InputArray mask) = 0; }; - +/** @brief Stub exposure compensator which does nothing. + */ class CV_EXPORTS NoExposureCompensator : public ExposureCompensator { public: @@ -72,7 +92,9 @@ public: void apply(int /*index*/, Point /*corner*/, InputOutputArray /*image*/, InputArray /*mask*/) { } }; - +/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image +intensities, see @cite BL07 and @cite WJ10 for details. + */ class CV_EXPORTS GainCompensator : public ExposureCompensator { public: @@ -85,7 +107,9 @@ private: Mat_ gains_; }; - +/** @brief Exposure compensator which tries to remove exposure related artifacts by adjusting image block +intensities, see @cite UES01 for details. + */ class CV_EXPORTS BlocksGainCompensator : public ExposureCompensator { public: @@ -100,6 +124,8 @@ private: std::vector gain_maps_; }; +//! @} + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp index c0fb5d9d5a..8f34bd23a3 100644 --- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp @@ -55,6 +55,10 @@ namespace cv { namespace detail { +//! @addtogroup stitching_match +//! @{ + +/** @brief Structure containing image keypoints and descriptors. */ struct CV_EXPORTS ImageFeatures { int img_idx; @@ -63,20 +67,40 @@ struct CV_EXPORTS ImageFeatures UMat descriptors; }; - +/** @brief Feature finders base class */ class CV_EXPORTS FeaturesFinder { public: virtual ~FeaturesFinder() {} + /** @overload */ void operator ()(InputArray image, ImageFeatures &features); + /** @brief Finds features in the given image. 
+ + @param image Source image + @param features Found features + @param rois Regions of interest + + @sa detail::ImageFeatures, Rect_ + */ void operator ()(InputArray image, ImageFeatures &features, const std::vector &rois); + /** @brief Frees unused memory allocated before if there is any. */ virtual void collectGarbage() {} protected: + /** @brief This method must implement features finding logic in order to make the wrappers + detail::FeaturesFinder::operator()_ work. + + @param image Source image + @param features Found features + + @sa detail::ImageFeatures */ virtual void find(InputArray image, ImageFeatures &features) = 0; }; +/** @brief SURF features finder. +@sa detail::FeaturesFinder, SURF +*/ class CV_EXPORTS SurfFeaturesFinder : public FeaturesFinder { public: @@ -91,6 +115,10 @@ private: Ptr surf; }; +/** @brief ORB features finder. : + +@sa detail::FeaturesFinder, ORB +*/ class CV_EXPORTS OrbFeaturesFinder : public FeaturesFinder { public: @@ -126,50 +154,92 @@ private: }; #endif +/** @brief Structure containing information about matches between two images. +It's assumed that there is a homography between those images. +*/ struct CV_EXPORTS MatchesInfo { MatchesInfo(); MatchesInfo(const MatchesInfo &other); const MatchesInfo& operator =(const MatchesInfo &other); - int src_img_idx, dst_img_idx; // Images indices (optional) + int src_img_idx, dst_img_idx; //!< Images indices (optional) std::vector matches; - std::vector inliers_mask; // Geometrically consistent matches mask - int num_inliers; // Number of geometrically consistent matches - Mat H; // Estimated homography - double confidence; // Confidence two images are from the same panorama + std::vector inliers_mask; //!< Geometrically consistent matches mask + int num_inliers; //!< Number of geometrically consistent matches + Mat H; //!< Estimated homography + double confidence; //!< Confidence two images are from the same panorama }; - +/** @brief Feature matchers base class. 
*/ class CV_EXPORTS FeaturesMatcher { public: virtual ~FeaturesMatcher() {} + /** @overload + @param features1 First image features + @param features2 Second image features + @param matches_info Found matches + */ void operator ()(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo& matches_info) { match(features1, features2, matches_info); } + /** @brief Performs images matching. + + @param features Features of the source images + @param pairwise_matches Found pairwise matches + @param mask Mask indicating which image pairs must be matched + + The function is parallelized with the TBB library. + + @sa detail::MatchesInfo + */ void operator ()(const std::vector &features, std::vector &pairwise_matches, const cv::UMat &mask = cv::UMat()); + /** @return True, if it's possible to use the same matcher instance in parallel, false otherwise + */ bool isThreadSafe() const { return is_thread_safe_; } + /** @brief Frees unused memory allocated before if there is any. + */ virtual void collectGarbage() {} protected: FeaturesMatcher(bool is_thread_safe = false) : is_thread_safe_(is_thread_safe) {} + /** @brief This method must implement matching logic in order to make the wrappers + detail::FeaturesMatcher::operator()_ work. + + @param features1 first image features + @param features2 second image features + @param matches_info found matches + */ virtual void match(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo& matches_info) = 0; bool is_thread_safe_; }; +/** @brief Features matcher which finds two best matches for each feature and leaves the best one only if the +ratio between descriptor distances is greater than the threshold match_conf +@sa detail::FeaturesMatcher + */ class CV_EXPORTS BestOf2NearestMatcher : public FeaturesMatcher { public: + /** @brief Constructs a "best of 2 nearest" matcher. 
+ + @param try_use_gpu Should try to use GPU or not + @param match_conf Match distances ratio threshold + @param num_matches_thresh1 Minimum number of matches required for the 2D projective transform + estimation used in the inliers classification step + @param num_matches_thresh2 Minimum number of matches required for the 2D projective transform + re-estimation on inliers + */ BestOf2NearestMatcher(bool try_use_gpu = false, float match_conf = 0.3f, int num_matches_thresh1 = 6, int num_matches_thresh2 = 6); @@ -197,6 +267,8 @@ protected: int range_width_; }; +//! @} stitching_match + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp b/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp index c0e446c60e..2c86e6335c 100644 --- a/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/motion_estimators.hpp @@ -51,23 +51,50 @@ namespace cv { namespace detail { +//! @addtogroup stitching_rotation +//! @{ + +/** @brief Rotation estimator base class. + +It takes features of all images, pairwise matches between all images and estimates rotations of all +cameras. + +@note The coordinate system origin is implementation-dependent, but you can always normalize the +rotations with respect to the first camera, for instance. + */ class CV_EXPORTS Estimator { public: virtual ~Estimator() {} + /** @brief Estimates camera parameters. 
+ + @param features Features of images + @param pairwise_matches Pairwise matches of images + @param cameras Estimated camera parameters + @return True in case of success, false otherwise + */ bool operator ()(const std::vector &features, const std::vector &pairwise_matches, std::vector &cameras) { return estimate(features, pairwise_matches, cameras); } protected: + /** @brief This method must implement camera parameters estimation logic in order to make the wrapper + detail::Estimator::operator()_ work. + + @param features Features of images + @param pairwise_matches Pairwise matches of images + @param cameras Estimated camera parameters + @return True in case of success, false otherwise + */ virtual bool estimate(const std::vector &features, const std::vector &pairwise_matches, std::vector &cameras) = 0; }; - +/** @brief Homography based rotation estimator. + */ class CV_EXPORTS HomographyBasedEstimator : public Estimator { public: @@ -82,7 +109,8 @@ private: bool is_focals_estimated_; }; - +/** @brief Base class for all camera parameters refinement methods. + */ class CV_EXPORTS BundleAdjusterBase : public Estimator { public: @@ -100,6 +128,11 @@ public: void setTermCriteria(const TermCriteria& term_criteria) { term_criteria_ = term_criteria; } protected: + /** @brief Construct a bundle adjuster base instance. + + @param num_params_per_cam Number of parameters per camera + @param num_errs_per_measurement Number of error terms (components) per match + */ BundleAdjusterBase(int num_params_per_cam, int num_errs_per_measurement) : num_params_per_cam_(num_params_per_cam), num_errs_per_measurement_(num_errs_per_measurement) @@ -114,9 +147,26 @@ protected: const std::vector &pairwise_matches, std::vector &cameras); + /** @brief Sets initial camera parameter to refine. + + @param cameras Camera parameters + */ virtual void setUpInitialCameraParams(const std::vector &cameras) = 0; + /** @brief Gets the refined camera parameters. 
+ + @param cameras Refined camera parameters + */ virtual void obtainRefinedCameraParams(std::vector &cameras) const = 0; + /** @brief Calculates error vector. + + @param err Error column-vector of length total_num_matches \* num_errs_per_measurement + */ virtual void calcError(Mat &err) = 0; + /** @brief Calculates the cost function jacobian. + + @param jac Jacobian matrix of dimensions + (total_num_matches \* num_errs_per_measurement) x (num_images \* num_params_per_cam) + */ virtual void calcJacobian(Mat &jac) = 0; // 3x3 8U mask, where 0 means don't refine respective parameter, != 0 means refine @@ -145,9 +195,12 @@ protected: }; -// Minimizes reprojection error. -// It can estimate focal length, aspect ratio, principal point. -// You can affect only on them via the refinement mask. +/** @brief Implementation of the camera parameters refinement algorithm which minimizes sum of the reprojection +error squares + +It can estimate focal length, aspect ratio, principal point. +You can affect only on them via the refinement mask. + */ class CV_EXPORTS BundleAdjusterReproj : public BundleAdjusterBase { public: @@ -163,8 +216,11 @@ private: }; -// Minimizes sun of ray-to-ray distances. -// It can estimate focal length. It ignores the refinement mask for now. +/** @brief Implementation of the camera parameters refinement algorithm which minimizes sum of the distances +between the rays passing through the camera center and a feature. : + +It can estimate focal length. It ignores the refinement mask for now. + */ class CV_EXPORTS BundleAdjusterRay : public BundleAdjusterBase { public: @@ -186,6 +242,11 @@ enum WaveCorrectKind WAVE_CORRECT_VERT }; +/** @brief Tries to make panorama more horizontal (or vertical). + +@param rmats Camera rotation matrices. +@param kind Correction kind, see detail::WaveCorrectKind. 
+ */ void CV_EXPORTS waveCorrect(std::vector &rmats, WaveCorrectKind kind); @@ -205,6 +266,8 @@ void CV_EXPORTS findMaxSpanningTree( int num_images, const std::vector &pairwise_matches, Graph &span_tree, std::vector ¢ers); +//! @} stitching_rotation + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp b/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp index 5f085c1a44..e4f7816bb4 100644 --- a/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/seam_finders.hpp @@ -50,22 +50,35 @@ namespace cv { namespace detail { +//! @addtogroup stitching_seam +//! @{ + +/** @brief Base class for a seam estimator. + */ class CV_EXPORTS SeamFinder { public: virtual ~SeamFinder() {} + /** @brief Estimates seams. + + @param src Source images + @param corners Source image top-left corners + @param masks Source image masks to update + */ virtual void find(const std::vector &src, const std::vector &corners, std::vector &masks) = 0; }; - +/** @brief Stub seam estimator which does nothing. + */ class CV_EXPORTS NoSeamFinder : public SeamFinder { public: void find(const std::vector&, const std::vector&, std::vector&) {} }; - +/** @brief Base class for all pairwise seam estimators. + */ class CV_EXPORTS PairwiseSeamFinder : public SeamFinder { public: @@ -74,6 +87,12 @@ public: protected: void run(); + /** @brief Resolves masks intersection of two specified images in the given ROI. + + @param first First image index + @param second Second image index + @param roi Region of interest + */ virtual void findInPair(size_t first, size_t second, Rect roi) = 0; std::vector images_; @@ -82,7 +101,8 @@ protected: std::vector masks_; }; - +/** @brief Voronoi diagram-based seam estimator. 
+ */ class CV_EXPORTS VoronoiSeamFinder : public PairwiseSeamFinder { public: @@ -201,14 +221,16 @@ private: std::set > edges_; }; - +/** @brief Base class for all minimum graph-cut-based seam estimators. + */ class CV_EXPORTS GraphCutSeamFinderBase { public: enum CostType { COST_COLOR, COST_COLOR_GRAD }; }; - +/** @brief Minimum graph cut-based seam estimator. See details in @cite V03 . + */ class CV_EXPORTS GraphCutSeamFinder : public GraphCutSeamFinderBase, public SeamFinder { public: @@ -253,6 +275,8 @@ private: }; #endif +//! @} + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp b/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp index f881a9b934..d64c03c27d 100644 --- a/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/timelapsers.hpp @@ -49,6 +49,9 @@ namespace cv { namespace detail { +//! @addtogroup stitching +//! @{ + // Base Timelapser class, takes a sequence of images, applies appropriate shift, stores result in dst_. class CV_EXPORTS Timelapser @@ -80,6 +83,8 @@ public: virtual void initialize(const std::vector &corners, const std::vector &sizes); }; +//! @} + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/util.hpp b/modules/stitching/include/opencv2/stitching/detail/util.hpp index 051d941f9e..3845ba59ea 100644 --- a/modules/stitching/include/opencv2/stitching/detail/util.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/util.hpp @@ -99,6 +99,9 @@ namespace cv { namespace detail { +//! @addtogroup stitching +//! @{ + class CV_EXPORTS DisjointSets { public: @@ -158,6 +161,8 @@ CV_EXPORTS void selectRandomSubset(int count, int size, std::vector &subset CV_EXPORTS int& stitchingLogLevel(); +//! 
@} + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/util_inl.hpp b/modules/stitching/include/opencv2/stitching/detail/util_inl.hpp index 3ff6c9d8b5..6ac6f8ecc6 100644 --- a/modules/stitching/include/opencv2/stitching/detail/util_inl.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/util_inl.hpp @@ -47,6 +47,8 @@ #include "opencv2/core.hpp" #include "util.hpp" // Make your IDE see declarations +//! @cond IGNORED + namespace cv { namespace detail { @@ -124,4 +126,6 @@ static inline double sqr(double x) { return x * x; } } // namespace detail } // namespace cv +//! @endcond + #endif // __OPENCV_STITCHING_UTIL_INL_HPP__ diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp index ac9e256ad7..79f387cc2f 100644 --- a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp @@ -51,28 +51,76 @@ namespace cv { namespace detail { +//! @addtogroup stitching_warp +//! @{ + +/** @brief Rotation-only model image warper interface. + */ class CV_EXPORTS RotationWarper { public: virtual ~RotationWarper() {} + /** @brief Projects the image point. + + @param pt Source point + @param K Camera intrinsic parameters + @param R Camera rotation matrix + @return Projected point + */ virtual Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R) = 0; + /** @brief Builds the projection maps according to the given camera data. + + @param src_size Source image size + @param K Camera intrinsic parameters + @param R Camera rotation matrix + @param xmap Projection map for the x axis + @param ymap Projection map for the y axis + @return Projected image minimum bounding box + */ virtual Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap) = 0; + /** @brief Projects the image. 
+ + @param src Source image + @param K Camera intrinsic parameters + @param R Camera rotation matrix + @param interp_mode Interpolation mode + @param border_mode Border extrapolation mode + @param dst Projected image + @return Projected image top-left corner + */ virtual Point warp(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode, OutputArray dst) = 0; + /** @brief Projects the image backward. + + @param src Projected image + @param K Camera intrinsic parameters + @param R Camera rotation matrix + @param interp_mode Interpolation mode + @param border_mode Border extrapolation mode + @param dst_size Backward-projected image size + @param dst Backward-projected image + */ virtual void warpBackward(InputArray src, InputArray K, InputArray R, int interp_mode, int border_mode, Size dst_size, OutputArray dst) = 0; + /** + @param src_size Source image bounding box + @param K Camera intrinsic parameters + @param R Camera rotation matrix + @return Projected image minimum bounding box + */ virtual Rect warpRoi(Size src_size, InputArray K, InputArray R) = 0; virtual float getScale() const { return 1.f; } virtual void setScale(float) {} }; - +/** @brief Base class for warping logic implementation. + */ struct CV_EXPORTS ProjectorBase { void setCameraParams(InputArray K = Mat::eye(3, 3, CV_32F), @@ -87,7 +135,8 @@ struct CV_EXPORTS ProjectorBase float t[3]; }; - +/** @brief Base class for rotation-based warper using a detail::ProjectorBase derived class. + */ template class CV_EXPORTS RotationWarperBase : public RotationWarper { @@ -126,10 +175,15 @@ struct CV_EXPORTS PlaneProjector : ProjectorBase void mapBackward(float u, float v, float &x, float &y); }; - +/** @brief Warper that maps an image onto the z = 1 plane. + */ class CV_EXPORTS PlaneWarper : public RotationWarperBase { public: + /** @brief Construct an instance of the plane warper class. 
+ + @param scale Projected image scale multiplier + */ PlaneWarper(float scale = 1.f) { projector_.scale = scale; } Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R, InputArray T); @@ -154,11 +208,18 @@ struct CV_EXPORTS SphericalProjector : ProjectorBase }; -// Projects image onto unit sphere with origin at (0, 0, 0). -// Poles are located at (0, -1, 0) and (0, 1, 0) points. +/** @brief Warper that maps an image onto the unit sphere located at the origin. + + Projects image onto unit sphere with origin at (0, 0, 0). + Poles are located at (0, -1, 0) and (0, 1, 0) points. +*/ class CV_EXPORTS SphericalWarper : public RotationWarperBase { public: + /** @brief Construct an instance of the spherical warper class. + + @param scale Projected image scale multiplier + */ SphericalWarper(float scale) { projector_.scale = scale; } Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap); @@ -175,10 +236,15 @@ struct CV_EXPORTS CylindricalProjector : ProjectorBase }; -// Projects image onto x * x + z * z = 1 cylinder +/** @brief Warper that maps an image onto the x\*x + z\*z = 1 cylinder. + */ class CV_EXPORTS CylindricalWarper : public RotationWarperBase { public: + /** @brief Construct an instance of the cylindrical warper class. + + @param scale Projected image scale multiplier + */ CylindricalWarper(float scale) { projector_.scale = scale; } Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap); @@ -508,6 +574,8 @@ protected: } }; +//! 
@} stitching_warp + } // namespace detail } // namespace cv diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp index 7dcbb6cb57..0416ecb5ee 100644 --- a/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/warpers_inl.hpp @@ -47,6 +47,8 @@ #include "warpers.hpp" // Make your IDE see declarations #include +//! @cond IGNORED + namespace cv { namespace detail { @@ -767,4 +769,6 @@ void PlanePortraitProjector::mapBackward(float u0, float v0, float &x, float &y) } // namespace detail } // namespace cv +//! @endcond + #endif // __OPENCV_STITCHING_WARPERS_INL_HPP__ diff --git a/modules/stitching/include/opencv2/stitching/warpers.hpp b/modules/stitching/include/opencv2/stitching/warpers.hpp index da5fe26183..7e570d30c4 100644 --- a/modules/stitching/include/opencv2/stitching/warpers.hpp +++ b/modules/stitching/include/opencv2/stitching/warpers.hpp @@ -47,6 +47,11 @@ namespace cv { +//! @addtogroup stitching_warp +//! @{ + +/** @brief Image warper factories base class. + */ class WarperCreator { public: @@ -54,21 +59,25 @@ public: virtual Ptr create(float scale) const = 0; }; - +/** @brief Plane warper factory class. + @sa detail::PlaneWarper + */ class PlaneWarper : public WarperCreator { public: Ptr create(float scale) const { return makePtr(scale); } }; - +/** @brief Cylindrical warper factory class. +@sa detail::CylindricalWarper +*/ class CylindricalWarper: public WarperCreator { public: Ptr create(float scale) const { return makePtr(scale); } }; - +/** @brief Spherical warper factory class */ class SphericalWarper: public WarperCreator { public: @@ -167,6 +176,8 @@ public: }; #endif +//! 
@} stitching_warp + } // namespace cv #endif // __OPENCV_STITCHING_WARPER_CREATORS_HPP__ diff --git a/modules/superres/include/opencv2/superres.hpp b/modules/superres/include/opencv2/superres.hpp index 3d96e0f71c..e5bca4b31f 100644 --- a/modules/superres/include/opencv2/superres.hpp +++ b/modules/superres/include/opencv2/superres.hpp @@ -45,10 +45,23 @@ #include "opencv2/core.hpp" +/** + @defgroup superres Super Resolution + +The Super Resolution module contains a set of functions and classes that can be used to solve the +problem of resolution enhancement. There are a few methods implemented, most of them are described in +the papers @cite Farsiu03 and @cite Mitzel09 . + + */ + namespace cv { namespace superres { + +//! @addtogroup superres +//! @{ + CV_EXPORTS bool initModule_superres(); class CV_EXPORTS FrameSource @@ -67,14 +80,29 @@ namespace cv CV_EXPORTS Ptr createFrameSource_Camera(int deviceId = 0); + /** @brief Base class for Super Resolution algorithms. + + The class is only used to define the common interface for the whole family of Super Resolution + algorithms. + */ class CV_EXPORTS SuperResolution : public cv::Algorithm, public FrameSource { public: + /** @brief Set input frame source for Super Resolution algorithm. + + @param frameSource Input frame source + */ void setInput(const Ptr& frameSource); + /** @brief Process next frame from input and return output result. + + @param frame Output result + */ void nextFrame(OutputArray frame); void reset(); + /** @brief Clear all inner buffers. + */ virtual void collectGarbage(); protected: @@ -90,11 +118,31 @@ namespace cv bool firstCall_; }; - // S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution. - // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. + /** @brief Create Bilateral TV-L1 Super Resolution.
+ + This class implements Super Resolution algorithm described in the papers @cite Farsiu03 and + @cite Mitzel09 . + + Here are important members of the class that control the algorithm, which you can set after + constructing the class instance: + + - **int scale** Scale factor. + - **int iterations** Iteration count. + - **double tau** Asymptotic value of steepest descent method. + - **double lambda** Weight parameter to balance data term and smoothness term. + - **double alpha** Parameter of spatial distribution in Bilateral-TV. + - **int btvKernelSize** Kernel size of Bilateral-TV filter. + - **int blurKernelSize** Gaussian blur kernel size. + - **double blurSigma** Gaussian blur sigma. + - **int temporalAreaRadius** Radius of the temporal search area. + - **Ptr\<DenseOpticalFlowExt\> opticalFlow** Dense optical flow algorithm. + */ CV_EXPORTS Ptr createSuperResolution_BTVL1(); CV_EXPORTS Ptr createSuperResolution_BTVL1_CUDA(); CV_EXPORTS Ptr createSuperResolution_BTVL1_OCL(); + +//! @} superres + } } diff --git a/modules/superres/include/opencv2/superres/optical_flow.hpp b/modules/superres/include/opencv2/superres/optical_flow.hpp index 06225e5386..d4362c4fe7 100644 --- a/modules/superres/include/opencv2/superres/optical_flow.hpp +++ b/modules/superres/include/opencv2/superres/optical_flow.hpp @@ -49,6 +49,10 @@ namespace cv { namespace superres { + +//! @addtogroup superres +//! @{ + class CV_EXPORTS DenseOpticalFlowExt : public cv::Algorithm { public: @@ -70,6 +74,9 @@ namespace cv CV_EXPORTS Ptr createOptFlow_PyrLK_CUDA(); CV_EXPORTS Ptr createOptFlow_PyrLK_OCL(); + +//!
@} } } diff --git a/modules/video/include/opencv2/video.hpp b/modules/video/include/opencv2/video.hpp index 70c17e67d9..6d20a26cce 100644 --- a/modules/video/include/opencv2/video.hpp +++ b/modules/video/include/opencv2/video.hpp @@ -44,6 +44,15 @@ #ifndef __OPENCV_VIDEO_HPP__ #define __OPENCV_VIDEO_HPP__ +/** + @defgroup video Video Analysis + @{ + @defgroup video_motion Motion Analysis + @defgroup video_track Object Tracking + @defgroup video_c C API + @} +*/ + #include "opencv2/video/tracking.hpp" #include "opencv2/video/background_segm.hpp" diff --git a/modules/video/include/opencv2/video/background_segm.hpp b/modules/video/include/opencv2/video/background_segm.hpp index 789f9db96d..dbeccbdc8f 100644 --- a/modules/video/include/opencv2/video/background_segm.hpp +++ b/modules/video/include/opencv2/video/background_segm.hpp @@ -49,49 +49,102 @@ namespace cv { -/*! - The Base Class for Background/Foreground Segmentation +//! @addtogroup video_motion +//! @{ - The class is only used to define the common interface for - the whole family of background/foreground segmentation algorithms. -*/ +/** @brief Base class for background/foreground segmentation. + +The class is only used to define the common interface for the whole family of background/foreground +segmentation algorithms. + */ class CV_EXPORTS_W BackgroundSubtractor : public Algorithm { public: - //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image. + /** @brief Computes a foreground mask. + + @param image Next video frame. + @param fgmask The output foreground mask as an 8-bit binary image. + @param learningRate The value between 0 and 1 that indicates how fast the background model is + learnt. Negative parameter value makes the algorithm use some automatically chosen learning + rate. 0 means that the background model is not updated at all, 1 means that the background model + is completely reinitialized from the last frame.
+ */ CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) = 0; - //! computes a background image + /** @brief Computes a background image. + + @param backgroundImage The output background image. + + @note Sometimes the background image can be very blurry, as it contains the average background + statistics. + */ CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const = 0; }; -/*! - The class implements the following algorithm: - "Improved adaptive Gausian mixture model for background subtraction" - Z.Zivkovic - International Conference Pattern Recognition, UK, August, 2004. - http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf +/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm. + +The class implements the Gaussian mixture model background subtraction described in @cite Zivkovic2004 +and @cite Zivkovic2006 . */ class CV_EXPORTS_W BackgroundSubtractorMOG2 : public BackgroundSubtractor { public: + /** @brief Returns the number of last frames that affect the background model + */ CV_WRAP virtual int getHistory() const = 0; + /** @brief Sets the number of last frames that affect the background model + */ CV_WRAP virtual void setHistory(int history) = 0; + /** @brief Returns the number of gaussian components in the background model + */ CV_WRAP virtual int getNMixtures() const = 0; + /** @brief Sets the number of gaussian components in the background model. + + The model needs to be reinitialized to reserve memory. + */ CV_WRAP virtual void setNMixtures(int nmixtures) = 0;//needs reinitialization! + /** @brief Returns the "background ratio" parameter of the algorithm + + If a foreground pixel keeps semi-constant value for about backgroundRatio\*history frames, it's + considered background and added to the model as a center of a new component. It corresponds to TB + parameter in the paper.
+ */ CV_WRAP virtual double getBackgroundRatio() const = 0; + /** @brief Sets the "background ratio" parameter of the algorithm + */ CV_WRAP virtual void setBackgroundRatio(double ratio) = 0; + /** @brief Returns the variance threshold for the pixel-model match + + The main threshold on the squared Mahalanobis distance to decide if the sample is well described by + the background model or not. Related to Cthr from the paper. + */ CV_WRAP virtual double getVarThreshold() const = 0; + /** @brief Sets the variance threshold for the pixel-model match + */ CV_WRAP virtual void setVarThreshold(double varThreshold) = 0; + /** @brief Returns the variance threshold for the pixel-model match used for new mixture component generation + + Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the + existing components (corresponds to Tg in the paper). If a pixel is not close to any component, it + is considered foreground or added as a new component. 3 sigma =\> Tg=3\*3=9 is default. A smaller Tg + value generates more components. A higher Tg value may result in a small number of components but + they can grow too large. + */ CV_WRAP virtual double getVarThresholdGen() const = 0; + /** @brief Sets the variance threshold for the pixel-model match used for new mixture component generation + */ CV_WRAP virtual void setVarThresholdGen(double varThresholdGen) = 0; + /** @brief Returns the initial variance of each gaussian component + */ CV_WRAP virtual double getVarInit() const = 0; + /** @brief Sets the initial variance of each gaussian component + */ CV_WRAP virtual void setVarInit(double varInit) = 0; CV_WRAP virtual double getVarMin() const = 0; @@ -100,62 +153,154 @@ public: CV_WRAP virtual double getVarMax() const = 0; CV_WRAP virtual void setVarMax(double varMax) = 0; + /** @brief Returns the complexity reduction threshold + + This parameter defines the number of samples needed to accept to prove the component exists. 
CT=0.05 + is a default value for all the samples. By setting CT=0 you get an algorithm very similar to the + standard Stauffer&Grimson algorithm. + */ CV_WRAP virtual double getComplexityReductionThreshold() const = 0; + /** @brief Sets the complexity reduction threshold + */ CV_WRAP virtual void setComplexityReductionThreshold(double ct) = 0; + /** @brief Returns the shadow detection flag + + If true, the algorithm detects shadows and marks them. See createBackgroundSubtractorMOG2 for + details. + */ CV_WRAP virtual bool getDetectShadows() const = 0; + /** @brief Enables or disables shadow detection + */ CV_WRAP virtual void setDetectShadows(bool detectShadows) = 0; + /** @brief Returns the shadow value + + Shadow value is the value used to mark shadows in the foreground mask. Default value is 127. Value 0 + in the mask always means background, 255 means foreground. + */ CV_WRAP virtual int getShadowValue() const = 0; + /** @brief Sets the shadow value + */ CV_WRAP virtual void setShadowValue(int value) = 0; + /** @brief Returns the shadow threshold + + A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in + the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel + is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiarra, + *Detecting Moving Shadows...*, IEEE PAMI,2003. + */ CV_WRAP virtual double getShadowThreshold() const = 0; + /** @brief Sets the shadow threshold + */ CV_WRAP virtual void setShadowThreshold(double threshold) = 0; }; +/** @brief Creates MOG2 Background Subtractor + +@param history Length of the history. +@param varThreshold Threshold on the squared Mahalanobis distance between the pixel and the model +to decide whether a pixel is well described by the background model. This parameter does not +affect the background update. +@param detectShadows If true, the algorithm will detect shadows and mark them. 
It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ CV_EXPORTS_W Ptr createBackgroundSubtractorMOG2(int history=500, double varThreshold=16, bool detectShadows=true); -/*! - The class implements the K nearest neigbours algorithm from: - "Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction" - Z.Zivkovic, F. van der Heijden - Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006 - http://www.zoranz.net/Publications/zivkovicPRL2006.pdf - - Fast for small foreground object. Results on the benchmark data is at http://www.changedetection.net. -*/ +/** @brief K-nearest neighbours - based Background/Foreground Segmentation Algorithm. +The class implements the K-nearest neighbours background subtraction described in @cite Zivkovic2006 . +Very efficient if number of foreground pixels is low. + */ class CV_EXPORTS_W BackgroundSubtractorKNN : public BackgroundSubtractor { public: + /** @brief Returns the number of last frames that affect the background model + */ CV_WRAP virtual int getHistory() const = 0; + /** @brief Sets the number of last frames that affect the background model + */ CV_WRAP virtual void setHistory(int history) = 0; + /** @brief Returns the number of data samples in the background model + */ CV_WRAP virtual int getNSamples() const = 0; + /** @brief Sets the number of data samples in the background model. + + The model needs to be reinitialized to reserve memory. + */ CV_WRAP virtual void setNSamples(int _nN) = 0;//needs reinitialization! + /** @brief Returns the threshold on the squared distance between the pixel and the sample + + The threshold on the squared distance between the pixel and the sample to decide whether a pixel is + close to a data sample.
+ */ CV_WRAP virtual double getDist2Threshold() const = 0; + /** @brief Sets the threshold on the squared distance + */ CV_WRAP virtual void setDist2Threshold(double _dist2Threshold) = 0; + /** @brief Returns the number of neighbours, the k in the kNN. + + K is the number of samples that need to be within dist2Threshold in order to decide that the + pixel is matching the kNN background model. + */ CV_WRAP virtual int getkNNSamples() const = 0; + /** @brief Sets the k in the kNN. How many nearest neighbours need to match. + */ CV_WRAP virtual void setkNNSamples(int _nkNN) = 0; + /** @brief Returns the shadow detection flag + + If true, the algorithm detects shadows and marks them. See createBackgroundSubtractorKNN for + details. + */ CV_WRAP virtual bool getDetectShadows() const = 0; + /** @brief Enables or disables shadow detection + */ CV_WRAP virtual void setDetectShadows(bool detectShadows) = 0; + /** @brief Returns the shadow value + + Shadow value is the value used to mark shadows in the foreground mask. Default value is 127. Value 0 + in the mask always means background, 255 means foreground. + */ CV_WRAP virtual int getShadowValue() const = 0; + /** @brief Sets the shadow value + */ CV_WRAP virtual void setShadowValue(int value) = 0; + /** @brief Returns the shadow threshold + + A shadow is detected if pixel is a darker version of the background. The shadow threshold (Tau in + the paper) is a threshold defining how much darker the shadow can be. Tau= 0.5 means that if a pixel + is more than twice darker then it is not shadow. See Prati, Mikic, Trivedi and Cucchiarra, + *Detecting Moving Shadows...*, IEEE PAMI,2003. + */ CV_WRAP virtual double getShadowThreshold() const = 0; + /** @brief Sets the shadow threshold + */ CV_WRAP virtual void setShadowThreshold(double threshold) = 0; }; +/** @brief Creates KNN Background Subtractor + +@param history Length of the history.
+@param dist2Threshold Threshold on the squared distance between the pixel and the sample to decide +whether a pixel is close to that sample. This parameter does not affect the background update. +@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the +speed a bit, so if you do not need this feature, set the parameter to false. + */ CV_EXPORTS_W Ptr createBackgroundSubtractorKNN(int history=500, double dist2Threshold=400.0, bool detectShadows=true); +//! @} video_motion + } // cv #endif diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp index a9fdaa272a..d54547ef71 100644 --- a/modules/video/include/opencv2/video/tracking.hpp +++ b/modules/video/include/opencv2/video/tracking.hpp @@ -50,26 +50,126 @@ namespace cv { +//! @addtogroup video_track +//! @{ + enum { OPTFLOW_USE_INITIAL_FLOW = 4, OPTFLOW_LK_GET_MIN_EIGENVALS = 8, OPTFLOW_FARNEBACK_GAUSSIAN = 256 }; -//! updates the object tracking window using CAMSHIFT algorithm +/** @brief Finds an object center, size, and orientation. + +@param probImage Back projection of the object histogram. See calcBackProject. +@param window Initial search window. +@param criteria Stop criteria for the underlying meanShift. +returns +(in old interfaces) Number of iterations CAMSHIFT took to converge +The function implements the CAMSHIFT object tracking algorithm @cite Bradski98 . First, it finds an +object center using meanShift and then adjusts the window size and finds the optimal rotation. The +function returns the rotated rectangle structure that includes the object position, size, and +orientation. The next position of the search window can be obtained with RotatedRect::boundingRect() + +See the OpenCV sample camshiftdemo.c that tracks colored objects. 
+ +@note +- (Python) A sample explaining the camshift tracking algorithm can be found at + opencv_source_code/samples/python2/camshift.py + */ CV_EXPORTS_W RotatedRect CamShift( InputArray probImage, CV_IN_OUT Rect& window, TermCriteria criteria ); -//! updates the object tracking window using meanshift algorithm +/** @brief Finds an object on a back projection image. + +@param probImage Back projection of the object histogram. See calcBackProject for details. +@param window Initial search window. +@param criteria Stop criteria for the iterative search algorithm. +returns +: Number of iterations CAMSHIFT took to converge. +The function implements the iterative object search algorithm. It takes the input back projection of +an object and the initial position. The mass center in window of the back projection image is +computed and the search window center shifts to the mass center. The procedure is repeated until the +specified number of iterations criteria.maxCount is done or until the window center shifts by less +than criteria.epsilon. The algorithm is used inside CamShift and, unlike CamShift , the search +window size or orientation do not change during the search. You can simply pass the output of +calcBackProject to this function. But better results can be obtained if you pre-filter the back +projection and remove the noise. For example, you can do this by retrieving connected components +with findContours , throwing away contours with small area ( contourArea ), and rendering the +remaining contours with drawContours. + +@note +- A mean-shift tracking sample can be found at opencv_source_code/samples/cpp/camshiftdemo.cpp + */ CV_EXPORTS_W int meanShift( InputArray probImage, CV_IN_OUT Rect& window, TermCriteria criteria ); -//! constructs a pyramid which can be used as input for calcOpticalFlowPyrLK +/** @brief Constructs the image pyramid which can be passed to calcOpticalFlowPyrLK. + +@param img 8-bit input image. +@param pyramid output pyramid. 
+@param winSize window size of optical flow algorithm. Must be not less than winSize argument of +calcOpticalFlowPyrLK. It is needed to calculate required padding for pyramid levels. +@param maxLevel 0-based maximal pyramid level number. +@param withDerivatives set to precompute gradients for the every pyramid level. If pyramid is +constructed without the gradients then calcOpticalFlowPyrLK will calculate them internally. +@param pyrBorder the border mode for pyramid layers. +@param derivBorder the border mode for gradients. +@param tryReuseInputImage put ROI of input image into the pyramid if possible. You can pass false +to force data copying. +@return number of levels in constructed pyramid. Can be less than maxLevel. + */ CV_EXPORTS_W int buildOpticalFlowPyramid( InputArray img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives = true, int pyrBorder = BORDER_REFLECT_101, int derivBorder = BORDER_CONSTANT, bool tryReuseInputImage = true ); -//! computes sparse optical flow using multi-scale Lucas-Kanade algorithm +/** @brief Calculates an optical flow for a sparse feature set using the iterative Lucas-Kanade method with +pyramids. + +@param prevImg first 8-bit input image or pyramid constructed by buildOpticalFlowPyramid. +@param nextImg second input image or pyramid of the same size and the same type as prevImg. +@param prevPts vector of 2D points for which the flow needs to be found; point coordinates must be +single-precision floating-point numbers. +@param nextPts output vector of 2D points (with single-precision floating-point coordinates) +containing the calculated new positions of input features in the second image; when +OPTFLOW_USE_INITIAL_FLOW flag is passed, the vector must have the same size as in the input. +@param status output status vector (of unsigned chars); each element of the vector is set to 1 if +the flow for the corresponding features has been found, otherwise, it is set to 0. 
+@param err output vector of errors; each element of the vector is set to an error for the +corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't +found then the error is not defined (use the status parameter to find such cases). +@param winSize size of the search window at each pyramid level. +@param maxLevel 0-based maximal pyramid level number; if set to 0, pyramids are not used (single +level), if set to 1, two levels are used, and so on; if pyramids are passed to input then +algorithm will use as many levels as pyramids have but no more than maxLevel. +@param criteria parameter, specifying the termination criteria of the iterative search algorithm +(after the specified maximum number of iterations criteria.maxCount or when the search window +moves by less than criteria.epsilon). +@param flags operation flags: + - **OPTFLOW_USE_INITIAL_FLOW** uses initial estimations, stored in nextPts; if the flag is + not set, then prevPts is copied to nextPts and is considered the initial estimate. + - **OPTFLOW_LK_GET_MIN_EIGENVALS** use minimum eigen values as an error measure (see + minEigThreshold description); if the flag is not set, then L1 distance between patches + around the original and a moved point, divided by number of pixels in a window, is used as an + error measure. +@param minEigThreshold the algorithm calculates the minimum eigen value of a 2x2 normal matrix of +optical flow equations (this matrix is called a spatial gradient matrix in @cite Bouguet00), divided +by number of pixels in a window; if this value is less than minEigThreshold, then a corresponding +feature is filtered out and its flow is not processed, so it allows to remove bad points and get a +performance boost. + +The function implements a sparse iterative version of the Lucas-Kanade optical flow in pyramids. See +@cite Bouguet00 . The function is parallelized with the TBB library.
+ +@note + +- An example using the Lucas-Kanade optical flow algorithm can be found at + opencv_source_code/samples/cpp/lkdemo.cpp +- (Python) An example using the Lucas-Kanade optical flow algorithm can be found at + opencv_source_code/samples/python2/lk_track.py +- (Python) An example using the Lucas-Kanade tracker for homography matching can be found at + opencv_source_code/samples/python2/lk_homography.py + */ CV_EXPORTS_W void calcOpticalFlowPyrLK( InputArray prevImg, InputArray nextImg, InputArray prevPts, InputOutputArray nextPts, OutputArray status, OutputArray err, @@ -77,14 +177,76 @@ CV_EXPORTS_W void calcOpticalFlowPyrLK( InputArray prevImg, InputArray nextImg, TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), int flags = 0, double minEigThreshold = 1e-4 ); -//! computes dense optical flow using Farneback algorithm +/** @brief Computes a dense optical flow using the Gunnar Farneback's algorithm. + +@param prev first 8-bit single-channel input image. +@param next second input image of the same size and the same type as prev. +@param flow computed flow image that has the same size as prev and type CV_32FC2. +@param pyr_scale parameter, specifying the image scale (\<1) to build pyramids for each image; +pyr_scale=0.5 means a classical pyramid, where each next layer is twice smaller than the previous +one. +@param levels number of pyramid layers including the initial image; levels=1 means that no extra +layers are created and only the original images are used. +@param winsize averaging window size; larger values increase the algorithm robustness to image +noise and give more chances for fast motion detection, but yield more blurred motion field. +@param iterations number of iterations the algorithm does at each pyramid level. 
+@param poly_n size of the pixel neighborhood used to find polynomial expansion in each pixel; +larger values mean that the image will be approximated with smoother surfaces, yielding more +robust algorithm and more blurred motion field, typically poly_n =5 or 7. +@param poly_sigma standard deviation of the Gaussian that is used to smooth derivatives used as a +basis for the polynomial expansion; for poly_n=5, you can set poly_sigma=1.1, for poly_n=7, a +good value would be poly_sigma=1.5. +@param flags operation flags that can be a combination of the following: + - **OPTFLOW_USE_INITIAL_FLOW** uses the input flow as an initial flow approximation. + - **OPTFLOW_FARNEBACK_GAUSSIAN** uses the Gaussian \f$\texttt{winsize}\times\texttt{winsize}\f$ + filter instead of a box filter of the same size for optical flow estimation; usually, this + option gives a more accurate flow than with a box filter, at the cost of lower speed; + normally, winsize for a Gaussian window should be set to a larger value to achieve the same + level of robustness. + +The function finds an optical flow for each prev pixel using the @cite Farneback2003 algorithm so that + +\f[\texttt{prev} (y,x) \sim \texttt{next} ( y + \texttt{flow} (y,x)[1], x + \texttt{flow} (y,x)[0])\f] + +@note + +- An example using the optical flow algorithm described by Gunnar Farneback can be found at + opencv_source_code/samples/cpp/fback.cpp +- (Python) An example using the optical flow algorithm described by Gunnar Farneback can be + found at opencv_source_code/samples/python2/opt_flow.py + */ CV_EXPORTS_W void calcOpticalFlowFarneback( InputArray prev, InputArray next, InputOutputArray flow, double pyr_scale, int levels, int winsize, int iterations, int poly_n, double poly_sigma, int flags ); -//! estimates the best-fit Euqcidean, similarity, affine or perspective transformation -// that maps one 2D point set to another or one image to another.
+/** @brief Computes an optimal affine transformation between two 2D point sets. + +@param src First input 2D point set stored in std::vector or Mat, or an image stored in Mat. +@param dst Second input 2D point set of the same size and the same type as src, or another image. +@param fullAffine If true, the function finds an optimal affine transformation with no additional +restrictions (6 degrees of freedom). Otherwise, the class of transformations to choose from is +limited to combinations of translation, rotation, and uniform scaling (4 degrees of freedom). + +The function finds an optimal affine transform *[A|b]* (a 2 x 3 floating-point matrix) that +approximates best the affine transformation between: + +* Two point sets +* Two raster images. In this case, the function first finds some features in the src image and + finds the corresponding features in dst image. After that, the problem is reduced to the first + case. +In case of point sets, the problem is formulated as follows: you need to find a 2x2 matrix *A* and +2x1 vector *b* so that: + +\f[[A^*|b^*] = \arg \min _{[A|b]} \sum _i \| \texttt{dst}[i] - A { \texttt{src}[i]}^T - b \| ^2\f] +where src[i] and dst[i] are the i-th points in src and dst, respectively +\f$[A|b]\f$ can be either arbitrary (when fullAffine=true ) or have a form of +\f[\begin{bmatrix} a_{11} & a_{12} & b_1 \\ -a_{12} & a_{11} & b_2 \end{bmatrix}\f] +when fullAffine=false. + +@sa +getAffineTransform, getPerspectiveTransform, findHomography + */ CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine ); @@ -96,37 +258,106 @@ enum MOTION_HOMOGRAPHY = 3 }; -//! estimates the best-fit Translation, Euclidean, Affine or Perspective Transformation -// with respect to Enhanced Correlation Coefficient criterion that maps one image to -// another (area-based alignment) -// -// see reference: -// Evangelidis, G.
E., Psarakis, E.Z., Parametric Image Alignment using -// Enhanced Correlation Coefficient Maximization, PAMI, 30(8), 2008 +/** @brief Finds the geometric transform (warp) between two images in terms of the ECC criterion @cite EP08 . + +@param templateImage single-channel template image; CV_8U or CV_32F array. +@param inputImage single-channel input image which should be warped with the final warpMatrix in +order to provide an image similar to templateImage, same type as templateImage. +@param warpMatrix floating-point \f$2\times 3\f$ or \f$3\times 3\f$ mapping matrix (warp). +@param motionType parameter, specifying the type of motion: + - **MOTION_TRANSLATION** sets a translational motion model; warpMatrix is \f$2\times 3\f$ with + the first \f$2\times 2\f$ part being the unity matrix and the rest two parameters being + estimated. + - **MOTION_EUCLIDEAN** sets a Euclidean (rigid) transformation as motion model; three + parameters are estimated; warpMatrix is \f$2\times 3\f$. + - **MOTION_AFFINE** sets an affine motion model (DEFAULT); six parameters are estimated; + warpMatrix is \f$2\times 3\f$. + - **MOTION_HOMOGRAPHY** sets a homography as a motion model; eight parameters are + estimated; warpMatrix is \f$3\times 3\f$. +@param criteria parameter, specifying the termination criteria of the ECC algorithm; +criteria.epsilon defines the threshold of the increment in the correlation coefficient between two +iterations (a negative criteria.epsilon makes criteria.maxCount the only termination criterion). +Default values are shown in the declaration above.
+ +The function estimates the optimum transformation (warpMatrix) with respect to ECC criterion +(@cite EP08), that is + +\f[\texttt{warpMatrix} = \arg\max_{W} \texttt{ECC}(\texttt{templateImage}(x,y),\texttt{inputImage}(x',y'))\f] + +where + +\f[\begin{bmatrix} x' \\ y' \end{bmatrix} = W \cdot \begin{bmatrix} x \\ y \\ 1 \end{bmatrix}\f] + +(the equation holds with homogeneous coordinates for homography). It returns the final enhanced +correlation coefficient, that is the correlation coefficient between the template image and the +final warped input image. When a \f$3\times 3\f$ matrix is given with motionType =0, 1 or 2, the third +row is ignored. + +Unlike findHomography and estimateRigidTransform, the function findTransformECC implements an +area-based alignment that builds on intensity similarities. In essence, the function updates the +initial transformation that roughly aligns the images. If this information is missing, the identity +warp (unity matrix) should be given as input. Note that if images undergo strong +displacements/rotations, an initial transformation that roughly aligns the images is necessary +(e.g., a simple euclidean/similarity transform that allows for the images showing the same image +content approximately). Use inverse warping in the second image to take an image close to the first +one, i.e. use the flag WARP_INVERSE_MAP with warpAffine or warpPerspective. See also the OpenCV +sample image_alignment.cpp that demonstrates the use of the function. Note that the function throws +an exception if algorithm does not converge. + +@sa +estimateRigidTransform, findHomography + */ CV_EXPORTS_W double findTransformECC( InputArray templateImage, InputArray inputImage, InputOutputArray warpMatrix, int motionType = MOTION_AFFINE, TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 50, 0.001)); -/*! - Kalman filter. +/** @brief Kalman filter class.
- The class implements standard Kalman filter http://en.wikipedia.org/wiki/Kalman_filter. - However, you can modify KalmanFilter::transitionMatrix, KalmanFilter::controlMatrix and - KalmanFilter::measurementMatrix to get the extended Kalman filter functionality. -*/ +The class implements a standard Kalman filter <http://en.wikipedia.org/wiki/Kalman_filter>, +@cite Welch95 . However, you can modify transitionMatrix, controlMatrix, and measurementMatrix to get +an extended Kalman filter functionality. See the OpenCV sample kalman.cpp. + +@note + +- An example using the standard Kalman filter can be found at + opencv_source_code/samples/cpp/kalman.cpp + */ class CV_EXPORTS_W KalmanFilter { public: - //! the default constructor + /** @brief The constructors. + + @note In C API when CvKalman\* kalmanFilter structure is not needed anymore, it should be released + with cvReleaseKalman(&kalmanFilter) + */ CV_WRAP KalmanFilter(); - //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector + /** @overload + @param dynamParams Dimensionality of the state. + @param measureParams Dimensionality of the measurement. + @param controlParams Dimensionality of the control vector. + @param type Type of the created matrices that should be CV_32F or CV_64F. + */ CV_WRAP KalmanFilter( int dynamParams, int measureParams, int controlParams = 0, int type = CV_32F ); - //! re-initializes Kalman filter. The previous content is destroyed. + + /** @brief Re-initializes Kalman filter. The previous content is destroyed. + + @param dynamParams Dimensionality of the state. + @param measureParams Dimensionality of the measurement. + @param controlParams Dimensionality of the control vector. + @param type Type of the created matrices that should be CV_32F or CV_64F. + */ void init( int dynamParams, int measureParams, int controlParams = 0, int type = CV_32F ); - //! computes predicted state + /** @brief Computes a predicted state.
+ + @param control The optional input control + */ CV_WRAP const Mat& predict( const Mat& control = Mat() ); - //! updates the predicted state from the measurement + + /** @brief Updates the predicted state from the measurement. + + @param measurement The measured system parameters + */ CV_WRAP const Mat& correct( const Mat& measurement ); CV_PROP_RW Mat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) @@ -149,21 +380,69 @@ public: }; +/** @brief "Dual TV L1" Optical Flow Algorithm. +The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and +@cite Javier2012 . +Here are important members of the class that control the algorithm, which you can set after +constructing the class instance: + +- member double tau + Time step of the numerical scheme. + +- member double lambda + Weight parameter for the data term, attachment parameter. This is the most relevant + parameter, which determines the smoothness of the output. The smaller this parameter is, + the smoother the solutions we obtain. It depends on the range of motions of the images, so + its value should be adapted to each image sequence. + +- member double theta + Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the + attachment and the regularization terms. In theory, it should have a small value in order + to maintain both parts in correspondence. The method is stable for a large range of values + of this parameter. + +- member int nscales + Number of scales used to create the pyramid of images. + +- member int warps + Number of warpings per scale. Represents the number of times that I1(x+u0) and grad( + I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the + method. It also affects the running time, so it is a compromise between speed and + accuracy. + +- member double epsilon + Stopping criterion threshold used in the numerical scheme, which is a trade-off between + precision and running time. 
A small value will yield more accurate solutions at the + expense of a slower convergence. + +- member int iterations + Stopping criterion iterations number used in the numerical scheme. + +C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". +Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". +*/ class CV_EXPORTS_W DenseOpticalFlow : public Algorithm { public: + /** @brief Calculates an optical flow. + + @param I0 first 8-bit single-channel input image. + @param I1 second input image of the same size and the same type as prev. + @param flow computed flow image that has the same size as prev and type CV_32FC2. + */ CV_WRAP virtual void calc( InputArray I0, InputArray I1, InputOutputArray flow ) = 0; + /** @brief Releases all inner buffers. + */ CV_WRAP virtual void collectGarbage() = 0; }; -// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method -// -// see reference: -// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". -// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". +/** @brief Creates instance of cv::DenseOpticalFlow +*/ CV_EXPORTS_W Ptr createOptFlow_DualTVL1(); +//! 
@} video_track + } // cv #endif diff --git a/modules/video/include/opencv2/video/tracking_c.h b/modules/video/include/opencv2/video/tracking_c.h index e05a0b3ada..b355352879 100644 --- a/modules/video/include/opencv2/video/tracking_c.h +++ b/modules/video/include/opencv2/video/tracking_c.h @@ -50,6 +50,10 @@ extern "C" { #endif +/** @addtogroup video_c + @{ +*/ + /****************************************************************************************\ * Motion Analysis * \****************************************************************************************/ @@ -218,6 +222,7 @@ CVAPI(const CvMat*) cvKalmanCorrect( CvKalman* kalman, const CvMat* measurement #define cvKalmanUpdateByTime cvKalmanPredict #define cvKalmanUpdateByMeasurement cvKalmanCorrect +/** @} video_c */ #ifdef __cplusplus } // extern "C" diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index b0a371af3f..8610fe3e8c 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -45,6 +45,13 @@ #include "opencv2/core.hpp" +/** + @defgroup videoio Media I/O + @{ + @defgroup videoio_c C API + @defgroup videoio_ios iOS glue + @} +*/ ////////////////////////////////// video io ///////////////////////////////// @@ -54,6 +61,9 @@ typedef struct CvVideoWriter CvVideoWriter; namespace cv { +//! @addtogroup videoio +//! @{ + // Camera API enum { CAP_ANY = 0, // autodetect CAP_VFW = 200, // platform native @@ -345,26 +355,209 @@ enum { CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integ class IVideoCapture; + +/** @brief Class for video capturing from video files, image sequences or cameras. The class provides C++ API +for capturing video from cameras or for reading video files and image sequences. 
Here is how the +class can be used: : +@code + #include "opencv2/opencv.hpp" + + using namespace cv; + + int main(int, char**) + { + VideoCapture cap(0); // open the default camera + if(!cap.isOpened()) // check if we succeeded + return -1; + + Mat edges; + namedWindow("edges",1); + for(;;) + { + Mat frame; + cap >> frame; // get a new frame from camera + cvtColor(frame, edges, COLOR_BGR2GRAY); + GaussianBlur(edges, edges, Size(7,7), 1.5, 1.5); + Canny(edges, edges, 0, 30, 3); + imshow("edges", edges); + if(waitKey(30) >= 0) break; + } + // the camera will be deinitialized automatically in VideoCapture destructor + return 0; + } +@endcode +@note In C API the black-box structure CvCapture is used instead of VideoCapture. + +@note +- A basic sample on using the VideoCapture interface can be found at + opencv_source_code/samples/cpp/starter_video.cpp +- Another basic video processing sample can be found at + opencv_source_code/samples/cpp/video_dmtx.cpp +- (Python) A basic sample on using the VideoCapture interface can be found at + opencv_source_code/samples/python2/video.py +- (Python) Another basic video processing sample can be found at + opencv_source_code/samples/python2/video_dmtx.py +- (Python) A multi threaded video processing sample can be found at + opencv_source_code/samples/python2/video_threaded.py + */ class CV_EXPORTS_W VideoCapture { public: + /** @brief + @note In C API, when you finished working with video, release CvCapture structure with + cvReleaseCapture(), or use Ptr\ that calls cvReleaseCapture() automatically in the + destructor. + */ CV_WRAP VideoCapture(); + + /** @overload + @param filename name of the opened video file (eg. video.avi) or image sequence (eg. + img_%02d.jpg, which will read samples like img_00.jpg, img_01.jpg, img_02.jpg, ...) + */ CV_WRAP VideoCapture(const String& filename); + + /** @overload + @param device id of the opened video capturing device (i.e. a camera index). 
If there is a single + camera connected, just pass 0. + */ CV_WRAP VideoCapture(int device); virtual ~VideoCapture(); + + /** @brief Open video file or a capturing device for video capturing + + @param filename name of the opened video file (eg. video.avi) or image sequence (eg. + img_%02d.jpg, which will read samples like img_00.jpg, img_01.jpg, img_02.jpg, ...) + + The methods first call VideoCapture::release to close the already opened file or camera. + */ CV_WRAP virtual bool open(const String& filename); + + /** @overload + @param device id of the opened video capturing device (i.e. a camera index). + */ CV_WRAP virtual bool open(int device); + + /** @brief Returns true if video capturing has been initialized already. + + If the previous call to VideoCapture constructor or VideoCapture::open succeeded, the method returns + true. + */ CV_WRAP virtual bool isOpened() const; + + /** @brief Closes video file or capturing device. + + The methods are automatically called by subsequent VideoCapture::open and by VideoCapture + destructor. + + The C function also deallocates memory and clears \*capture pointer. + */ CV_WRAP virtual void release(); + /** @brief Grabs the next frame from video file or capturing device. + + The methods/functions grab the next frame from video file or camera and return true (non-zero) in + the case of success. + + The primary use of the function is in multi-camera environments, especially when the cameras do not + have hardware synchronization. That is, you call VideoCapture::grab() for each camera and after that + call the slower method VideoCapture::retrieve() to decode and get frame from each camera. This way + the overhead on demosaicing or motion jpeg decompression etc. is eliminated and the retrieved frames + from different cameras will be closer in time. 
+ + Also, when a connected camera is multi-head (for example, a stereo camera or a Kinect device), the + correct way of retrieving data from it is to call VideoCapture::grab first and then call + VideoCapture::retrieve one or more times with different values of the channel parameter. See + + */ CV_WRAP virtual bool grab(); + + /** @brief Decodes and returns the grabbed video frame. + + The methods/functions decode and return the just grabbed frame. If no frame has been grabbed + (camera has been disconnected, or there are no more frames in video file), the methods return false + and the functions return NULL pointer. + + @note OpenCV 1.x functions cvRetrieveFrame and cv.RetrieveFrame return image stored inside the video + capturing structure. It is not allowed to modify or release the image! You can copy the frame using + cvCloneImage and then do whatever you want with the copy. + */ CV_WRAP virtual bool retrieve(OutputArray image, int flag = 0); virtual VideoCapture& operator >> (CV_OUT Mat& image); virtual VideoCapture& operator >> (CV_OUT UMat& image); + + /** @brief Grabs, decodes and returns the next video frame. + + The methods/functions combine VideoCapture::grab and VideoCapture::retrieve in one call. This is the + most convenient method for reading video files or capturing data from a camera: it decodes and returns the just + grabbed frame. If no frame has been grabbed (camera has been disconnected, or there are no more + frames in video file), the methods return false and the functions return NULL pointer. + + @note OpenCV 1.x functions cvRetrieveFrame and cv.RetrieveFrame return image stored inside the video + capturing structure. It is not allowed to modify or release the image! You can copy the frame using + cvCloneImage and then do whatever you want with the copy. + */ CV_WRAP virtual bool read(OutputArray image); + /** @brief Sets a property in the VideoCapture. + + @param propId Property identifier.
It can be one of the following: + - **CV_CAP_PROP_POS_MSEC** Current position of the video file in milliseconds. + - **CV_CAP_PROP_POS_FRAMES** 0-based index of the frame to be decoded/captured next. + - **CV_CAP_PROP_POS_AVI_RATIO** Relative position of the video file: 0 - start of the + film, 1 - end of the film. + - **CV_CAP_PROP_FRAME_WIDTH** Width of the frames in the video stream. + - **CV_CAP_PROP_FRAME_HEIGHT** Height of the frames in the video stream. + - **CV_CAP_PROP_FPS** Frame rate. + - **CV_CAP_PROP_FOURCC** 4-character code of codec. + - **CV_CAP_PROP_FRAME_COUNT** Number of frames in the video file. + - **CV_CAP_PROP_FORMAT** Format of the Mat objects returned by retrieve() . + - **CV_CAP_PROP_MODE** Backend-specific value indicating the current capture mode. + - **CV_CAP_PROP_BRIGHTNESS** Brightness of the image (only for cameras). + - **CV_CAP_PROP_CONTRAST** Contrast of the image (only for cameras). + - **CV_CAP_PROP_SATURATION** Saturation of the image (only for cameras). + - **CV_CAP_PROP_HUE** Hue of the image (only for cameras). + - **CV_CAP_PROP_GAIN** Gain of the image (only for cameras). + - **CV_CAP_PROP_EXPOSURE** Exposure (only for cameras). + - **CV_CAP_PROP_CONVERT_RGB** Boolean flags indicating whether images should be converted + to RGB. + - **CV_CAP_PROP_WHITE_BALANCE** Currently unsupported + - **CV_CAP_PROP_RECTIFICATION** Rectification flag for stereo cameras (note: only supported + by DC1394 v 2.x backend currently) + @param value Value of the property. + */ CV_WRAP virtual bool set(int propId, double value); + + /** @brief Returns the specified VideoCapture property + + @param propId Property identifier. It can be one of the following: + - **CV_CAP_PROP_POS_MSEC** Current position of the video file in milliseconds or video + capture timestamp. + - **CV_CAP_PROP_POS_FRAMES** 0-based index of the frame to be decoded/captured next. 
+ - **CV_CAP_PROP_POS_AVI_RATIO** Relative position of the video file: 0 - start of the + film, 1 - end of the film. + - **CV_CAP_PROP_FRAME_WIDTH** Width of the frames in the video stream. + - **CV_CAP_PROP_FRAME_HEIGHT** Height of the frames in the video stream. + - **CV_CAP_PROP_FPS** Frame rate. + - **CV_CAP_PROP_FOURCC** 4-character code of codec. + - **CV_CAP_PROP_FRAME_COUNT** Number of frames in the video file. + - **CV_CAP_PROP_FORMAT** Format of the Mat objects returned by retrieve() . + - **CV_CAP_PROP_MODE** Backend-specific value indicating the current capture mode. + - **CV_CAP_PROP_BRIGHTNESS** Brightness of the image (only for cameras). + - **CV_CAP_PROP_CONTRAST** Contrast of the image (only for cameras). + - **CV_CAP_PROP_SATURATION** Saturation of the image (only for cameras). + - **CV_CAP_PROP_HUE** Hue of the image (only for cameras). + - **CV_CAP_PROP_GAIN** Gain of the image (only for cameras). + - **CV_CAP_PROP_EXPOSURE** Exposure (only for cameras). + - **CV_CAP_PROP_CONVERT_RGB** Boolean flags indicating whether images should be converted + to RGB. + - **CV_CAP_PROP_WHITE_BALANCE** Currently not supported + - **CV_CAP_PROP_RECTIFICATION** Rectification flag for stereo cameras (note: only supported + by DC1394 v 2.x backend currently) + + **Note**: When querying a property that is not supported by the backend used by the VideoCapture + class, value 0 is returned. + */ CV_WRAP virtual double get(int propId); protected: @@ -374,21 +567,63 @@ private: static Ptr createCameraCapture(int index); }; +/** @brief Video writer class. + */ class CV_EXPORTS_W VideoWriter { public: + /** @brief VideoWriter constructors + + The constructors/functions initialize video writers. On Linux FFMPEG is used to write videos; on + Windows FFMPEG or VFW is used; on MacOSX QTKit is used. + */ CV_WRAP VideoWriter(); + + /** @overload + @param filename Name of the output video file. + @param fourcc 4-character code of codec used to compress the frames. 
For example, + VideoWriter::fourcc('P','I','M','1') is a MPEG-1 codec, VideoWriter::fourcc('M','J','P','G') is a + motion-jpeg codec etc. List of codes can be obtained at [Video Codecs by + FOURCC](http://www.fourcc.org/codecs.php) page. + @param fps Framerate of the created video stream. + @param frameSize Size of the video frames. + @param isColor If it is not zero, the encoder will expect and encode color frames, otherwise it + will work with grayscale frames (the flag is currently supported on Windows only). + */ CV_WRAP VideoWriter(const String& filename, int fourcc, double fps, Size frameSize, bool isColor = true); virtual ~VideoWriter(); + + /** @brief Initializes or reinitializes video writer. + + The method opens video writer. Parameters are the same as in the constructor + VideoWriter::VideoWriter. + + */ CV_WRAP virtual bool open(const String& filename, int fourcc, double fps, Size frameSize, bool isColor = true); + + /** @brief Returns true if video writer has been successfully initialized. + */ CV_WRAP virtual bool isOpened() const; CV_WRAP virtual void release(); virtual VideoWriter& operator << (const Mat& image); + + /** @brief Writes the next video frame + + @param image The written frame + + The functions/methods write the specified image to video file. It must have the same size as has + been specified when opening the video writer. + */ CV_WRAP virtual void write(const Mat& image); + /** @brief Concatenates 4 chars to a fourcc code + + This static method constructs the fourcc code of the codec to be used in the constructor + VideoWriter::VideoWriter or VideoWriter::open. + */ CV_WRAP static int fourcc(char c1, char c2, char c3, char c4); protected: @@ -398,6 +633,8 @@ protected: template<> CV_EXPORTS void DefaultDeleter::operator ()(CvCapture* obj) const; template<> CV_EXPORTS void DefaultDeleter::operator ()(CvVideoWriter* obj) const; +//! 
@} videoio + } // cv #endif //__OPENCV_VIDEOIO_HPP__ diff --git a/modules/videoio/include/opencv2/videoio/cap_ios.h b/modules/videoio/include/opencv2/videoio/cap_ios.h index 4d270aba9e..cf7f2e4ff9 100644 --- a/modules/videoio/include/opencv2/videoio/cap_ios.h +++ b/modules/videoio/include/opencv2/videoio/cap_ios.h @@ -32,6 +32,9 @@ #import #include "opencv2/core.hpp" +//! @addtogroup videoio_ios +//! @{ + /////////////////////////////////////// CvAbstractCamera ///////////////////////////////////// @class CvAbstractCamera; @@ -167,3 +170,5 @@ - (void)takePicture; @end + +//! @} videoio_ios diff --git a/modules/videoio/include/opencv2/videoio/videoio_c.h b/modules/videoio/include/opencv2/videoio/videoio_c.h index 13805c0a4c..d993ab312d 100644 --- a/modules/videoio/include/opencv2/videoio/videoio_c.h +++ b/modules/videoio/include/opencv2/videoio/videoio_c.h @@ -48,6 +48,10 @@ extern "C" { #endif /* __cplusplus */ +/** + @addtogroup videoio_c + @{ +*/ /****************************************************************************************\ * Working with Video Files and Cameras * @@ -416,6 +420,7 @@ CVAPI(void) cvReleaseVideoWriter( CvVideoWriter** writer ); #define cvCreateAVIWriter cvCreateVideoWriter #define cvWriteToAVI cvWriteFrame +/** @} videoio_c */ #ifdef __cplusplus } diff --git a/modules/videostab/include/opencv2/videostab.hpp b/modules/videostab/include/opencv2/videostab.hpp index 3f86089430..17b061f8d6 100644 --- a/modules/videostab/include/opencv2/videostab.hpp +++ b/modules/videostab/include/opencv2/videostab.hpp @@ -40,15 +40,41 @@ // //M*/ -// REFERENCES -// 1. "Full-Frame Video Stabilization with Motion Inpainting" -// Yasuyuki Matsushita, Eyal Ofek, Weina Ge, Xiaoou Tang, Senior Member, and Heung-Yeung Shum -// 2.
"Auto-Directed Video Stabilization with Robust L1 Optimal Camera Paths" -// Matthias Grundmann, Vivek Kwatra, Irfan Essa - #ifndef __OPENCV_VIDEOSTAB_HPP__ #define __OPENCV_VIDEOSTAB_HPP__ +/** + @defgroup videostab Video Stabilization + +The video stabilization module contains a set of functions and classes that can be used to solve the +problem of video stabilization. There are a few methods implemented, most of them are described in +the papers @cite OF06 and @cite G11 . However, there are some extensions and deviations from the original +paper methods. + +### References + + 1. "Full-Frame Video Stabilization with Motion Inpainting" + Yasuyuki Matsushita, Eyal Ofek, Weina Ge, Xiaoou Tang, Senior Member, and Heung-Yeung Shum + 2. "Auto-Directed Video Stabilization with Robust L1 Optimal Camera Paths" + Matthias Grundmann, Vivek Kwatra, Irfan Essa + + @{ + @defgroup videostab_motion Global Motion Estimation + +The video stabilization module contains a set of functions and classes for global motion estimation +between point clouds or between images. In the last case features are extracted and matched +internally. For the sake of convenience the motion estimation functions are wrapped into classes. +Both the functions and the classes are available. + + @defgroup videostab_marching Fast Marching Method + +The Fast Marching Method @cite Telea04 is used in one of the video stabilization routines to do motion and +color inpainting. The method is implemented in a flexible way and it's made public for other users. + + @} + +*/ + #include "opencv2/videostab/stabilizer.hpp" #include "opencv2/videostab/ring_buffer.hpp" diff --git a/modules/videostab/include/opencv2/videostab/deblurring.hpp b/modules/videostab/include/opencv2/videostab/deblurring.hpp index 7359f8ee35..8028c1d811 100644 --- a/modules/videostab/include/opencv2/videostab/deblurring.hpp +++ b/modules/videostab/include/opencv2/videostab/deblurring.hpp @@ -51,6 +51,9 @@ namespace cv namespace videostab { +//!
@addtogroup videostab +//! @{ + CV_EXPORTS float calcBlurriness(const Mat &frame); class CV_EXPORTS DeblurerBase @@ -105,6 +108,8 @@ private: Mat_ bSum_, gSum_, rSum_, wSum_; }; +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/fast_marching.hpp b/modules/videostab/include/opencv2/videostab/fast_marching.hpp index b948c887cc..c0c7985a74 100644 --- a/modules/videostab/include/opencv2/videostab/fast_marching.hpp +++ b/modules/videostab/include/opencv2/videostab/fast_marching.hpp @@ -53,15 +53,31 @@ namespace cv namespace videostab { -// See http://iwi.eldoc.ub.rug.nl/FILES/root/2004/JGraphToolsTelea/2004JGraphToolsTelea.pdf +//! @addtogroup videostab_marching +//! @{ + +/** @brief Describes the Fast Marching Method implementation. + + See http://iwi.eldoc.ub.rug.nl/FILES/root/2004/JGraphToolsTelea/2004JGraphToolsTelea.pdf + */ class CV_EXPORTS FastMarchingMethod { public: FastMarchingMethod() : inf_(1e6f) {} + /** @brief Template method that runs the Fast Marching Method. + + @param mask Image mask. 0 value indicates that the pixel value must be inpainted, 255 indicates + that the pixel value is known, other values aren't acceptable. + @param inpaint Inpainting functor that overloads void operator ()(int x, int y). + @return Inpainting functor. + */ template Inpaint run(const Mat &mask, Inpaint inpaint); + /** + @return Distance map that's created during working of the method. + */ Mat distanceMap() const { return dist_; } private: @@ -95,6 +111,8 @@ private: int size_; // narrow band size }; +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/frame_source.hpp b/modules/videostab/include/opencv2/videostab/frame_source.hpp index 0bcc3fc681..612fbdb30b 100644 --- a/modules/videostab/include/opencv2/videostab/frame_source.hpp +++ b/modules/videostab/include/opencv2/videostab/frame_source.hpp @@ -51,6 +51,9 @@ namespace cv namespace videostab { +//! 
@addtogroup videostab +//! @{ + class CV_EXPORTS IFrameSource { public: @@ -83,6 +86,8 @@ private: Ptr impl; }; +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/global_motion.hpp b/modules/videostab/include/opencv2/videostab/global_motion.hpp index 494c2da787..547f1b2821 100644 --- a/modules/videostab/include/opencv2/videostab/global_motion.hpp +++ b/modules/videostab/include/opencv2/videostab/global_motion.hpp @@ -61,23 +61,62 @@ namespace cv namespace videostab { +//! @addtogroup videostab_motion +//! @{ + +/** @brief Estimates best global motion between two 2D point clouds in the least-squares sense. + +@note Works in-place and changes input point arrays. + +@param points0 Source set of 2D points (32F). +@param points1 Destination set of 2D points (32F). +@param model Motion model (up to MM_AFFINE). +@param rmse Final root-mean-square error. +@return 3x3 2D transformation matrix (32F). + */ CV_EXPORTS Mat estimateGlobalMotionLeastSquares( InputOutputArray points0, InputOutputArray points1, int model = MM_AFFINE, float *rmse = 0); +/** @brief Estimates best global motion between two 2D point clouds robustly (using RANSAC method). + +@param points0 Source set of 2D points (32F). +@param points1 Destination set of 2D points (32F). +@param model Motion model. See cv::videostab::MotionModel. +@param params RANSAC method parameters. See videostab::RansacParams. +@param rmse Final root-mean-square error. +@param ninliers Final number of inliers. + */ CV_EXPORTS Mat estimateGlobalMotionRansac( InputArray points0, InputArray points1, int model = MM_AFFINE, const RansacParams &params = RansacParams::default2dMotion(MM_AFFINE), float *rmse = 0, int *ninliers = 0); +/** @brief Base class for all global motion estimation methods. + */ class CV_EXPORTS MotionEstimatorBase { public: virtual ~MotionEstimatorBase() {} + /** @brief Sets motion model. + + @param val Motion model. See cv::videostab::MotionModel.
+ */ virtual void setMotionModel(MotionModel val) { motionModel_ = val; } + + /** + @return Motion model. See cv::videostab::MotionModel. + */ virtual MotionModel motionModel() const { return motionModel_; } + /** @brief Estimates global motion between two 2D point clouds. + + @param points0 Source set of 2D points (32F). + @param points1 Destination set of 2D points (32F). + @param ok Indicates whether motion was estimated successfully. + @return 3x3 2D transformation matrix (32F). + */ virtual Mat estimate(InputArray points0, InputArray points1, bool *ok = 0) = 0; protected: @@ -87,6 +126,8 @@ private: MotionModel motionModel_; }; +/** @brief Describes a robust RANSAC-based global 2D motion estimation method which minimizes L2 error. + */ class CV_EXPORTS MotionEstimatorRansacL2 : public MotionEstimatorBase { public: @@ -105,6 +146,10 @@ private: float minInlierRatio_; }; +/** @brief Describes a global 2D motion estimation method which minimizes L1 error. + +@note To be able to use this method you must build OpenCV with CLP library support. : + */ class CV_EXPORTS MotionEstimatorL1 : public MotionEstimatorBase { public: @@ -125,6 +170,8 @@ private: } }; +/** @brief Base class for global 2D motion estimation methods which take frames as input. + */ class CV_EXPORTS ImageMotionEstimatorBase { public: @@ -168,6 +215,9 @@ private: Ptr motionEstimator_; }; +/** @brief Describes a global 2D motion estimation method which uses keypoints detection and optical flow for +matching. + */ class CV_EXPORTS KeypointBasedMotionEstimator : public ImageMotionEstimatorBase { public: @@ -232,8 +282,17 @@ private: #endif // defined(HAVE_OPENCV_CUDAIMGPROC) && defined(HAVE_OPENCV_CUDA) && defined(HAVE_OPENCV_CUDAOPTFLOW) +/** @brief Computes motion between two frames assuming that all the intermediate motions are known. + +@param from Source frame index. +@param to Destination frame index. +@param motions Pair-wise motions. 
motions[i] denotes motion from the frame i to the frame i+1 +@return Motion from the frame from to the frame to. + */ CV_EXPORTS Mat getMotion(int from, int to, const std::vector &motions); +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/inpainting.hpp b/modules/videostab/include/opencv2/videostab/inpainting.hpp index 402745e7e1..844c68c7b3 100644 --- a/modules/videostab/include/opencv2/videostab/inpainting.hpp +++ b/modules/videostab/include/opencv2/videostab/inpainting.hpp @@ -55,6 +55,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + class CV_EXPORTS InpainterBase { public: @@ -201,6 +204,8 @@ CV_EXPORTS void completeFrameAccordingToFlow( const Mat &flowMask, const Mat &flowX, const Mat &flowY, const Mat &frame1, const Mat &mask1, float distThresh, Mat& frame0, Mat &mask0); +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/log.hpp b/modules/videostab/include/opencv2/videostab/log.hpp index 9dfed5205f..28625ed298 100644 --- a/modules/videostab/include/opencv2/videostab/log.hpp +++ b/modules/videostab/include/opencv2/videostab/log.hpp @@ -50,6 +50,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + class CV_EXPORTS ILog { public: @@ -69,6 +72,8 @@ public: virtual void print(const char *format, ...); }; +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/motion_core.hpp b/modules/videostab/include/opencv2/videostab/motion_core.hpp index c72e34fba8..17448e3c29 100644 --- a/modules/videostab/include/opencv2/videostab/motion_core.hpp +++ b/modules/videostab/include/opencv2/videostab/motion_core.hpp @@ -51,6 +51,11 @@ namespace cv namespace videostab { +//! @addtogroup videostab_motion +//! @{ + +/** @brief Describes motion model between two point clouds. 
+ */ enum MotionModel { MM_TRANSLATION = 0, @@ -63,22 +68,37 @@ enum MotionModel MM_UNKNOWN = 7 }; +/** @brief Describes RANSAC method parameters. + */ struct CV_EXPORTS RansacParams { - int size; // subset size - float thresh; // max error to classify as inlier - float eps; // max outliers ratio - float prob; // probability of success + int size; //!< subset size + float thresh; //!< max error to classify as inlier + float eps; //!< max outliers ratio + float prob; //!< probability of success RansacParams() : size(0), thresh(0), eps(0), prob(0) {} + /** @brief Constructor + @param size Subset size. + @param thresh Maximum re-projection error value to classify as inlier. + @param eps Maximum ratio of incorrect correspondences. + @param prob Required success probability. + */ RansacParams(int size, float thresh, float eps, float prob); + /** + @return Number of iterations that'll be performed by RANSAC method. + */ int niters() const { return static_cast( std::ceil(std::log(1 - prob) / std::log(1 - std::pow(1 - eps, size)))); } + /** + @param model Motion model. See cv::videostab::MotionModel. + @return Default RANSAC method parameters for the given motion model. + */ static RansacParams default2dMotion(MotionModel model) { CV_Assert(model < MM_UNKNOWN); @@ -101,6 +121,7 @@ struct CV_EXPORTS RansacParams inline RansacParams::RansacParams(int _size, float _thresh, float _eps, float _prob) : size(_size), thresh(_thresh), eps(_eps), prob(_prob) {} +//! @} } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/motion_stabilizing.hpp b/modules/videostab/include/opencv2/videostab/motion_stabilizing.hpp index 6b8895f0b1..3bdbfbd009 100644 --- a/modules/videostab/include/opencv2/videostab/motion_stabilizing.hpp +++ b/modules/videostab/include/opencv2/videostab/motion_stabilizing.hpp @@ -53,12 +53,15 @@ namespace cv namespace videostab { +//! @addtogroup videostab_motion +//! 
@{ + class CV_EXPORTS IMotionStabilizer { public: virtual ~IMotionStabilizer() {} - // assumes that [0, size-1) is in or equals to [range.first, range.second) + //! assumes that [0, size-1) is in or equals to [range.first, range.second) virtual void stabilize( int size, const std::vector &motions, std::pair range, Mat *stabilizationMotions) = 0; @@ -163,6 +166,8 @@ CV_EXPORTS Mat ensureInclusionConstraint(const Mat &M, Size size, float trimRati CV_EXPORTS float estimateOptimalTrimRatio(const Mat &M, Size size); +//! @} + } // namespace videostab } // namespace diff --git a/modules/videostab/include/opencv2/videostab/optical_flow.hpp b/modules/videostab/include/opencv2/videostab/optical_flow.hpp index 4a21f94649..a34a82e3f8 100644 --- a/modules/videostab/include/opencv2/videostab/optical_flow.hpp +++ b/modules/videostab/include/opencv2/videostab/optical_flow.hpp @@ -55,6 +55,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + class CV_EXPORTS ISparseOptFlowEstimator { public: @@ -139,6 +142,8 @@ private: #endif +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/outlier_rejection.hpp b/modules/videostab/include/opencv2/videostab/outlier_rejection.hpp index a9c7578fce..9e40f854ca 100644 --- a/modules/videostab/include/opencv2/videostab/outlier_rejection.hpp +++ b/modules/videostab/include/opencv2/videostab/outlier_rejection.hpp @@ -52,6 +52,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + class CV_EXPORTS IOutlierRejector { public: @@ -90,6 +93,8 @@ private: std::vector grid_; }; +//! 
@} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/ring_buffer.hpp b/modules/videostab/include/opencv2/videostab/ring_buffer.hpp index a820edef9e..7cc3f03e9a 100644 --- a/modules/videostab/include/opencv2/videostab/ring_buffer.hpp +++ b/modules/videostab/include/opencv2/videostab/ring_buffer.hpp @@ -51,6 +51,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + template inline T& at(int idx, std::vector &items) { return items[cv::borderInterpolate(idx, static_cast(items.size()), cv::BORDER_WRAP)]; @@ -61,6 +64,8 @@ template inline const T& at(int idx, const std::vector &items) return items[cv::borderInterpolate(idx, static_cast(items.size()), cv::BORDER_WRAP)]; } +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/stabilizer.hpp b/modules/videostab/include/opencv2/videostab/stabilizer.hpp index b021b47987..c18d31416c 100644 --- a/modules/videostab/include/opencv2/videostab/stabilizer.hpp +++ b/modules/videostab/include/opencv2/videostab/stabilizer.hpp @@ -60,6 +60,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + class CV_EXPORTS StabilizerBase { public: @@ -189,6 +192,8 @@ protected: Mat suppressedFrame_; }; +//! @} + } // namespace videostab } // namespace cv diff --git a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp index c95b08d0eb..6701d78101 100644 --- a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp +++ b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp @@ -54,6 +54,9 @@ namespace cv namespace videostab { +//! @addtogroup videostab +//! @{ + class CV_EXPORTS WobbleSuppressorBase { public: @@ -129,6 +132,8 @@ private: }; #endif +//! 
@} + } // namespace videostab } // namespace cv diff --git a/modules/viz/include/opencv2/viz.hpp b/modules/viz/include/opencv2/viz.hpp index 6fa6249e3b..3f8353efe3 100644 --- a/modules/viz/include/opencv2/viz.hpp +++ b/modules/viz/include/opencv2/viz.hpp @@ -51,4 +51,34 @@ #include #include +/** + @defgroup viz 3D Visualizer + +This section describes 3D visualization window as well as classes and methods that are used to +interact with it. + +3D visualization window (see Viz3d) is used to display widgets (see Widget), and it provides several +methods to interact with scene and widgets. + + @{ + @defgroup viz_widget Widget + +In this section, the widget framework is explained. Widgets represent 2D or 3D objects, varying from +simple ones such as lines to complex ones such as point clouds and meshes. + +Widgets are **implicitly shared**. Therefore, one can add a widget to the scene, and modify the +widget without re-adding the widget. + +@code +// Create a cloud widget +viz::WCloud cw(cloud, viz::Color::red()); +// Display it in a window +myWindow.showWidget("CloudWidget1", cw); +// Modify it, and it will be modified in the window. +cw.setColor(viz::Color::yellow()); +@endcode + + @} +*/ + #endif /* __OPENCV_VIZ_HPP__ */ diff --git a/modules/viz/include/opencv2/viz/types.hpp b/modules/viz/include/opencv2/viz/types.hpp index 0e638a9290..dc158664eb 100644 --- a/modules/viz/include/opencv2/viz/types.hpp +++ b/modules/viz/include/opencv2/viz/types.hpp @@ -54,6 +54,12 @@ namespace cv { namespace viz { + +//! @addtogroup viz +//! @{ + + /** @brief This class represents a BGR color. + */ class Color : public Scalar { public: @@ -108,6 +114,8 @@ namespace cv static Color not_set(); }; + /** @brief This class wraps mesh attributes, and it can load a mesh from a ply file. : + */ class CV_EXPORTS Mesh { public: @@ -119,16 +127,49 @@ namespace cv Mat texture, tcoords; - //! 
Loads mesh from a given ply file (no texture load support for now) + /** @brief Loads a mesh from a ply file. + + @param file File name (for now only PLY is supported) + */ static Mesh load(const String& file); }; + /** @brief This class wraps intrinsic parameters of a camera. + + It provides several constructors that can extract the intrinsic parameters from field of + view, intrinsic matrix and projection matrix. : + */ class CV_EXPORTS Camera { public: + + /** @brief Constructs a Camera. + + @param fx Horizontal focal length. + @param fy Vertical focal length. + @param cx x coordinate of the principal point. + @param cy y coordinate of the principal point. + @param window_size Size of the window. This together with focal length and principal + point determines the field of view. + */ Camera(double fx, double fy, double cx, double cy, const Size &window_size); + /** @overload + @param fov Field of view (horizontal, vertical) + @param window_size Size of the window. Principal point is at the center of the window + by default. + */ explicit Camera(const Vec2d &fov, const Size &window_size); + /** @overload + @param K Intrinsic matrix of the camera. + @param window_size Size of the window. This together with intrinsic matrix determines + the field of view. + */ explicit Camera(const Matx33d &K, const Size &window_size); + /** @overload + @param proj Projection matrix of the camera. + @param window_size Size of the window. This together with projection matrix determines + the field of view. + */ explicit Camera(const Matx44d &proj, const Size &window_size); const Vec2d & getClip() const { return clip_; } @@ -143,8 +184,17 @@ namespace cv const Vec2d& getPrincipalPoint() const { return principal_point_; } const Vec2d& getFocalLength() const { return focal_; } + /** @brief Computes projection matrix using intrinsic parameters of the camera. + + @param proj Output projection matrix. 
+ */ void computeProjectionMatrix(Matx44d &proj) const; + /** @brief Creates a Kinect Camera. + + @param window_size Size of the window. This together with intrinsic matrix of a Kinect Camera + determines the field of view. + */ static Camera KinectCamera(const Size &window_size); private: @@ -157,12 +207,21 @@ namespace cv Vec2d focal_; }; + /** @brief This class represents a keyboard event. + */ class CV_EXPORTS KeyboardEvent { public: enum { NONE = 0, ALT = 1, CTRL = 2, SHIFT = 4 }; enum Action { KEY_UP = 0, KEY_DOWN = 1 }; + /** @brief Constructs a KeyboardEvent. + + @param action Signals if key is pressed or released. + @param symbol Name of the key. + @param code Code of the key. + @param modifiers Signals if alt, ctrl or shift are pressed or their combination. + */ KeyboardEvent(Action action, const String& symbol, unsigned char code, int modifiers); Action action; @@ -171,12 +230,23 @@ namespace cv int modifiers; }; + /** @brief This class represents a mouse event. + */ class CV_EXPORTS MouseEvent { public: enum Type { MouseMove = 1, MouseButtonPress, MouseButtonRelease, MouseScrollDown, MouseScrollUp, MouseDblClick } ; enum MouseButton { NoButton = 0, LeftButton, MiddleButton, RightButton, VScroll } ; + /** @brief Constructs a MouseEvent. + + @param type Type of the event. This can be **MouseMove**, **MouseButtonPress**, + **MouseButtonRelease**, **MouseScrollDown**, **MouseScrollUp**, **MouseDblClick**. + @param button Mouse button. This can be **NoButton**, **LeftButton**, **MiddleButton**, + **RightButton**, **VScroll**. + @param pointer Position of the event. + @param modifiers Signals if alt, ctrl or shift are pressed or their combination. + */ MouseEvent(const Type& type, const MouseButton& button, const Point& pointer, int modifiers); Type type; @@ -184,9 +254,14 @@ namespace cv Point pointer; int modifiers; }; + +//! @} viz + } /* namespace viz */ } /* namespace cv */ +//! 
@cond IGNORED + ////////////////////////////////////////////////////////////////////////////////////////////////////// /// cv::viz::Color @@ -237,4 +312,6 @@ inline cv::viz::Color cv::viz::Color::amethyst() { return Color(204, 102, inline cv::viz::Color cv::viz::Color::not_set() { return Color(-1, -1, -1); } +//! @endcond + #endif diff --git a/modules/viz/include/opencv2/viz/viz3d.hpp b/modules/viz/include/opencv2/viz/viz3d.hpp index 9917213c17..447004f6f2 100644 --- a/modules/viz/include/opencv2/viz/viz3d.hpp +++ b/modules/viz/include/opencv2/viz/viz3d.hpp @@ -58,6 +58,12 @@ namespace cv { namespace viz { + +//! @addtogroup viz +//! @{ + + /** @brief The Viz3d class represents a 3D visualizer window. This class is implicitly shared. : + */ class CV_EXPORTS Viz3d { public: @@ -65,54 +71,240 @@ namespace cv typedef void (*KeyboardCallback)(const KeyboardEvent&, void*); typedef void (*MouseCallback)(const MouseEvent&, void*); + /** @brief The constructors. + + @param window_name Name of the window. + */ Viz3d(const String& window_name = String()); Viz3d(const Viz3d&); Viz3d& operator=(const Viz3d&); ~Viz3d(); + /** @brief Shows a widget in the window. + + @param id A unique id for the widget. @param widget The widget to be displayed in the window. + @param pose Pose of the widget. + */ void showWidget(const String &id, const Widget &widget, const Affine3d &pose = Affine3d::Identity()); + + /** @brief Removes a widget from the window. + + @param id The id of the widget that will be removed. + */ void removeWidget(const String &id); + + /** @brief Retrieves a widget from the window. + + A widget is implicitly shared; that is, if the returned widget is modified, the changes + will be immediately visible in the window. + + @param id The id of the widget that will be returned. + */ Widget getWidget(const String &id) const; + + /** @brief Removes all widgets from the window. 
+ */ void removeAllWidgets(); + /** @brief Removes all widgets and displays image scaled to whole window area. + + @param image Image to be displayed. + @param window_size Size of Viz3d window. Default value means no change. + */ void showImage(InputArray image, const Size& window_size = Size(-1, -1)); + /** @brief Sets pose of a widget in the window. + + @param id The id of the widget whose pose will be set. @param pose The new pose of the widget. + */ void setWidgetPose(const String &id, const Affine3d &pose); + + /** @brief Updates pose of a widget in the window by pre-multiplying its current pose. + + @param id The id of the widget whose pose will be updated. @param pose The pose that the current + pose of the widget will be pre-multiplied by. + */ void updateWidgetPose(const String &id, const Affine3d &pose); + + /** @brief Returns the current pose of a widget in the window. + + @param id The id of the widget whose pose will be returned. + */ Affine3d getWidgetPose(const String &id) const; + /** @brief Sets the intrinsic parameters of the viewer using Camera. + + @param camera Camera object wrapping intrinsic parameters. + */ void setCamera(const Camera &camera); + + /** @brief Returns a camera object that contains intrinsic parameters of the current viewer. + */ Camera getCamera() const; + + /** @brief Returns the current pose of the viewer. + */ Affine3d getViewerPose(); + + /** @brief Sets pose of the viewer. + + @param pose The new pose of the viewer. + */ void setViewerPose(const Affine3d &pose); + /** @brief Resets camera viewpoint to a 3D widget in the scene. + + @param id Id of a 3D widget. + */ void resetCameraViewpoint(const String &id); + + /** @brief Resets camera. + */ void resetCamera(); + /** @brief Transforms a point in world coordinate system to window coordinate system. + + @param pt Point in world coordinate system. + @param window_coord Output point in window coordinate system. 
+ */ void convertToWindowCoordinates(const Point3d &pt, Point3d &window_coord); + + /** @brief Transforms a point in window coordinate system to a 3D ray in world coordinate system. + + @param window_coord Point in window coordinate system. @param origin Output origin of the ray. + @param direction Output direction of the ray. + */ void converTo3DRay(const Point3d &window_coord, Point3d &origin, Vec3d &direction); + /** @brief Returns the current size of the window. + */ Size getWindowSize() const; + /** @brief Sets the size of the window. + + @param window_size New size of the window. + */ void setWindowSize(const Size &window_size); + + /** @brief Returns the name of the window which has been set in the constructor. + */ String getWindowName() const; + + /** @brief Saves screenshot of the current scene. + + @param file Name of the file. + */ void saveScreenshot(const String &file); + + /** @brief Sets the position of the window in the screen. + + @param window_position coordinates of the window + */ void setWindowPosition(const Point& window_position); + + /** @brief Sets or unsets full-screen rendering mode. + + @param mode If true, window will use full-screen mode. + */ void setFullScreen(bool mode = true); + + /** @brief Sets background color. + */ void setBackgroundColor(const Color& color = Color::black(), const Color& color2 = Color::not_set()); void setBackgroundTexture(InputArray image = noArray()); void setBackgroundMeshLab(); + /** @brief The window renders and starts the event loop. + */ void spin(); + + /** @brief Starts the event loop for a given time. + + @param time Amount of time in milliseconds for the event loop to keep running. + @param force_redraw If true, window renders. + */ void spinOnce(int time = 1, bool force_redraw = false); + + /** @brief Returns whether the event loop has been stopped. + */ bool wasStopped() const; void close(); + /** @brief Sets keyboard handler. 
+ + @param callback Keyboard callback (void (\*KeyboardCallback)(const + KeyboardEvent&, void\*)). + @param cookie The optional parameter passed to the callback. + */ void registerKeyboardCallback(KeyboardCallback callback, void* cookie = 0); + + /** @brief Sets mouse handler. + + @param callback Mouse callback (void (\*MouseCallback)(const MouseEvent&, void\*)). + @param cookie The optional parameter passed to the callback. + */ void registerMouseCallback(MouseCallback callback, void* cookie = 0); + /** @brief Sets rendering property of a widget. + + @param id Id of the widget. + @param property Property that will be modified. + @param value The new value of the property. + + **Rendering property** can be one of the following: + - **POINT_SIZE** + - **OPACITY** + - **LINE_WIDTH** + - **FONT_SIZE** + - + **REPRESENTATION**: Expected values are + - **REPRESENTATION_POINTS** + - **REPRESENTATION_WIREFRAME** + - **REPRESENTATION_SURFACE** + - + **IMMEDIATE_RENDERING**: + - Turn on immediate rendering by setting the value to 1. + - Turn off immediate rendering by setting the value to 0. + - + **SHADING**: Expected values are + - **SHADING_FLAT** + - **SHADING_GOURAUD** + - **SHADING_PHONG** + */ void setRenderingProperty(const String &id, int property, double value); + /** @brief Returns rendering property of a widget. + + @param id Id of the widget. + @param property Property. + + **Rendering property** can be one of the following: + - **POINT_SIZE** + - **OPACITY** + - **LINE_WIDTH** + - **FONT_SIZE** + - + **REPRESENTATION**: Expected values are + - **REPRESENTATION_POINTS** + - **REPRESENTATION_WIREFRAME** + - **REPRESENTATION_SURFACE** + - + **IMMEDIATE_RENDERING**: + - Turn on immediate rendering by setting the value to 1. + - Turn off immediate rendering by setting the value to 0. 
+ - + **SHADING**: Expected values are + - **SHADING_FLAT** + - **SHADING_GOURAUD** + - **SHADING_PHONG** + */ double getRenderingProperty(const String &id, int property); + /** @brief Sets geometry representation of the widgets to surface, wireframe or points. + + @param representation Geometry representation which can be one of the following: + - **REPRESENTATION_POINTS** + - **REPRESENTATION_WIREFRAME** + - **REPRESENTATION_SURFACE** + */ void setRepresentation(int representation); void setGlobalWarnings(bool enabled = false); @@ -127,6 +319,8 @@ namespace cv friend class VizStorage; }; +//! @} + } /* namespace viz */ } /* namespace cv */ diff --git a/modules/viz/include/opencv2/viz/vizcore.hpp b/modules/viz/include/opencv2/viz/vizcore.hpp index 0fde95b2f1..76f1ba2063 100644 --- a/modules/viz/include/opencv2/viz/vizcore.hpp +++ b/modules/viz/include/opencv2/viz/vizcore.hpp @@ -54,13 +54,48 @@ namespace cv { namespace viz { - //! takes coordiante frame data and builds transfrom to global coordinate frame + +//! @addtogroup viz +//! @{ + + /** @brief Takes coordinate frame data and builds transform to global coordinate frame. + + @param axis_x X axis vector in global coordinate frame. @param axis_y Y axis vector in global + coordinate frame. @param axis_z Z axis vector in global coordinate frame. @param origin Origin of + the coordinate frame in global coordinate frame. + + This function returns affine transform that describes transformation between global coordinate frame + and a given coordinate frame. + */ CV_EXPORTS Affine3d makeTransformToGlobal(const Vec3d& axis_x, const Vec3d& axis_y, const Vec3d& axis_z, const Vec3d& origin = Vec3d::all(0)); - //! constructs camera pose from position, focal_point and up_vector (see gluLookAt() for more infromation) + /** @brief Constructs camera pose from position, focal_point and up_vector (see gluLookAt() for more + information). + + @param position Position of the camera in global coordinate frame. 
@param focal_point Focal point + of the camera in global coordinate frame. @param y_dir Up vector of the camera in global + coordinate frame. + + This function returns pose of the camera in global coordinate frame. + */ CV_EXPORTS Affine3d makeCameraPose(const Vec3d& position, const Vec3d& focal_point, const Vec3d& y_dir); - //! retrieves a window by its name. If no window with such name, then it creates new. + /** @brief Retrieves a window by its name. + + @param window_name Name of the window that is to be retrieved. + + This function returns a Viz3d object with the given name. + + @note If the window with that name already exists, that window is returned. Otherwise, new window is + created with the given name, and it is returned. + + @note Window names are automatically prefixed by "Viz - " if it is not done by the user. + @code + /// window and window_2 are the same windows. + viz::Viz3d window = viz::getWindowByName("myWindow"); + viz::Viz3d window_2 = viz::getWindowByName("Viz - myWindow"); + @endcode + */ CV_EXPORTS Viz3d getWindowByName(const String &window_name); //! Unregisters all Viz windows from internal database. After it 'getWindowByName()' will create new windows instead getting existing from the database. @@ -69,25 +104,37 @@ namespace cv //! Displays image in specified window CV_EXPORTS Viz3d imshow(const String& window_name, InputArray image, const Size& window_size = Size(-1, -1)); - //! checks float value for Nan + /** @brief Checks **float/double** value for nan. + + @param x return true if nan. + */ inline bool isNan(float x) { unsigned int *u = reinterpret_cast(&x); return ((u[0] & 0x7f800000) == 0x7f800000) && (u[0] & 0x007fffff); } - //! checks double value for Nan + /** @brief Checks **float/double** value for nan. + + @param x return true if nan. + */ inline bool isNan(double x) { unsigned int *u = reinterpret_cast(&x); return (u[1] & 0x7ff00000) == 0x7ff00000 && (u[0] != 0 || (u[1] & 0x000fffff) != 0); } - //! 
checks vectors for Nans + /** @brief Checks **float/double** value for nan. + + @param v return true if **any** of the elements of the vector is *nan*. + */ template inline bool isNan(const Vec<_Tp, cn>& v) { return isNan(v.val[0]) || isNan(v.val[1]) || isNan(v.val[2]); } - //! checks point for Nans + /** @brief Checks **float/double** value for nan. + + @param p return true if **any** of the elements of the point is *nan*. + */ template inline bool isNan(const Point3_<_Tp>& p) { return isNan(p.x) || isNan(p.y) || isNan(p.z); } @@ -121,6 +168,8 @@ namespace cv CV_EXPORTS void computeNormals(const Mesh& mesh, OutputArray normals); +//! @} + } /* namespace viz */ } /* namespace cv */ diff --git a/modules/viz/include/opencv2/viz/widget_accessor.hpp b/modules/viz/include/opencv2/viz/widget_accessor.hpp index 29352a214b..ccc5b28b24 100644 --- a/modules/viz/include/opencv2/viz/widget_accessor.hpp +++ b/modules/viz/include/opencv2/viz/widget_accessor.hpp @@ -54,15 +54,35 @@ namespace cv { namespace viz { + +//! @addtogroup viz_widget +//! @{ + class Widget; - //The class is only that depends on VTK in its interface. - //It is indended for those users who want to develop own widgets system using VTK library API. + /** @brief This class is for users who want to develop their own widgets using VTK library API. : + */ struct CV_EXPORTS WidgetAccessor { + /** @brief Returns vtkProp of a given widget. + + @param widget Widget whose vtkProp is to be returned. + + @note vtkProp has to be down cast appropriately to be modified. + @code + vtkActor * actor = vtkActor::SafeDownCast(viz::WidgetAccessor::getProp(widget)); + @endcode + */ static vtkSmartPointer getProp(const Widget &widget); + /** @brief Sets vtkProp of a given widget. + + @param widget Widget whose vtkProp is to be set. @param prop A vtkProp. + */ static void setProp(Widget &widget, vtkSmartPointer prop); }; + +//! 
@} + } } diff --git a/modules/viz/include/opencv2/viz/widgets.hpp b/modules/viz/include/opencv2/viz/widgets.hpp index 611db54499..b4699ebb5f 100644 --- a/modules/viz/include/opencv2/viz/widgets.hpp +++ b/modules/viz/include/opencv2/viz/widgets.hpp @@ -52,6 +52,10 @@ namespace cv { namespace viz { + +//! @addtogroup viz_widget +//! @{ + ///////////////////////////////////////////////////////////////////////////// /// Widget rendering properties enum RenderingProperties @@ -80,7 +84,9 @@ namespace cv }; ///////////////////////////////////////////////////////////////////////////// - /// The base class for all widgets + + /** @brief Base class of all widgets. Widget is implicitly shared. : + */ class CV_EXPORTS Widget { public: @@ -89,14 +95,75 @@ namespace cv Widget& operator=(const Widget& other); ~Widget(); - //! Create a widget directly from ply file + /** @brief Creates a widget from ply file. + + @param file_name Ply file name. + */ static Widget fromPlyFile(const String &file_name); - //! Rendering properties of this particular widget + /** @brief Sets rendering property of the widget. + + @param property Property that will be modified. + @param value The new value of the property. + + **Rendering property** can be one of the following: + - **POINT_SIZE** + - **OPACITY** + - **LINE_WIDTH** + - **FONT_SIZE** + - + **REPRESENTATION**: Expected values are + - **REPRESENTATION_POINTS** + - **REPRESENTATION_WIREFRAME** + - **REPRESENTATION_SURFACE** + - + **IMMEDIATE_RENDERING**: + - Turn on immediate rendering by setting the value to 1. + - Turn off immediate rendering by setting the value to 0. + - + **SHADING**: Expected values are + - **SHADING_FLAT** + - **SHADING_GOURAUD** + - **SHADING_PHONG** + */ void setRenderingProperty(int property, double value); + /** @brief Returns rendering property of the widget. + + @param property Property. 
+ + **Rendering property** can be one of the following: + - **POINT_SIZE** + - **OPACITY** + - **LINE_WIDTH** + - **FONT_SIZE** + - + **REPRESENTATION**: Expected values are + - **REPRESENTATION_POINTS** + - **REPRESENTATION_WIREFRAME** + - **REPRESENTATION_SURFACE** + - + **IMMEDIATE_RENDERING**: + - Turn on immediate rendering by setting the value to 1. + - Turn off immediate rendering by setting the value to 0. + - + **SHADING**: Expected values are + - **SHADING_FLAT** + - **SHADING_GOURAUD** + - **SHADING_PHONG** + */ double getRenderingProperty(int property) const; - //! Casting between widgets + /** @brief Casts a widget to another. + + @code + // Create a sphere widget + viz::WSphere sw(Point3f(0.0f,0.0f,0.0f), 0.5f); + // Cast sphere widget to cloud widget + viz::WCloud cw = sw.cast<viz::WCloud>(); + @endcode + + @note 3D Widgets can only be cast to 3D Widgets. 2D Widgets can only be cast to 2D Widgets. + */ template<typename _W> _W cast(); private: class Impl; @@ -105,161 +172,356 @@ namespace cv }; ///////////////////////////////////////////////////////////////////////////// - /// The base class for all 3D widgets + + /** @brief Base class of all 3D widgets. + */ class CV_EXPORTS Widget3D : public Widget { public: Widget3D() {} - //! widget position manipulation, i.e. place where it is rendered + /** @brief Sets pose of the widget. + + @param pose The new pose of the widget. + */ void setPose(const Affine3d &pose); + /** @brief Updates pose of the widget by pre-multiplying its current pose. + + @param pose The pose that the current pose of the widget will be pre-multiplied by. + */ void updatePose(const Affine3d &pose); + /** @brief Returns the current pose of the widget. + */ Affine3d getPose() const; - //! update internal widget data, i.e. points, normals, etc. + /** @brief Transforms internal widget data (i.e. points, normals) using the given transform. + + @param transform Specified transformation to apply. 
+ */ void applyTransform(const Affine3d &transform); + /** @brief Sets the color of the widget. + + @param color color of type Color + */ void setColor(const Color &color); }; ///////////////////////////////////////////////////////////////////////////// - /// The base class for all 2D widgets + + /** @brief Base class of all 2D widgets. + */ class CV_EXPORTS Widget2D : public Widget { public: Widget2D() {} + /** @brief Sets the color of the widget. + + @param color color of type Color + */ void setColor(const Color &color); }; ///////////////////////////////////////////////////////////////////////////// /// Simple widgets + /** @brief This 3D Widget defines a finite line. + */ class CV_EXPORTS WLine : public Widget3D { public: + /** @brief Constructs a WLine. + + @param pt1 Start point of the line. + @param pt2 End point of the line. + @param color Color of the line. + */ WLine(const Point3d &pt1, const Point3d &pt2, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a finite plane. + */ class CV_EXPORTS WPlane : public Widget3D { public: - //! created default plane with center point at origin and normal oriented along z-axis + /** @brief Constructs a default plane with center point at origin and normal oriented along z-axis. + + @param size Size of the plane + @param color Color of the plane. + */ WPlane(const Size2d& size = Size2d(1.0, 1.0), const Color &color = Color::white()); - //! repositioned plane + /** @brief Constructs a repositioned plane + + @param center Center of the plane + @param normal Plane normal orientation + @param new_yaxis Up-vector. New orientation of plane y-axis. + @param size + @param color Color of the plane. + */ WPlane(const Point3d& center, const Vec3d& normal, const Vec3d& new_yaxis, const Size2d& size = Size2d(1.0, 1.0), const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a sphere. : + */ class CV_EXPORTS WSphere : public Widget3D { public: + /** @brief Constructs a WSphere. 
+ + @param center Center of the sphere. + @param radius Radius of the sphere. + @param sphere_resolution Resolution of the sphere. + @param color Color of the sphere. + */ WSphere(const cv::Point3d &center, double radius, int sphere_resolution = 10, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines an arrow. + */ class CV_EXPORTS WArrow : public Widget3D { public: + /** @brief Constructs a WArrow. + + @param pt1 Start point of the arrow. + @param pt2 End point of the arrow. + @param thickness Thickness of the arrow. Thickness of arrow head is also adjusted + accordingly. + @param color Color of the arrow. + + Arrow head is located at the end point of the arrow. + */ WArrow(const Point3d& pt1, const Point3d& pt2, double thickness = 0.03, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a circle. + */ class CV_EXPORTS WCircle : public Widget3D { public: - //! creates default planar circle centred at origin with plane normal along z-axis + /** @brief Constructs default planar circle centred at origin with plane normal along z-axis + + @param radius Radius of the circle. + @param thickness Thickness of the circle. + @param color Color of the circle. + */ WCircle(double radius, double thickness = 0.01, const Color &color = Color::white()); - //! creates repositioned circle + /** @brief Constructs repositioned planar circle. + + @param radius Radius of the circle. + @param center Center of the circle. + @param normal Normal of the plane in which the circle lies. + @param thickness Thickness of the circle. + @param color Color of the circle. + */ WCircle(double radius, const Point3d& center, const Vec3d& normal, double thickness = 0.01, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a cone. : + */ class CV_EXPORTS WCone : public Widget3D { public: - //! 
create default cone, oriented along x-axis with center of its base located at origin + /** @brief Constructs default cone oriented along x-axis with center of its base located at origin + + @param length Length of the cone. + @param radius Radius of the cone. + @param resolution Resolution of the cone. + @param color Color of the cone. + */ WCone(double length, double radius, int resolution = 6.0, const Color &color = Color::white()); - //! creates repositioned cone + /** @brief Constructs repositioned planar cone. + + @param radius Radius of the cone. + @param center Center of the cone base. + @param tip Tip of the cone. + @param resolution Resolution of the cone. + @param color Color of the cone. + + */ WCone(double radius, const Point3d& center, const Point3d& tip, int resolution = 6.0, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a cylinder. : + */ class CV_EXPORTS WCylinder : public Widget3D { public: + /** @brief Constructs a WCylinder. + + @param axis_point1 A point1 on the axis of the cylinder. + @param axis_point2 A point2 on the axis of the cylinder. + @param radius Radius of the cylinder. + @param numsides Resolution of the cylinder. + @param color Color of the cylinder. + */ WCylinder(const Point3d& axis_point1, const Point3d& axis_point2, double radius, int numsides = 30, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a cube. + */ class CV_EXPORTS WCube : public Widget3D { public: + /** @brief Constructs a WCube. + + @param min_point Specifies minimum point of the bounding box. + @param max_point Specifies maximum point of the bounding box. + @param wire_frame If true, cube is represented as wireframe. + @param color Color of the cube. + + ![Cube Widget](images/cube_widget.png) + */ WCube(const Point3d& min_point = Vec3d::all(-0.5), const Point3d& max_point = Vec3d::all(0.5), bool wire_frame = true, const Color &color = Color::white()); }; + /** @brief This 3D Widget defines a poly line. 
: + */ class CV_EXPORTS WPolyLine : public Widget3D { public: WPolyLine(InputArray points, InputArray colors); + /** @brief Constructs a WPolyLine. + + @param points Point set. + @param color Color of the poly line. + */ WPolyLine(InputArray points, const Color &color = Color::white()); }; ///////////////////////////////////////////////////////////////////////////// /// Text and image widgets + /** @brief This 2D Widget represents text overlay. + */ class CV_EXPORTS WText : public Widget2D { public: + /** @brief Constructs a WText. + + @param text Text content of the widget. + @param pos Position of the text. + @param font_size Font size. + @param color Color of the text. + */ WText(const String &text, const Point &pos, int font_size = 20, const Color &color = Color::white()); + /** @brief Sets the text content of the widget. + + @param text Text content of the widget. + */ void setText(const String &text); + /** @brief Returns the current text content of the widget. + */ String getText() const; }; + /** @brief This 3D Widget represents 3D text. The text always faces the camera. + */ class CV_EXPORTS WText3D : public Widget3D { public: - //! creates text label in 3D. If face_camera = false, text plane normal is oriented along z-axis. Use widget pose to orient it properly + /** @brief Constructs a WText3D. + + @param text Text content of the widget. + @param position Position of the text. + @param text_scale Size of the text. + @param face_camera If true, text always faces the camera. + @param color Color of the text. + */ WText3D(const String &text, const Point3d &position, double text_scale = 1., bool face_camera = true, const Color &color = Color::white()); + /** @brief Sets the text content of the widget. + + @param text Text content of the widget. + + */ void setText(const String &text); + /** @brief Returns the current text content of the widget. + */ String getText() const; }; + /** @brief This 2D Widget represents an image overlay. 
: + */ class CV_EXPORTS WImageOverlay : public Widget2D { public: + /** @brief Constructs a WImageOverlay. + + @param image BGR or Gray-Scale image. + @param rect Image is scaled and positioned based on rect. + */ WImageOverlay(InputArray image, const Rect &rect); + /** @brief Sets the image content of the widget. + + @param image BGR or Gray-Scale image. + */ void setImage(InputArray image); }; + /** @brief This 3D Widget represents an image in 3D space. : + */ class CV_EXPORTS WImage3D : public Widget3D { public: - //! Creates 3D image in a plane centered at the origin with normal orientaion along z-axis, - //! image x- and y-axes are oriented along x- and y-axes of 3d world + /** @brief Constructs a WImage3D. + + @param image BGR or Gray-Scale image. + @param size Size of the image. + */ WImage3D(InputArray image, const Size2d &size); - //! Creates 3D image at a given position, pointing in the direction of the normal, and having the up_vector orientation + /** @brief Constructs a WImage3D. + + @param image BGR or Gray-Scale image. + @param size Size of the image. + @param center Position of the image. + @param normal Normal of the plane that represents the image. + @param up_vector Determines orientation of the image. + */ WImage3D(InputArray image, const Size2d &size, const Vec3d &center, const Vec3d &normal, const Vec3d &up_vector); + /** @brief Sets the image content of the widget. + + @param image BGR or Gray-Scale image. + */ void setImage(InputArray image); }; ///////////////////////////////////////////////////////////////////////////// /// Compond widgets + /** @brief This 3D Widget represents a coordinate system. : + */ class CV_EXPORTS WCoordinateSystem : public Widget3D { public: + /** @brief Constructs a WCoordinateSystem. + + @param scale Determines the size of the axes. + */ WCoordinateSystem(double scale = 1.0); }; + /** @brief This 3D Widget defines a grid. : + */ class CV_EXPORTS WGrid : public Widget3D { public: - //! 
Creates grid at the origin and normal oriented along z-axis + /** @brief Constructs a WGrid at the origin with normal oriented along the z-axis. + + @param cells Number of cell columns and rows, respectively. + @param cells_spacing Size of each cell, respectively. + @param color Color of the grid. + */ WGrid(const Vec2i &cells = Vec2i::all(10), const Vec2d &cells_spacing = Vec2d::all(1.0), const Color &color = Color::white()); //! Creates repositioned grid @@ -267,47 +529,134 @@ namespace cv const Vec2i &cells = Vec2i::all(10), const Vec2d &cells_spacing = Vec2d::all(1.0), const Color &color = Color::white()); }; + /** @brief This 3D Widget represents camera position in a scene by its axes or viewing frustum. + */ class CV_EXPORTS WCameraPosition : public Widget3D { public: - //! Creates camera coordinate frame (axes) at the origin + /** @brief Creates camera coordinate frame at the origin. + + ![Camera coordinate frame](images/cpw1.png) + */ WCameraPosition(double scale = 1.0); - //! Creates frustum based on the intrinsic marix K at the origin + /** @brief Display the viewing frustum. + @param K Intrinsic matrix of the camera. + @param scale Scale of the frustum. + @param color Color of the frustum. + + Creates viewing frustum of the camera based on its intrinsic matrix K. + + ![Camera viewing frustum](images/cpw2.png) + */ WCameraPosition(const Matx33d &K, double scale = 1.0, const Color &color = Color::white()); - //! Creates frustum based on the field of view at the origin + /** @brief Display the viewing frustum. + @param fov Field of view of the camera (horizontal, vertical). + @param scale Scale of the frustum. + @param color Color of the frustum. + + Creates viewing frustum of the camera based on its field of view fov. + + ![Camera viewing frustum](images/cpw2.png) + */ WCameraPosition(const Vec2d &fov, double scale = 1.0, const Color &color = Color::white()); - //! 
Creates frustum and display given image at the far plane + /** @brief Display image on the far plane of the viewing frustum + + @param K Intrinsic matrix of the camera. + @param image BGR or Gray-Scale image that is going to be displayed on the far plane of the frustum. + @param scale Scale of the frustum and image. + @param color Color of the frustum. + + Creates viewing frustum of the camera based on its intrinsic matrix K, and displays image on + the far end plane. + + ![Camera viewing frustum with image](images/cpw3.png) + */ WCameraPosition(const Matx33d &K, InputArray image, double scale = 1.0, const Color &color = Color::white()); - //! Creates frustum and display given image at the far plane + /** @brief Display image on the far plane of the viewing frustum + + @param fov Field of view of the camera (horizontal, vertical). + @param image BGR or Gray-Scale image that is going to be displayed on the far plane of the frustum. + @param scale Scale of the frustum and image. + @param color Color of the frustum. + + Creates viewing frustum of the camera based on its field of view fov, and displays image on + the far end plane. + + ![Camera viewing frustum with image](images/cpw3.png) + */ WCameraPosition(const Vec2d &fov, InputArray image, double scale = 1.0, const Color &color = Color::white()); }; ///////////////////////////////////////////////////////////////////////////// /// Trajectories + /** @brief This 3D Widget represents a trajectory. + */ class CV_EXPORTS WTrajectory : public Widget3D { public: enum {FRAMES = 1, PATH = 2, BOTH = FRAMES + PATH }; - //! Takes vector<Affine3<T>> and displays trajectory of the given path either by coordinate frames or polyline + /** @brief Constructs a WTrajectory. + + @param path List of poses on a trajectory. Takes std::vector\<Affine\<T\>\> with T == [float | double] + @param display_mode Display mode. This can be PATH, FRAMES, or BOTH. + @param scale Scale of the frames. Polyline is not affected.
+ @param color Color of the polyline that represents path. + + The color does not affect the frames. + Displays trajectory of the given path as follows: + - PATH : Displays a poly line that represents the path. + - FRAMES : Displays coordinate frames at each pose. + - BOTH : Displays both poly line and coordinate frames. + */ WTrajectory(InputArray path, int display_mode = WTrajectory::PATH, double scale = 1.0, const Color &color = Color::white()); }; + /** @brief This 3D Widget represents a trajectory using frustums. + */ class CV_EXPORTS WTrajectoryFrustums : public Widget3D { public: - //! Takes vector<Affine3<T>> and displays trajectory of the given path by frustums + /** @brief Constructs a WTrajectoryFrustums. + + @param path List of poses on a trajectory. Takes std::vector\<Affine\<T\>\> with T == [float | double] + @param K Intrinsic matrix of the camera. + @param scale Scale of the frustums. + @param color Color of the frustums. + + Displays frustums at each pose of the trajectory. + */ WTrajectoryFrustums(InputArray path, const Matx33d &K, double scale = 1., const Color &color = Color::white()); - //! Takes vector<Affine3<T>> and displays trajectory of the given path by frustums + /** @brief Constructs a WTrajectoryFrustums. + + @param path List of poses on a trajectory. Takes std::vector\<Affine\<T\>\> with T == [float | double] + @param fov Field of view of the camera (horizontal, vertical). + @param scale Scale of the frustums. + @param color Color of the frustums. + + Displays frustums at each pose of the trajectory. + */ WTrajectoryFrustums(InputArray path, const Vec2d &fov, double scale = 1., const Color &color = Color::white()); }; + /** @brief This 3D Widget represents a trajectory using spheres and lines + + where spheres represent the positions of the camera, and lines represent the direction from + previous position to the current. + */ class CV_EXPORTS WTrajectorySpheres: public Widget3D { public: - //! Takes vector<Affine3<T>> and displays trajectory of the given path + /** @brief Constructs a WTrajectorySpheres.
+ + @param path List of poses on a trajectory. Takes std::vector\<Affine\<T\>\> with T == [float | double] + @param line_length Max length of the lines which point to previous position. + @param radius Radius of the spheres. + @param from Color for first sphere. + @param to Color for last sphere. Intermediate spheres will have interpolated color. + */ WTrajectorySpheres(InputArray path, double line_length = 0.05, double radius = 0.007, const Color &from = Color::red(), const Color &to = Color::white()); }; @@ -315,19 +664,47 @@ namespace cv ///////////////////////////////////////////////////////////////////////////// /// Clouds + /** @brief This 3D Widget defines a point cloud. + + @note In case there are four channels in the cloud, fourth channel is ignored. + */ class CV_EXPORTS WCloud: public Widget3D { public: - //! Each point in cloud is mapped to a color in colors + /** @brief Constructs a WCloud. + + @param cloud Set of points which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param colors Set of colors. It has to be of the same size as cloud. + + Points in the cloud belong to mask when they are set to (NaN, NaN, NaN). + */ WCloud(InputArray cloud, InputArray colors); - //! All points in cloud have the same color + /** @brief Constructs a WCloud. + @param cloud Set of points which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param color A single Color for the whole cloud. + + Points in the cloud belong to mask when they are set to (NaN, NaN, NaN). + */ WCloud(InputArray cloud, const Color &color = Color::white()); - //! Each point in cloud is mapped to a color in colors, normals are used for shading + /** @brief Constructs a WCloud. + @param cloud Set of points which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param colors Set of colors. It has to be of the same size as cloud. + @param normals Normals for each point in cloud, used for shading. Size and type should match the cloud parameter.
+ + Points in the cloud belong to mask when they are set to (NaN, NaN, NaN). + */ WCloud(InputArray cloud, InputArray colors, InputArray normals); - //! All points in cloud have the same color, normals are used for shading + /** @brief Constructs a WCloud. + @param cloud Set of points which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param color A single Color for the whole cloud. + @param normals Normals for each point in cloud, used for shading. + + Size and type of normals should match the cloud parameter. + Points in the cloud belong to mask when they are set to (NaN, NaN, NaN). + */ WCloud(InputArray cloud, const Color &color, InputArray normals); }; @@ -344,25 +721,61 @@ namespace cv WPaintedCloud(InputArray cloud, const Point3d& p1, const Point3d& p2, const Color& c1, const Color c2); }; + /** @brief This 3D Widget defines a collection of clouds. + @note In case there are four channels in the cloud, fourth channel is ignored. + */ class CV_EXPORTS WCloudCollection : public Widget3D { public: WCloudCollection(); - //! Each point in cloud is mapped to a color in colors + /** @brief Adds a cloud to the collection. + + @param cloud Point set which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param colors Set of colors. It has to be of the same size as cloud. + @param pose Pose of the cloud. Points in the cloud belong to mask when they are set to (NaN, NaN, NaN). + */ void addCloud(InputArray cloud, InputArray colors, const Affine3d &pose = Affine3d::Identity()); - //! All points in cloud have the same color + /** @brief Adds a cloud to the collection. + + @param cloud Point set which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param color A single Color for the whole cloud. + @param pose Pose of the cloud. Points in the cloud belong to mask when they are set to (NaN, NaN, NaN). + */ void addCloud(InputArray cloud, const Color &color = Color::white(), const Affine3d &pose = Affine3d::Identity()); - //! 
Repacks internal structure to single cloud + /** @brief Finalizes cloud data by repacking into a single cloud. + + Useful for large cloud collections to reduce memory usage. + */ void finalize(); }; + /** @brief This 3D Widget represents normals of a point cloud. + */ class CV_EXPORTS WCloudNormals : public Widget3D { public: + /** @brief Constructs a WCloudNormals. + + @param cloud Point set which can be of type: CV_32FC3, CV_32FC4, CV_64FC3, CV_64FC4. + @param normals A set of normals that has to be of the same type as cloud. + @param level Display only every level-th normal. + @param scale Scale of the arrows that represent normals. + @param color Color of the arrows that represent normals. + + @note In case there are four channels in the cloud, fourth channel is ignored. + */ WCloudNormals(InputArray cloud, InputArray normals, int level = 64, double scale = 0.1, const Color &color = Color::white()); }; + /** @brief Constructs a WMesh. + + @param mesh Mesh object that will be displayed. + @param cloud Points of the mesh object. + @param polygons Polygons of the mesh object. + @param colors Point colors. + @param normals Point normals. + */ class CV_EXPORTS WMesh : public Widget3D { public: @@ -370,6 +783,13 @@ namespace cv WMesh(InputArray cloud, InputArray polygons, InputArray colors = noArray(), InputArray normals = noArray()); }; + /** @brief This class allows merging several widgets into a single one. + + It has quite limited functionality and can't merge widgets with different attributes. For + instance, if widgetA has color array and widgetB has only global color defined, then result + of merge won't have color at all. The class is suitable for merging a large number of similar + widgets. + */ class CV_EXPORTS WWidgetMerger : public Widget3D { public: @@ -413,6 +833,8 @@ namespace cv template<> CV_EXPORTS WMesh Widget::cast<WMesh>(); template<> CV_EXPORTS WWidgetMerger Widget::cast<WWidgetMerger>(); +//! @} + } /* namespace viz */ } /* namespace cv */