From c5a698fb54d55c87d667dca2eb52aa9856988ad0 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Wed, 19 Nov 2014 16:13:41 +0300 Subject: [PATCH] Doxygen documentation: calib3d and features2d modules --- modules/calib3d/include/opencv2/calib3d.hpp | 1459 ++++++++++++++++- .../include/opencv2/calib3d/calib3d_c.h | 6 + .../features2d/include/opencv2/features2d.hpp | 681 ++++++-- 3 files changed, 1980 insertions(+), 166 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 4f405afc66..e2e0dd0416 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -48,9 +48,140 @@ #include "opencv2/features2d.hpp" #include "opencv2/core/affine.hpp" +/** + @defgroup calib3d Camera Calibration and 3D Reconstruction + +The functions in this section use a so-called pinhole camera model. In this model, a scene view is +formed by projecting 3D points into the image plane using a perspective transformation. + +\f[s \; m' = A [R|t] M'\f] + +or + +\f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1} +\begin{bmatrix} +r_{11} & r_{12} & r_{13} & t_1 \\ +r_{21} & r_{22} & r_{23} & t_2 \\ +r_{31} & r_{32} & r_{33} & t_3 +\end{bmatrix} +\begin{bmatrix} +X \\ +Y \\ +Z \\ +1 +\end{bmatrix}\f] + +where: + +- \f$(X, Y, Z)\f$ are the coordinates of a 3D point in the world coordinate space +- \f$(u, v)\f$ are the coordinates of the projection point in pixels +- \f$A\f$ is a camera matrix, or a matrix of intrinsic parameters +- \f$(cx, cy)\f$ is a principal point that is usually at the image center +- \f$fx, fy\f$ are the focal lengths expressed in pixel units. + +Thus, if an image from the camera is scaled by a factor, all of these parameters should be scaled +(multiplied/divided, respectively) by the same factor. The matrix of intrinsic parameters does not +depend on the scene viewed. 
So, once estimated, it can be re-used as long as the focal length is +fixed (in case of zoom lens). The joint rotation-translation matrix \f$[R|t]\f$ is called a matrix of +extrinsic parameters. It is used to describe the camera motion around a static scene, or vice versa, +rigid motion of an object in front of a still camera. That is, \f$[R|t]\f$ translates coordinates of a +point \f$(X, Y, Z)\f$ to a coordinate system, fixed with respect to the camera. The transformation above +is equivalent to the following (when \f$z \ne 0\f$ ): + +\f[\begin{array}{l} +\vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ +x' = x/z \\ +y' = y/z \\ +u = f_x*x' + c_x \\ +v = f_y*y' + c_y +\end{array}\f] + +Real lenses usually have some distortion, mostly radial distortion and slight tangential distortion. +So, the above model is extended as: + +\f[\begin{array}{l} \vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\ x' = x/z \\ y' = y/z \\ x'' = x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\ y'' = y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\ \text{where} \quad r^2 = x'^2 + y'^2 \\ u = f_x*x'' + c_x \\ v = f_y*y'' + c_y \end{array}\f] + +\f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ are radial distortion coefficients. \f$p_1\f$ and \f$p_2\f$ are +tangential distortion coefficients. \f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$, are the thin prism distortion +coefficients. Higher-order coefficients are not considered in OpenCV. In the functions below the +coefficients are passed or returned as + +\f[(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f] + +vector. That is, if the vector contains four elements, it means that \f$k_3=0\f$ . The distortion +coefficients do not depend on the scene viewed. Thus, they also belong to the intrinsic camera +parameters.
And they remain the same regardless of the captured image resolution. If, for example, a +camera has been calibrated on images of 320 x 240 resolution, absolutely the same distortion +coefficients can be used for 640 x 480 images from the same camera while \f$f_x\f$, \f$f_y\f$, \f$c_x\f$, and +\f$c_y\f$ need to be scaled appropriately. + +The functions below use the above model to do the following: + +- Project 3D points to the image plane given intrinsic and extrinsic parameters. +- Compute extrinsic parameters given intrinsic parameters, a few 3D points, and their +projections. +- Estimate intrinsic and extrinsic camera parameters from several views of a known calibration +pattern (every view is described by several 3D-2D point correspondences). +- Estimate the relative position and orientation of the stereo camera "heads" and compute the +*rectification* transformation that makes the camera optical axes parallel. + +@note + - A calibration sample for 3 cameras in horizontal position can be found at + opencv\_source\_code/samples/cpp/3calibration.cpp + - A calibration sample based on a sequence of images can be found at + opencv\_source\_code/samples/cpp/calibration.cpp + - A calibration sample in order to do 3D reconstruction can be found at + opencv\_source\_code/samples/cpp/build3dmodel.cpp + - A calibration sample of an artificially generated camera and chessboard patterns can be + found at opencv\_source\_code/samples/cpp/calibration\_artificial.cpp + - A calibration example on stereo calibration can be found at + opencv\_source\_code/samples/cpp/stereo\_calib.cpp + - A calibration example on stereo matching can be found at + opencv\_source\_code/samples/cpp/stereo\_match.cpp + - (Python) A camera calibration sample can be found at + opencv\_source\_code/samples/python2/calibrate.py + + @{ + @defgroup calib3d_fisheye Fisheye camera model + + Definitions: Let P be a point in 3D of coordinates X in the world reference frame (stored in the + matrix X) The 
coordinate vector of P in the camera reference frame is: + + \f[Xc = R X + T\f] + + where R is the rotation matrix corresponding to the rotation vector om: R = rodrigues(om); call x, y + and z the 3 coordinates of Xc: + + \f[x = Xc_1 \\ y = Xc_2 \\ z = Xc_3\f] + + The pinhole projection coordinates of P are [a; b] where + + \f[a = x / z \ and \ b = y / z \\ r^2 = a^2 + b^2 \\ \theta = atan(r)\f] + + Fisheye distortion: + + \f[\theta_d = \theta (1 + k_1 \theta^2 + k_2 \theta^4 + k_3 \theta^6 + k_4 \theta^8)\f] + + The distorted point coordinates are [x'; y'] where + + \f[x' = (\theta_d / r) a \\ y' = (\theta_d / r) b \f] + + Finally, conversion into pixel coordinates: The final pixel coordinates vector [u; v] where: + + \f[u = f_x (x' + \alpha y') + c_x \\ + v = f_y y' + c_y\f] + + @defgroup calib3d_c C API + + @} + */ + namespace cv { +//! @addtogroup calib3d +//! @{ + //! type of the robust estimation algorithm enum { LMEDS = 4, //!< least-median algorithm RANSAC = 8 //!< RANSAC algorithm @@ -105,26 +236,143 @@ enum { FM_7POINT = 1, //!< 7-point algorithm -//! converts rotation vector to rotation matrix or vice versa using Rodrigues transformation +/** @brief Converts a rotation matrix to a rotation vector or vice versa. + +@param src Input rotation vector (3x1 or 1x3) or rotation matrix (3x3). +@param dst Output rotation matrix (3x3) or rotation vector (3x1 or 1x3), respectively. +@param jacobian Optional output Jacobian matrix, 3x9 or 9x3, which is a matrix of partial +derivatives of the output array components with respect to the input array components.
+ +\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos{\theta} I + (1- \cos{\theta} ) r r^T + \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f] + +Inverse transformation can be also done easily, since + +\f[\sin ( \theta ) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} = \frac{R - R^T}{2}\f] + +A rotation vector is a convenient and most compact representation of a rotation matrix (since any +rotation matrix has just 3 degrees of freedom). The representation is used in the global 3D geometry +optimization procedures like calibrateCamera, stereoCalibrate, or solvePnP . + */ CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() ); -//! computes the best-fit perspective transformation mapping srcPoints to dstPoints. +/** @brief Finds a perspective transformation between two planes. + +@param srcPoints Coordinates of the points in the original plane, a matrix of the type CV\_32FC2 +or vector\<Point2f\> . +@param dstPoints Coordinates of the points in the target plane, a matrix of the type CV\_32FC2 or +a vector\<Point2f\> . +@param method Method used to compute a homography matrix. The following methods are possible: +- **0** - a regular method using all the points +- **RANSAC** - RANSAC-based robust method +- **LMEDS** - Least-Median robust method +@param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier +(used in the RANSAC method only). That is, if +\f[\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \| > \texttt{ransacReprojThreshold}\f] +then the point \f$i\f$ is considered an outlier. If srcPoints and dstPoints are measured in pixels, +it usually makes sense to set this parameter somewhere in the range of 1 to 10. +@param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input +mask values are ignored.
+@param maxIters The maximum number of RANSAC iterations, 2000 is the maximum it can be. +@param confidence Confidence level, between 0 and 1. + +The functions find and return the perspective transformation \f$H\f$ between the source and the +destination planes: + +\f[s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}\f] + +so that the back-projection error + +\f[\sum _i \left ( x'_i- \frac{h_{11} x_i + h_{12} y_i + h_{13}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2+ \left ( y'_i- \frac{h_{21} x_i + h_{22} y_i + h_{23}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2\f] + +is minimized. If the parameter method is set to the default value 0, the function uses all the point +pairs to compute an initial homography estimate with a simple least-squares scheme. + +However, if not all of the point pairs ( \f$srcPoints_i\f$, \f$dstPoints_i\f$ ) fit the rigid perspective +transformation (that is, there are some outliers), this initial estimate will be poor. In this case, +you can use one of the two robust methods. Both methods, RANSAC and LMeDS , try many different +random subsets of the corresponding point pairs (of four pairs each), estimate the homography matrix +using this subset and a simple least-square algorithm, and then compute the quality/goodness of the +computed homography (which is the number of inliers for RANSAC or the median re-projection error for +LMeDs). The best subset is then used to produce the initial estimate of the homography matrix and +the mask of inliers/outliers. + +Regardless of the method, robust or not, the computed homography matrix is refined further (using +inliers only in case of a robust method) with the Levenberg-Marquardt method to reduce the +re-projection error even more. + +The method RANSAC can handle practically any ratio of outliers but it needs a threshold to +distinguish inliers from outliers. The method LMeDS does not need any threshold but it works +correctly only when there are more than 50% of inliers. 
Finally, if there are no outliers and the +noise is rather small, use the default method (method=0). + +The function is used to find initial intrinsic and extrinsic matrices. Homography matrix is +determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an H matrix +cannot be estimated, an empty one will be returned. + +@sa + getAffineTransform, getPerspectiveTransform, estimateRigidTransform, warpPerspective, + perspectiveTransform + +@note + - An example on calculating a homography for image matching can be found at + opencv\_source\_code/samples/cpp/video\_homography.cpp + + */ CV_EXPORTS_W Mat findHomography( InputArray srcPoints, InputArray dstPoints, int method = 0, double ransacReprojThreshold = 3, OutputArray mask=noArray(), const int maxIters = 2000, const double confidence = 0.995); -//! variant of findHomography for backward compatibility +/** @overload */ CV_EXPORTS Mat findHomography( InputArray srcPoints, InputArray dstPoints, OutputArray mask, int method = 0, double ransacReprojThreshold = 3 ); -//! Computes RQ decomposition of 3x3 matrix +/** @brief Computes an RQ decomposition of 3x3 matrices. + +@param src 3x3 input matrix. +@param mtxR Output 3x3 upper-triangular matrix. +@param mtxQ Output 3x3 orthogonal matrix. +@param Qx Optional output 3x3 rotation matrix around x-axis. +@param Qy Optional output 3x3 rotation matrix around y-axis. +@param Qz Optional output 3x3 rotation matrix around z-axis. + +The function computes an RQ decomposition using the given rotations. This function is used in +decomposeProjectionMatrix to decompose the left 3x3 submatrix of a projection matrix into a camera +and a rotation matrix. + +It optionally returns three rotation matrices, one for each axis, and the three Euler angles in +degrees (as the return value) that could be used in OpenGL. Note, there is always more than one +sequence of rotations about the three principal axes that results in the same orientation of an +object, e.g.
see @cite Slabaugh. Returned three rotation matrices and corresponding three Euler angles +are only one of the possible solutions. + */ CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mtxQ, OutputArray Qx = noArray(), OutputArray Qy = noArray(), OutputArray Qz = noArray()); -//! Decomposes the projection matrix into camera matrix and the rotation martix and the translation vector +/** @brief Decomposes a projection matrix into a rotation matrix and a camera matrix. + +@param projMatrix 3x4 input projection matrix P. +@param cameraMatrix Output 3x3 camera matrix K. +@param rotMatrix Output 3x3 external rotation matrix R. +@param transVect Output 4x1 translation vector T. +@param rotMatrixX Optional 3x3 rotation matrix around x-axis. +@param rotMatrixY Optional 3x3 rotation matrix around y-axis. +@param rotMatrixZ Optional 3x3 rotation matrix around z-axis. +@param eulerAngles Optional three-element vector containing three Euler angles of rotation in +degrees. + +The function computes a decomposition of a projection matrix into a calibration and a rotation +matrix and the position of a camera. + +It optionally returns three rotation matrices, one for each axis, and three Euler angles that could +be used in OpenGL. Note, there is always more than one sequence of rotations about the three +principal axes that results in the same orientation of an object, e.g. see @cite Slabaugh. Returned +three rotation matrices and corresponding three Euler angles are only one of the possible solutions. + +The function is based on RQDecomp3x3 . + */ CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray cameraMatrix, OutputArray rotMatrix, OutputArray transVect, OutputArray rotMatrixX = noArray(), @@ -132,10 +380,51 @@ CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray OutputArray rotMatrixZ = noArray(), OutputArray eulerAngles =noArray() ); -//!
computes derivatives of the matrix product w.r.t each of the multiplied matrix coefficients +/** @brief Computes partial derivatives of the matrix product for each multiplied matrix. + +@param A First multiplied matrix. +@param B Second multiplied matrix. +@param dABdA First output derivative matrix d(A\*B)/dA of size +\f$\texttt{A.rows*B.cols} \times {A.rows*A.cols}\f$ . +@param dABdB Second output derivative matrix d(A\*B)/dB of size +\f$\texttt{A.rows*B.cols} \times {B.rows*B.cols}\f$ . + +The function computes partial derivatives of the elements of the matrix product \f$A*B\f$ with regard to +the elements of each of the two input matrices. The function is used to compute the Jacobian +matrices in stereoCalibrate but can also be used in any other similar optimization function. + */ CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB ); -//! composes 2 [R|t] transformations together. Also computes the derivatives of the result w.r.t the arguments +/** @brief Combines two rotation-and-shift transformations. + +@param rvec1 First rotation vector. +@param tvec1 First translation vector. +@param rvec2 Second rotation vector. +@param tvec2 Second translation vector. +@param rvec3 Output rotation vector of the superposition. +@param tvec3 Output translation vector of the superposition. +@param dr3dr1 +@param dr3dt1 +@param dr3dr2 +@param dr3dt2 +@param dt3dr1 +@param dt3dt1 +@param dt3dr2 +@param dt3dt2 Optional output derivatives of rvec3 or tvec3 with regard to rvec1, rvec2, tvec1 and +tvec2, respectively. 
+ +The functions compute: + +\f[\begin{array}{l} \texttt{rvec3} = \mathrm{rodrigues} ^{-1} \left ( \mathrm{rodrigues} ( \texttt{rvec2} ) \cdot \mathrm{rodrigues} ( \texttt{rvec1} ) \right ) \\ \texttt{tvec3} = \mathrm{rodrigues} ( \texttt{rvec2} ) \cdot \texttt{tvec1} + \texttt{tvec2} \end{array} ,\f] + +where \f$\mathrm{rodrigues}\f$ denotes a rotation vector to a rotation matrix transformation, and +\f$\mathrm{rodrigues}^{-1}\f$ denotes the inverse transformation. See Rodrigues for details. + +Also, the functions can compute the derivatives of the output vectors with regard to the input +vectors (see matMulDeriv ). The functions are used inside stereoCalibrate but can also be used in +your own code where Levenberg-Marquardt or another gradient-based solver is used to optimize a +function that contains a matrix multiplication. + */ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1, InputArray rvec2, InputArray tvec2, OutputArray rvec3, OutputArray tvec3, @@ -144,7 +433,38 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1, OutputArray dt3dr1 = noArray(), OutputArray dt3dt1 = noArray(), OutputArray dt3dr2 = noArray(), OutputArray dt3dt2 = noArray() ); -//! projects points from the model coordinate space to the image coordinates. Also computes derivatives of the image coordinates w.r.t the intrinsic and extrinsic camera parameters +/** @brief Projects 3D points to an image plane. + +@param objectPoints Array of object points, 3xN/Nx3 1-channel or 1xN/Nx1 3-channel (or +vector\<Point3f\> ), where N is the number of points in the view. +@param rvec Rotation vector. See Rodrigues for details. +@param tvec Translation vector. +@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements.
If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or +vector\<Point2f\> . +@param jacobian Optional output 2Nx(10+\<numDistCoeffs\>) jacobian matrix of derivatives of image +points with respect to components of the rotation vector, translation vector, focal lengths, +coordinates of the principal point and the distortion coefficients. In the old interface different +components of the jacobian are returned via different output parameters. +@param aspectRatio Optional "fixed aspect ratio" parameter. If the parameter is not 0, the +function assumes that the aspect ratio (*fx/fy*) is fixed and correspondingly adjusts the jacobian +matrix. + +The function computes projections of 3D points to the image plane given intrinsic and extrinsic +camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of +image points coordinates (as functions of all the input parameters) with respect to the particular +parameters, intrinsic and/or extrinsic. The Jacobians are used during the global optimization in +calibrateCamera, solvePnP, and stereoCalibrate . The function itself can also be used to compute a +re-projection error given the current intrinsic and extrinsic parameters. + +@note By setting rvec=tvec=(0,0,0) or by setting cameraMatrix to a 3x3 identity matrix, or by +passing zero distortion coefficients, you can get various useful partial cases of the function. This +means that you can compute the distorted coordinates for a sparse set of points or apply a +perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup. + */ CV_EXPORTS_W void projectPoints( InputArray objectPoints, InputArray rvec, InputArray tvec, InputArray cameraMatrix, InputArray distCoeffs, @@ -152,13 +472,86 @@ CV_EXPORTS_W void projectPoints( InputArray objectPoints, OutputArray jacobian = noArray(), double aspectRatio = 0 ); -//!
computes the camera pose from a few 3D points and the corresponding projections. The outliers are not handled. +/** @brief Finds an object pose from 3D-2D point correspondences. + +@param objectPoints Array of object points in the object coordinate space, 3xN/Nx3 1-channel or +1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. +@param imagePoints Array of corresponding image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\ can be also passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +the model coordinate system to the camera coordinate system. +@param tvec Output translation vector. +@param useExtrinsicGuess Parameter used for SOLVEPNP\_ITERATIVE. If true (1), the function uses +the provided rvec and tvec values as initial approximations of the rotation and translation +vectors, respectively, and further optimizes them. +@param flags Method for solving a PnP problem: +- **SOLVEPNP\_ITERATIVE** Iterative method is based on Levenberg-Marquardt optimization. In +this case the function finds such a pose that minimizes reprojection error, that is the sum +of squared distances between the observed projections imagePoints and the projected (using +projectPoints ) objectPoints . +- **SOLVEPNP\_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem". In this case the +function requires exactly four object and image points. 
+- **SOLVEPNP\_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation". +- **SOLVEPNP\_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP". +- **SOLVEPNP\_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, +F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation". In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +assuming that both have the same value. Then the cameraMatrix is updated with the estimated +focal length. + +The function estimates the object pose given a set of object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients. + +@note + - An example of how to use solvePnP for planar augmented reality can be found at + opencv\_source\_code/samples/python2/plane\_ar.py + */ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE ); -//! computes the camera pose from a few 3D points and the corresponding projections. The outliers are possible. +/** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme. + +@param objectPoints Array of object points in the object coordinate space, 3xN/Nx3 1-channel or +1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. +@param imagePoints Array of corresponding image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\ can be also passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . 
+@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from +the model coordinate system to the camera coordinate system. +@param tvec Output translation vector. +@param useExtrinsicGuess Parameter used for SOLVEPNP\_ITERATIVE. If true (1), the function uses +the provided rvec and tvec values as initial approximations of the rotation and translation +vectors, respectively, and further optimizes them. +@param iterationsCount Number of iterations. +@param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value +is the maximum allowed distance between the observed and computed point projections to consider it +an inlier. +@param confidence The probability that the algorithm produces a useful result. +@param inliers Output vector that contains indices of inliers in objectPoints and imagePoints . +@param flags Method for solving a PnP problem (see solvePnP ). + +The function estimates an object pose given a set of object points, their corresponding image +projections, as well as the camera matrix and the distortion coefficients. This function finds such +a pose that minimizes reprojection error, that is, the sum of squared distances between the observed +projections imagePoints and the projected (using projectPoints ) objectPoints. The use of RANSAC +makes the function resistant to outliers. 
+ +@note + - An example of how to use solvePNPRansac for object detection can be found at + opencv\_source\_code/samples/cpp/tutorial\_code/calib3d/real\_time\_pose\_estimation/ + */ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec, @@ -166,28 +559,227 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint float reprojectionError = 8.0, double confidence = 0.99, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE ); -//! initializes camera matrix from a few 3D points and the corresponding projections. +/** @brief Finds an initial camera matrix from 3D-2D point correspondences. + +@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern +coordinate space. In the old interface all the per-view vectors are concatenated. See +calibrateCamera for details. +@param imagePoints Vector of vectors of the projections of the calibration pattern points. In the +old interface all the per-view vectors are concatenated. +@param imageSize Image size in pixels used to initialize the principal point. +@param aspectRatio If it is zero or negative, both \f$f_x\f$ and \f$f_y\f$ are estimated independently. +Otherwise, \f$f_x = f_y * \texttt{aspectRatio}\f$ . + +The function estimates and returns an initial camera matrix for the camera calibration process. +Currently, the function only supports planar calibration patterns, which are patterns where each +object point has z-coordinate =0. + */ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size imageSize, double aspectRatio = 1.0 ); -//! finds checkerboard pattern of the specified size in the image +/** @brief Finds the positions of internal corners of the chessboard. + +@param image Source chessboard view. It must be an 8-bit grayscale or color image. 
+@param patternSize Number of inner corners per a chessboard row and column +( patternSize = cvSize(points\_per\_row,points\_per\_colum) = cvSize(columns,rows) ). +@param corners Output array of detected corners. +@param flags Various operation flags that can be zero or a combination of the following values: +- **CV\_CALIB\_CB\_ADAPTIVE\_THRESH** Use adaptive thresholding to convert the image to black +and white, rather than a fixed threshold level (computed from the average image brightness). +- **CV\_CALIB\_CB\_NORMALIZE\_IMAGE** Normalize the image gamma with equalizeHist before +applying fixed or adaptive thresholding. +- **CV\_CALIB\_CB\_FILTER\_QUADS** Use additional criteria (like contour area, perimeter, +square-like shape) to filter out false quads extracted at the contour retrieval stage. +- **CALIB\_CB\_FAST\_CHECK** Run a fast check on the image that looks for chessboard corners, +and shortcut the call if none is found. This can drastically speed up the call in the +degenerate condition when no chessboard is observed. + +The function attempts to determine whether the input image is a view of the chessboard pattern and +locate the internal chessboard corners. The function returns a non-zero value if all of the corners +are found and they are placed in a certain order (row by row, left to right in every row). +Otherwise, if the function fails to find all the corners or reorder them, it returns 0. For example, +a regular chessboard has 8 x 8 squares and 7 x 7 internal corners, that is, points where the black +squares touch each other. The detected coordinates are approximate, and to determine their positions +more accurately, the function calls cornerSubPix. You also may use the function cornerSubPix with +different parameters if returned coordinates are not accurate enough. 
+ +Sample usage of detecting and drawing chessboard corners: +@code + Size patternsize(8,6); //interior number of corners + Mat gray = ....; //source image + vector<Point2f> corners; //this will be filled by the detected corners + + //CALIB_CB_FAST_CHECK saves a lot of time on images + //that do not contain any chessboard corners + bool patternfound = findChessboardCorners(gray, patternsize, corners, + CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE + + CALIB_CB_FAST_CHECK); + + if(patternfound) + cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1), + TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 30, 0.1)); + + drawChessboardCorners(img, patternsize, Mat(corners), patternfound); +@endcode +@note The function requires white space (like a square-thick border, the wider the better) around +the board to make the detection more robust in various environments. Otherwise, if there is no +border and the background is dark, the outer black squares cannot be segmented properly and so the +square grouping and ordering algorithm fails. + */ CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, OutputArray corners, int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE ); //! finds subpixel-accurate positions of the chessboard corners CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size ); -//! draws the checkerboard pattern (found or partly found) in the image +/** @brief Renders the detected chessboard corners. + +@param image Destination image. It must be an 8-bit color image. +@param patternSize Number of inner corners per a chessboard row and column +(patternSize = cv::Size(points\_per\_row,points\_per\_column)). +@param corners Array of detected corners, the output of findChessboardCorners. +@param patternWasFound Parameter indicating whether the complete board was found or not. The +return value of findChessboardCorners should be passed here.
+ +The function draws individual chessboard corners detected either as red circles if the board was not +found, or as colored corners connected with lines if the board was found. + */ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSize, InputArray corners, bool patternWasFound ); -//! finds circles' grid pattern of the specified size in the image +/** @brief Finds centers in the grid of circles. + +@param image grid view of input circles; it must be an 8-bit grayscale or color image. +@param patternSize number of circles per row and column +( patternSize = Size(points\_per\_row, points\_per\_column) ). +@param centers output array of detected centers. +@param flags various operation flags that can be one of the following values: +- **CALIB\_CB\_SYMMETRIC\_GRID** uses symmetric pattern of circles. +- **CALIB\_CB\_ASYMMETRIC\_GRID** uses asymmetric pattern of circles. +- **CALIB\_CB\_CLUSTERING** uses a special algorithm for grid detection. It is more robust to +perspective distortions but much more sensitive to background clutter. +@param blobDetector feature detector that finds blobs like dark circles on light background. + +The function attempts to determine whether the input image contains a grid of circles. If it does, the +function locates centers of the circles. The function returns a non-zero value if all of the centers +have been found and they have been placed in a certain order (row by row, left to right in every +row). Otherwise, if the function fails to find all the centers or reorder them, it returns 0.
+ +Sample usage of detecting and drawing the centers of circles: +@code + Size patternsize(7,7); //number of centers + Mat gray = ....; //source image + vector<Point2f> centers; //this will be filled by the detected centers + + bool patternfound = findCirclesGrid(gray, patternsize, centers); + + drawChessboardCorners(img, patternsize, Mat(centers), patternfound); +@endcode +@note The function requires white space (like a square-thick border, the wider the better) around +the board to make the detection more robust in various environments. + */ CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize, OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID, const Ptr<FeatureDetector> &blobDetector = SimpleBlobDetector::create()); -//! finds intrinsic and extrinsic camera parameters from several fews of a known calibration pattern. +/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern. + +@param objectPoints In the new interface it is a vector of vectors of calibration pattern points +in the calibration pattern coordinate space. The outer vector contains as many elements as the +number of the pattern views. If the same calibration pattern is shown in each view and it is fully +visible, all the vectors will be the same. Although, it is possible to use partially occluded +patterns, or even different patterns in different views. Then, the vectors will be different. The +points are 3D, but since they are in a pattern coordinate system, then, if the rig is planar, it +may make sense to put the model to a XY coordinate plane so that Z-coordinate of each input object +point is 0. +In the old interface all the vectors of object points from different views are concatenated +together. +@param imagePoints In the new interface it is a vector of vectors of the projections of +calibration pattern points. imagePoints.size() must be equal to objectPoints.size(), and imagePoints[i].size() +must be equal to objectPoints[i].size() for each i.
+In the old interface all the vectors of object points from different views are concatenated +together. +@param imageSize Size of the image used only to initialize the intrinsic camera matrix. +@param cameraMatrix Output 3x3 floating-point camera matrix +\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS +and/or CV\_CALIB\_FIX\_ASPECT\_RATIO are specified, some or all of fx, fy, cx, cy must be +initialized before calling the function. +@param distCoeffs Output vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. +@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view. +That is, each k-th rotation vector together with the corresponding k-th translation vector (see +the next output parameter description) brings the calibration pattern from the model coordinate +space (in which object points are specified) to the world coordinate space, that is, a real +position of the calibration pattern in the k-th pattern view (k=0.. *M* -1). +@param tvecs Output vector of translation vectors estimated for each pattern view. +@param flags Different flags that may be zero or a combination of the following values: +- **CV\_CALIB\_USE\_INTRINSIC\_GUESS** cameraMatrix contains valid initial values of +fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image +center ( imageSize is used), and focal distances are computed in a least-squares fashion. +Note, that if intrinsic parameters are known, there is no need to use this function just to +estimate extrinsic parameters. Use solvePnP instead. +- **CV\_CALIB\_FIX\_PRINCIPAL\_POINT** The principal point is not changed during the global +optimization. It stays at the center or at a different location specified when +CV\_CALIB\_USE\_INTRINSIC\_GUESS is set too. 
+- **CV\_CALIB\_FIX\_ASPECT\_RATIO** The function considers only fy as a free parameter. The +ratio fx/fy stays the same as in the input cameraMatrix . When +CV\_CALIB\_USE\_INTRINSIC\_GUESS is not set, the actual input values of fx and fy are +ignored, only their ratio is computed and used further. +- **CV\_CALIB\_ZERO\_TANGENT\_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set +to zeros and stay zero. +- **CV\_CALIB\_FIX\_K1,...,CV\_CALIB\_FIX\_K6** The corresponding radial distortion +coefficient is not changed during the optimization. If CV\_CALIB\_USE\_INTRINSIC\_GUESS is +set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. +- **CV\_CALIB\_RATIONAL\_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the +backward compatibility, this extra flag should be explicitly specified to make the +calibration function use the rational model and return 8 coefficients. If the flag is not +set, the function computes and returns only 5 distortion coefficients. +- **CALIB\_THIN\_PRISM\_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the +backward compatibility, this extra flag should be explicitly specified to make the +calibration function use the thin prism model and return 12 coefficients. If the flag is not +set, the function computes and returns only 5 distortion coefficients. +- **CALIB\_FIX\_S1\_S2\_S3\_S4** The thin prism distortion coefficients are not changed during +the optimization. If CV\_CALIB\_USE\_INTRINSIC\_GUESS is set, the coefficient from the +supplied distCoeffs matrix is used. Otherwise, it is set to 0. +@param criteria Termination criteria for the iterative optimization algorithm. + +The function estimates the intrinsic camera parameters and extrinsic parameters for each of the +views. The algorithm is based on @cite Zhang2000 and @cite BouguetMCT. The coordinates of 3D object +points and their corresponding 2D projections in each view must be specified.
That may be achieved +by using an object with a known geometry and easily detectable feature points. Such an object is +called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as +a calibration rig (see findChessboardCorners ). Currently, initialization of intrinsic parameters +(when CV\_CALIB\_USE\_INTRINSIC\_GUESS is not set) is only implemented for planar calibration +patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also +be used as long as initial cameraMatrix is provided. + +The algorithm performs the following steps: + +- Compute the initial intrinsic parameters (the option only available for planar calibration + patterns) or read them from the input parameters. The distortion coefficients are all set to + zeros initially unless some of CV\_CALIB\_FIX\_K? are specified. + +- Estimate the initial camera pose as if the intrinsic parameters have been already known. This is + done using solvePnP . + +- Run the global Levenberg-Marquardt optimization algorithm to minimize the reprojection error, + that is, the total sum of squared distances between the observed feature points imagePoints and + the projected (using the current estimates for camera parameters and the poses) object points + objectPoints. See projectPoints for details. + +The function returns the final re-projection error. + +@note + If you use a non-square (=non-NxN) grid and findChessboardCorners for calibration, and + calibrateCamera returns bad values (zero distortion coefficients, an image center very far from + (w/2-0.5,h/2-0.5), and/or large differences between \f$f_x\f$ and \f$f_y\f$ (ratios of 10:1 or more)), + then you have probably used patternSize=cvSize(rows,cols) instead of using + patternSize=cvSize(cols,rows) in findChessboardCorners . 
+ +@sa + findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort + */ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs, @@ -195,14 +787,117 @@ CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints, int flags = 0, TermCriteria criteria = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) ); -//! computes several useful camera characteristics from the camera matrix, camera frame resolution and the physical sensor size. +/** @brief Computes useful camera characteristics from the camera matrix. + +@param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or +stereoCalibrate . +@param imageSize Input image size in pixels. +@param apertureWidth Physical width in mm of the sensor. +@param apertureHeight Physical height in mm of the sensor. +@param fovx Output field of view in degrees along the horizontal sensor axis. +@param fovy Output field of view in degrees along the vertical sensor axis. +@param focalLength Focal length of the lens in mm. +@param principalPoint Principal point in mm. +@param aspectRatio \f$f_y/f_x\f$ + +The function computes various useful camera characteristics from the previously estimated camera +matrix. + +@note + Do keep in mind that the unity measure 'mm' stands for whatever unit of measure one chooses for + the chessboard pitch (it can thus be any value). + */ CV_EXPORTS_W void calibrationMatrixValues( InputArray cameraMatrix, Size imageSize, double apertureWidth, double apertureHeight, CV_OUT double& fovx, CV_OUT double& fovy, CV_OUT double& focalLength, CV_OUT Point2d& principalPoint, CV_OUT double& aspectRatio ); -//! finds intrinsic and extrinsic parameters of a stereo camera +/** @brief Calibrates the stereo camera. + +@param objectPoints Vector of vectors of the calibration pattern points. 
+@param imagePoints1 Vector of vectors of the projections of the calibration pattern points, +observed by the first camera. +@param imagePoints2 Vector of vectors of the projections of the calibration pattern points, +observed by the second camera. +@param cameraMatrix1 Input/output first camera matrix: +\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If +any of CV\_CALIB\_USE\_INTRINSIC\_GUESS , CV\_CALIB\_FIX\_ASPECT\_RATIO , +CV\_CALIB\_FIX\_INTRINSIC , or CV\_CALIB\_FIX\_FOCAL\_LENGTH are specified, some or all of the +matrix components must be initialized. See the flags description for details. +@param distCoeffs1 Input/output vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. The +output vector length depends on the flags. +@param cameraMatrix2 Input/output second camera matrix. The parameter is similar to cameraMatrix1 +@param distCoeffs2 Input/output lens distortion coefficients for the second camera. The parameter +is similar to distCoeffs1 . +@param imageSize Size of the image used only to initialize intrinsic camera matrix. +@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. +@param T Output translation vector between the coordinate systems of the cameras. +@param E Output essential matrix. +@param F Output fundamental matrix. +@param flags Different flags that may be zero or a combination of the following values: +- **CV\_CALIB\_FIX\_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E , and F +matrices are estimated. +- **CV\_CALIB\_USE\_INTRINSIC\_GUESS** Optimize some or all of the intrinsic parameters +according to the specified flags. Initial values are provided by the user. +- **CV\_CALIB\_FIX\_PRINCIPAL\_POINT** Fix the principal points during the optimization. +- **CV\_CALIB\_FIX\_FOCAL\_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
+- **CV\_CALIB\_FIX\_ASPECT\_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$ +. +- **CV\_CALIB\_SAME\_FOCAL\_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ . +- **CV\_CALIB\_ZERO\_TANGENT\_DIST** Set tangential distortion coefficients for each camera to +zeros and fix there. +- **CV\_CALIB\_FIX\_K1,...,CV\_CALIB\_FIX\_K6** Do not change the corresponding radial +distortion coefficient during the optimization. If CV\_CALIB\_USE\_INTRINSIC\_GUESS is set, +the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. +- **CV\_CALIB\_RATIONAL\_MODEL** Enable coefficients k4, k5, and k6. To provide the backward +compatibility, this extra flag should be explicitly specified to make the calibration +function use the rational model and return 8 coefficients. If the flag is not set, the +function computes and returns only 5 distortion coefficients. +- **CALIB\_THIN\_PRISM\_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the +backward compatibility, this extra flag should be explicitly specified to make the +calibration function use the thin prism model and return 12 coefficients. If the flag is not +set, the function computes and returns only 5 distortion coefficients. +- **CALIB\_FIX\_S1\_S2\_S3\_S4** The thin prism distortion coefficients are not changed during +the optimization. If CV\_CALIB\_USE\_INTRINSIC\_GUESS is set, the coefficient from the +supplied distCoeffs matrix is used. Otherwise, it is set to 0. +@param criteria Termination criteria for the iterative optimization algorithm. + +The function estimates transformation between two cameras making a stereo pair. If you have a stereo +camera where the relative position and orientation of two cameras is fixed, and if you computed +poses of an object relative to the first camera and to the second camera, (R1, T1) and (R2, T2), +respectively (this can be done with solvePnP ), then those poses definitely relate to each other. 
+This means that, given ( \f$R_1\f$,\f$T_1\f$ ), it should be possible to compute ( \f$R_2\f$,\f$T_2\f$ ). You only +need to know the position and orientation of the second camera relative to the first camera. This is +what the described function does. It computes ( \f$R\f$,\f$T\f$ ) so that: + +\f[R_2=R*R_1 +T_2=R*T_1 + T,\f] + +Optionally, it computes the essential matrix E: + +\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} *R\f] + +where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ . And the function +can also compute the fundamental matrix F: + +\f[F = cameraMatrix2^{-T} E cameraMatrix1^{-1}\f] + +Besides the stereo-related information, the function can also perform a full calibration of each of +two cameras. However, due to the high dimensionality of the parameter space and noise in the input +data, the function can diverge from the correct solution. If the intrinsic parameters can be +estimated with high accuracy for each of the cameras individually (for example, using +calibrateCamera ), you are recommended to do so and then pass CV\_CALIB\_FIX\_INTRINSIC flag to the +function along with the computed intrinsic parameters. Otherwise, if all the parameters are +estimated at once, it makes sense to restrict some parameters, for example, pass +CV\_CALIB\_SAME\_FOCAL\_LENGTH and CV\_CALIB\_ZERO\_TANGENT\_DIST flags, which is usually a +reasonable assumption. + +Similarly to calibrateCamera , the function minimizes the total re-projection error for all the +points in all the available views from both cameras. The function returns the final value of the +re-projection error. 
+ */ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1, @@ -212,7 +907,85 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints, TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) ); -//! computes the rectification transformation for a stereo camera from its intrinsic and extrinsic parameters +/** @brief Computes rectification transforms for each head of a calibrated stereo camera. + +@param cameraMatrix1 First camera matrix. +@param cameraMatrix2 Second camera matrix. +@param distCoeffs1 First camera distortion parameters. +@param distCoeffs2 Second camera distortion parameters. +@param imageSize Size of the image used for stereo calibration. +@param R Rotation matrix between the coordinate systems of the first and the second cameras. +@param T Translation vector between coordinate systems of the cameras. +@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. +@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. +@param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first +camera. +@param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second +camera. +@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). +@param flags Operation flags that may be zero or CV\_CALIB\_ZERO\_DISPARITY . If the flag is set, +the function makes the principal points of each camera have the same pixel coordinates in the +rectified views. And if the flag is not set, the function may still shift the images in the +horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the +useful image area. +@param alpha Free scaling parameter. 
If it is -1 or absent, the function performs the default +scaling. Otherwise, the parameter should be between 0 and 1. alpha=0 means that the rectified +images are zoomed and shifted so that only valid pixels are visible (no black areas after +rectification). alpha=1 means that the rectified image is decimated and shifted so that all the +pixels from the original images from the cameras are retained in the rectified images (no source +image pixels are lost). Obviously, any intermediate value yields an intermediate result between +those two extreme cases. +@param newImageSize New image resolution after rectification. The same size should be passed to +initUndistortRectifyMap (see the stereo\_calib.cpp sample in OpenCV samples directory). When (0,0) +is passed (default), it is set to the original imageSize . Setting it to larger value can help you +preserve details in the original image, especially when there is a big radial distortion. +@param validPixROI1 Optional output rectangles inside the rectified images where all the pixels +are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller +(see the picture below). +@param validPixROI2 Optional output rectangles inside the rectified images where all the pixels +are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller +(see the picture below). + +The function computes the rotation matrices for each camera that (virtually) make both camera image +planes the same plane. Consequently, this makes all the epipolar lines parallel and thus simplifies +the dense stereo correspondence problem. The function takes the matrices computed by stereoCalibrate +as input. As output, it provides two rotation matrices and also two projection matrices in the new +coordinates. 
The function distinguishes the following two cases: + +- **Horizontal stereo**: the first and the second camera views are shifted relative to each other + mainly along the x axis (with possible small vertical shift). In the rectified images, the + corresponding epipolar lines in the left and right cameras are horizontal and have the same + y-coordinate. P1 and P2 look like: + + \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx_1 & 0 \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f] + + \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx_2 & T_x*f \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f] + + where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if + CV\_CALIB\_ZERO\_DISPARITY is set. + +- **Vertical stereo**: the first and the second camera views are shifted relative to each other + mainly in vertical direction (and probably a bit in the horizontal direction too). The epipolar + lines in the rectified images are vertical and have the same x-coordinate. P1 and P2 look like: + + \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_1 & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f] + + \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_2 & T_y*f \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f] + + where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if CALIB\_ZERO\_DISPARITY is + set. + +As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera +matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to +initialize the rectification map for each camera. + +See below the screenshot from the stereo\_calib.cpp sample. Some red horizontal lines pass through +the corresponding image regions. This means that the images are well rectified, which is what most +stereo correspondence algorithms rely on. The green rectangles are roi1 and roi2 . You see that +their interiors are all valid pixels. 
+ +![image](pics/stereo_undistort.jpg) + */ CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs1, InputArray cameraMatrix2, InputArray distCoeffs2, Size imageSize, InputArray R, InputArray T, @@ -222,7 +995,35 @@ CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs double alpha = -1, Size newImageSize = Size(), CV_OUT Rect* validPixROI1 = 0, CV_OUT Rect* validPixROI2 = 0 ); -//! computes the rectification transformation for an uncalibrated stereo camera (zero distortion is assumed) +/** @brief Computes a rectification transform for an uncalibrated stereo camera. + +@param points1 Array of feature points in the first image. +@param points2 The corresponding points in the second image. The same formats as in +findFundamentalMat are supported. +@param F Input fundamental matrix. It can be computed from the same set of point pairs using +findFundamentalMat . +@param imgSize Size of the image. +@param H1 Output rectification homography matrix for the first image. +@param H2 Output rectification homography matrix for the second image. +@param threshold Optional threshold used to filter out the outliers. If the parameter is greater +than zero, all the point pairs that do not comply with the epipolar geometry (that is, the points +for which \f$|\texttt{points2[i]}^T*\texttt{F}*\texttt{points1[i]}|>\texttt{threshold}\f$ ) are +rejected prior to computing the homographies. Otherwise, all the points are considered inliers. + +The function computes the rectification transformations without knowing intrinsic parameters of the +cameras and their relative position in the space, which explains the suffix "uncalibrated". Another +related difference from stereoRectify is that the function outputs not the rectification +transformations in the object (3D) space, but the planar perspective transformations encoded by the +homography matrices H1 and H2 . The function implements the algorithm @cite Hartley99.
+ +@note + While the algorithm does not need to know the intrinsic parameters of the cameras, it heavily + depends on the epipolar geometry. Therefore, if the camera lenses have a significant distortion, + it would be better to correct it before computing the fundamental matrix and calling this + function. For example, distortion coefficients can be estimated for each head of stereo camera + separately by using calibrateCamera . Then, the images can be corrected using undistort , or + just the point coordinates can be corrected with undistortPoints . + */ CV_EXPORTS_W bool stereoRectifyUncalibrated( InputArray points1, InputArray points2, InputArray F, Size imgSize, OutputArray H1, OutputArray H2, @@ -240,60 +1041,311 @@ CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distC OutputArray Q, double alpha, Size newImgSize, CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags ); -//! returns the optimal new camera matrix +/** @brief Returns the new camera matrix based on the free scaling parameter. + +@param cameraMatrix Input camera matrix. +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements. If +the vector is NULL/empty, the zero distortion coefficients are assumed. +@param imageSize Original image size. +@param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are +valid) and 1 (when all the source image pixels are retained in the undistorted image). See +stereoRectify for details. +@param newImgSize Image size after rectification. By default, it is set to imageSize . +@param validPixROI Optional output rectangle that outlines all-good-pixels region in the +undistorted image. See roi1, roi2 description in stereoRectify . +@param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the +principal point should be at the image center or not.
By default, the principal point is chosen to +best fit a subset of the source image (determined by alpha) to the corrected image. +@return new\_camera\_matrix Output new camera matrix. + +The function computes and returns the optimal new camera matrix based on the free scaling parameter. +By varying this parameter, you may retrieve only sensible pixels alpha=0 , keep all the original +image pixels if there is valuable information in the corners alpha=1 , or get something in between. +When alpha\>0 , the undistortion result is likely to have some black pixels corresponding to +"virtual" pixels outside of the captured distorted image. The original camera matrix, distortion +coefficients, the computed new camera matrix, and newImageSize should be passed to +initUndistortRectifyMap to produce the maps for remap . + */ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs, Size imageSize, double alpha, Size newImgSize = Size(), CV_OUT Rect* validPixROI = 0, bool centerPrincipalPoint = false); -//! converts point coordinates from normal pixel coordinates to homogeneous coordinates ((x,y)->(x,y,1)) +/** @brief Converts points from Euclidean to homogeneous space. + +@param src Input vector of N-dimensional points. +@param dst Output vector of N+1-dimensional points. + +The function converts points from Euclidean to homogeneous space by appending 1's to the tuple of +point coordinates. That is, each point (x1, x2, ..., xn) is converted to (x1, x2, ..., xn, 1). + */ CV_EXPORTS_W void convertPointsToHomogeneous( InputArray src, OutputArray dst ); -//! converts point coordinates from homogeneous to normal pixel coordinates ((x,y,z)->(x/z, y/z)) +/** @brief Converts points from homogeneous to Euclidean space. + +@param src Input vector of N-dimensional points. +@param dst Output vector of N-1-dimensional points. + +The function converts points from homogeneous to Euclidean space using perspective projection. That is, +each point (x1, x2, ...
x(n-1), xn) is converted to (x1/xn, x2/xn, ..., x(n-1)/xn). When xn=0, the +output point coordinates will be (0,0,0,...). + */ CV_EXPORTS_W void convertPointsFromHomogeneous( InputArray src, OutputArray dst ); -//! for backward compatibility +/** @brief Converts points to/from homogeneous coordinates. + +@param src Input array or vector of 2D, 3D, or 4D points. +@param dst Output vector of 2D, 3D, or 4D points. + +The function converts 2D or 3D points from/to homogeneous coordinates by calling either +convertPointsToHomogeneous or convertPointsFromHomogeneous. + +@note The function is obsolete. Use one of the previous two functions instead. + */ CV_EXPORTS void convertPointsHomogeneous( InputArray src, OutputArray dst ); -//! finds fundamental matrix from a set of corresponding 2D points +/** @brief Calculates a fundamental matrix from the corresponding points in two images. + +@param points1 Array of N points from the first image. The point coordinates should be +floating-point (single or double precision). +@param points2 Array of the second image points of the same size and format as points1 . +@param method Method for computing a fundamental matrix. +- **CV\_FM\_7POINT** for a 7-point algorithm. \f$N = 7\f$ +- **CV\_FM\_8POINT** for an 8-point algorithm. \f$N \ge 8\f$ +- **CV\_FM\_RANSAC** for the RANSAC algorithm. \f$N \ge 8\f$ +- **CV\_FM\_LMEDS** for the LMedS algorithm. \f$N \ge 8\f$ +@param param1 Parameter used for RANSAC. It is the maximum distance from a point to an epipolar +line in pixels, beyond which the point is considered an outlier and is not used for computing the +final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the +point localization, image resolution, and the image noise. +@param param2 Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level +of confidence (probability) that the estimated matrix is correct. 
+@param mask Optional output array of N elements, every element of which is set to 0 for outliers and to 1 for the other points. The array is computed only in the RANSAC and LMedS methods. + +The epipolar geometry is described by the following equation: + +\f[[p_2; 1]^T F [p_1; 1] = 0\f] + +where \f$F\f$ is a fundamental matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the +second images, respectively. + +The function calculates the fundamental matrix using one of four methods listed above and returns +the found fundamental matrix. Normally just one matrix is found. But in case of the 7-point +algorithm, the function may return up to 3 solutions ( \f$9 \times 3\f$ matrix that stores all 3 +matrices sequentially). + +The calculated fundamental matrix may be passed further to computeCorrespondEpilines that finds the +epipolar lines corresponding to the specified points. It can also be passed to +stereoRectifyUncalibrated to compute the rectification transformation. +@code + // Example. Estimation of fundamental matrix using the RANSAC algorithm + int point_count = 100; + vector<Point2f> points1(point_count); + vector<Point2f> points2(point_count); + + // initialize the points here ... + for( int i = 0; i < point_count; i++ ) + { + points1[i] = ...; + points2[i] = ...; + } + + Mat fundamental_matrix = + findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99); +@endcode + */ CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2, int method = FM_RANSAC, double param1 = 3., double param2 = 0.99, OutputArray mask = noArray() ); -//! variant of findFundamentalMat for backward compatibility +/** @overload */ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, OutputArray mask, int method = FM_RANSAC, double param1 = 3., double param2 = 0.99 ); -//! finds essential matrix from a set of corresponding 2D points using five-point algorithm +/** @brief Calculates an essential matrix from the corresponding points in two images. + +@param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should +be floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 . +@param focal focal length of the camera. Note that this function assumes that points1 and points2 +are feature points from cameras with same focal length and principal point. +@param pp principal point of the camera. +@param method Method for computing an essential matrix. +- **RANSAC** for the RANSAC algorithm. +- **LMEDS** for the LMedS algorithm. +@param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar +line in pixels, beyond which the point is considered an outlier and is not used for computing the +final essential matrix. It can be set to something like 1-3, depending on the accuracy of the +point localization, image resolution, and the image noise. +@param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of +confidence (probability) that the estimated matrix is correct. +@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1 +for the other points. The array is computed only in the RANSAC and LMedS methods. + +This function estimates an essential matrix based on the five-point algorithm solver in @cite Nister03. +@cite SteweniusCFS is also a related work. The epipolar geometry is described by the following equation: + +\f[[p_2; 1]^T K^{-T} E K^{-1} [p_1; 1] = 0 \\\f]\f[K = +\begin{bmatrix} +f & 0 & x_{pp} \\ +0 & f & y_{pp} \\ +0 & 0 & 1 +\end{bmatrix}\f] + +where \f$E\f$ is an essential matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the +second images, respectively. The result of this function may be passed further to +decomposeEssentialMat or recoverPose to recover the relative pose between cameras. + */ CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2, double focal = 1.0, Point2d pp = Point2d(0, 0), int method = RANSAC, double prob = 0.999, double threshold = 1.0, OutputArray mask = noArray() ); -//! 
decompose essential matrix to possible rotation matrix and one translation vector +/** @brief Decompose an essential matrix to possible rotations and translation. + +@param E The input essential matrix. +@param R1 One possible rotation matrix. +@param R2 Another possible rotation matrix. +@param t One possible translation. + +This function decomposes an essential matrix E using SVD decomposition @cite HartleyZ00. Generally 4 +possible poses exist for a given E. They are \f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$. By +decomposing E, you can only get the direction of the translation, so the function returns unit t. + */ CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t ); -//! recover relative camera pose from a set of corresponding 2D points +/** @brief Recover relative camera rotation and translation from an estimated essential matrix and the +corresponding points in two images, using cheirality check. Returns the number of inliers which pass +the check. + +@param E The input essential matrix. +@param points1 Array of N 2D points from the first image. The point coordinates should be +floating-point (single or double precision). +@param points2 Array of the second image points of the same size and format as points1 . +@param R Recovered relative rotation. +@param t Recovered relative translation. +@param focal Focal length of the camera. Note that this function assumes that points1 and points2 +are feature points from cameras with same focal length and principal point. +@param pp Principal point of the camera. +@param mask Input/output mask for inliers in points1 and points2. +If it is not empty, then it marks inliers in points1 and points2 for the given essential +matrix E. Only these inliers will be used to recover pose. The output mask marks only the inliers +which pass the cheirality check. 
+This function decomposes an essential matrix using decomposeEssentialMat and then verifies possible +pose hypotheses by doing cheirality check. The cheirality check basically means that the +triangulated 3D points should have positive depth. Some details can be found in @cite Nister03. + +This function can be used to process output E and mask from findEssentialMat. In this scenario, +points1 and points2 are the same input for findEssentialMat. : +@code + // Example. Estimation of fundamental matrix using the RANSAC algorithm + int point_count = 100; + vector points1(point_count); + vector points2(point_count); + + // initialize the points here ... + for( int i = 0; i < point_count; i++ ) + { + points1[i] = ...; + points2[i] = ...; + } + + double focal = 1.0; + cv::Point2d pp(0.0, 0.0); + Mat E, R, t, mask; + + E = findEssentialMat(points1, points2, focal, pp, RANSAC, 0.999, 1.0, mask); + recoverPose(E, points1, points2, R, t, focal, pp, mask); +@endcode + */ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2, OutputArray R, OutputArray t, double focal = 1.0, Point2d pp = Point2d(0, 0), InputOutputArray mask = noArray() ); -//! finds coordinates of epipolar lines corresponding the specified points +/** @brief For points in an image of a stereo pair, computes the corresponding epilines in the other image. + +@param points Input points. \f$N \times 1\f$ or \f$1 \times N\f$ matrix of type CV\_32FC2 or +vector\ . +@param whichImage Index of the image (1 or 2) that contains the points . +@param F Fundamental matrix that can be estimated using findFundamentalMat or stereoRectify . +@param lines Output vector of the epipolar lines corresponding to the points in the other image. +Each line \f$ax + by + c=0\f$ is encoded by 3 numbers \f$(a, b, c)\f$ . + +For every point in one of the two images of a stereo pair, the function finds the equation of the +corresponding epipolar line in the other image. 
+ +From the fundamental matrix definition (see findFundamentalMat ), line \f$l^{(2)}_i\f$ in the second +image for the point \f$p^{(1)}_i\f$ in the first image (when whichImage=1 ) is computed as: + +\f[l^{(2)}_i = F p^{(1)}_i\f] + +And vice versa, when whichImage=2, \f$l^{(1)}_i\f$ is computed from \f$p^{(2)}_i\f$ as: + +\f[l^{(1)}_i = F^T p^{(2)}_i\f] + +Line coefficients are defined up to a scale. They are normalized so that \f$a_i^2+b_i^2=1\f$ . + */ CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage, InputArray F, OutputArray lines ); +/** @brief Reconstructs points by triangulation. + +@param projMatr1 3x4 projection matrix of the first camera. +@param projMatr2 3x4 projection matrix of the second camera. +@param projPoints1 2xN array of feature points in the first image. In case of c++ version it can +be also a vector of feature points or two-channel matrix of size 1xN or Nx1. +@param projPoints2 2xN array of corresponding points in the second image. In case of c++ version +it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1. +@param points4D 4xN array of reconstructed points in homogeneous coordinates. + +The function reconstructs 3-dimensional points (in homogeneous coordinates) by using their +observations with a stereo camera. Projections matrices can be obtained from stereoRectify. + +@note + Keep in mind that all input data should be of float type in order for this function to work. + +@sa + reprojectImageTo3D + */ CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2, InputArray projPoints1, InputArray projPoints2, OutputArray points4D ); +/** @brief Refines coordinates of corresponding points. + +@param F 3x3 fundamental matrix. +@param points1 1xN array containing the first set of points. +@param points2 1xN array containing the second set of points. +@param newPoints1 The optimized points1. +@param newPoints2 The optimized points2. 
+ +The function implements the Optimal Triangulation Method (see Multiple View Geometry for details). +For each given point correspondence points1[i] \<-\> points2[i], and a fundamental matrix F, it +computes the corrected correspondences newPoints1[i] \<-\> newPoints2[i] that minimize the geometric +error \f$d(points1[i], newPoints1[i])^2 + d(points2[i],newPoints2[i])^2\f$ (where \f$d(a,b)\f$ is the +geometric distance between points \f$a\f$ and \f$b\f$ ) subject to the epipolar constraint +\f$newPoints2^T * F * newPoints1 = 0\f$ . + */ CV_EXPORTS_W void correctMatches( InputArray F, InputArray points1, InputArray points2, OutputArray newPoints1, OutputArray newPoints2 ); -//! filters off speckles (small regions of incorrectly computed disparity) +/** @brief Filters off small noise blobs (speckles) in the disparity map + +@param img The input 16-bit signed disparity image +@param newVal The disparity value used to paint-off the speckles +@param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not +affected by the algorithm +@param maxDiff Maximum difference between neighbor disparity pixels to put them into the same +blob. Note that since StereoBM, StereoSGBM and may be other algorithms return a fixed-point +disparity map, where disparity values are multiplied by 16, this scale factor should be taken into +account when specifying this parameter value. +@param buf The optional temporary buffer to avoid memory allocation within the function. + */ CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal, int maxSpeckleSize, double maxDiff, InputOutputArray buf = noArray() ); @@ -308,23 +1360,77 @@ CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost int minDisparity, int numberOfDisparities, int disp12MaxDisp = 1 ); -//! reprojects disparity image to 3D: (x,y,d)->(X,Y,Z) using the matrix Q returned by cv::stereoRectify +/** @brief Reprojects a disparity image to 3D space. 
+ +@param disparity Input single-channel 8-bit unsigned, 16-bit signed, 32-bit signed or 32-bit +floating-point disparity image. +@param _3dImage Output 3-channel floating-point image of the same size as disparity . Each +element of \_3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity +map. +@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with stereoRectify. +@param handleMissingValues Indicates whether the function should handle missing values (i.e. +points where the disparity was not computed). If handleMissingValues=true, then pixels with the +minimal disparity that corresponds to the outliers (see StereoMatcher::compute ) are transformed +to 3D points with a very large Z value (currently set to 10000). +@param ddepth The optional output array depth. If it is -1, the output image will have CV\_32F +depth. ddepth can also be set to CV\_16S, CV\_32S or CV\_32F. + +The function transforms a single-channel disparity map to a 3-channel image representing a 3D +surface. That is, for each pixel (x,y) and the corresponding disparity d=disparity(x,y) , it +computes: + +\f[\begin{array}{l} [X \; Y \; Z \; W]^T = \texttt{Q} *[x \; y \; \texttt{disparity} (x,y) \; 1]^T \\ \texttt{\_3dImage} (x,y) = (X/W, \; Y/W, \; Z/W) \end{array}\f] + +The matrix Q can be an arbitrary \f$4 \times 4\f$ matrix (for example, the one computed by +stereoRectify). To reproject a sparse set of points {(x,y,d),...} to 3D space, use +perspectiveTransform . + */ CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity, OutputArray _3dImage, InputArray Q, bool handleMissingValues = false, int ddepth = -1 ); +/** @brief Computes an optimal affine transformation between two 3D point sets. + +@param src First input 3D point set. +@param dst Second input 3D point set. +@param out Output 3D affine transformation matrix \f$3 \times 4\f$ . +@param inliers Output vector indicating which points are inliers. 
+@param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as +an inlier. +@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything +between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation +significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation. + +The function estimates an optimal 3D affine transformation between two 3D point sets using the +RANSAC algorithm. + */ CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst, OutputArray out, OutputArray inliers, double ransacThreshold = 3, double confidence = 0.99); +/** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s). +@param H The input homography matrix between two images. +@param K The input intrinsic camera calibration matrix. +@param rotations Array of rotation matrices. +@param translations Array of translation matrices. +@param normals Array of plane normal matrices. + +This function extracts relative camera motion between two views observing a planar object from the +homography H induced by the plane. The intrinsic camera matrix K must also be provided. The function +may return up to four mathematical solution sets. At least two of the solutions may further be +invalidated if point correspondences are available by applying positive depth constraint (all points +must be in front of the camera). The decomposition method is described in detail in @cite Malis. + */ CV_EXPORTS_W int decomposeHomographyMat(InputArray H, InputArray K, OutputArrayOfArrays rotations, OutputArrayOfArrays translations, OutputArrayOfArrays normals); +/** @brief The base class for stereo correspondence algorithms. 
+ */ class CV_EXPORTS_W StereoMatcher : public Algorithm { public: @@ -332,6 +1438,14 @@ public: DISP_SCALE = (1 << DISP_SHIFT) }; + /** @brief Computes disparity map for the specified stereo pair + + @param left Left 8-bit single-channel image. + @param right Right image of the same size and the same type as the left one. + @param disparity Output disparity map. It has the same size as the input images. Some algorithms, + like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value + has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map. + */ CV_WRAP virtual void compute( InputArray left, InputArray right, OutputArray disparity ) = 0; @@ -355,7 +1469,9 @@ public: }; - +/** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and +contributed to OpenCV by K. Konolige. + */ class CV_EXPORTS_W StereoBM : public StereoMatcher { public: @@ -387,10 +1503,40 @@ public: CV_WRAP virtual Rect getROI2() const = 0; CV_WRAP virtual void setROI2(Rect roi2) = 0; + /** @brief Creates StereoBM object + + @param numDisparities the disparity search range. For each pixel algorithm will find the best + disparity from 0 (default minimum disparity) to numDisparities. The search range can then be + shifted by changing the minimum disparity. + @param blockSize the linear size of the blocks compared by the algorithm. The size should be odd + (as the block is centered at the current pixel). Larger block size implies smoother, though less + accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher + chance for algorithm to find a wrong correspondence. + + The function create StereoBM object. You can then call StereoBM::compute() to compute disparity for + a specific stereo pair. + */ CV_WRAP static Ptr create(int numDisparities = 0, int blockSize = 21); }; - +/** @brief The class implements the modified H. 
Hirschmuller algorithm @cite HH08 that differs from the original +one as follows: + +- By default, the algorithm is single-pass, which means that you consider only 5 directions +instead of 8. Set mode=StereoSGBM::MODE\_HH in createStereoSGBM to run the full variant of the +algorithm but beware that it may consume a lot of memory. +- The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the +blocks to single pixels. +- Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi +sub-pixel metric from @cite BT98 is used. Though, the color images are supported as well. +- Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for +example: pre-filtering (StereoBM::PREFILTER\_XSOBEL type) and post-filtering (uniqueness +check, quadratic interpolation and speckle filtering). + +@note + - (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found + at opencv\_source\_code/samples/python2/stereo\_match.py + */ class CV_EXPORTS_W StereoSGBM : public StereoMatcher { public: @@ -415,6 +1561,43 @@ public: CV_WRAP virtual int getMode() const = 0; CV_WRAP virtual void setMode(int mode) = 0; + /** @brief Creates StereoSGBM object + + @param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes + rectification algorithms can shift images, so this parameter needs to be adjusted accordingly. + @param numDisparities Maximum disparity minus minimum disparity. The value is always greater than + zero. In the current implementation, this parameter must be divisible by 16. + @param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be + somewhere in the 3..11 range. + @param P1 The first parameter controlling the disparity smoothness. See below. + @param P2 The second parameter controlling the disparity smoothness. The larger the values are, + the smoother the disparity is. 
P1 is the penalty on the disparity change by plus or minus 1 + between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor + pixels. The algorithm requires P2 \> P1 . See stereo\_match.cpp sample where some reasonably good + P1 and P2 values are shown (like 8\*number\_of\_image\_channels\*SADWindowSize\*SADWindowSize and + 32\*number\_of\_image\_channels\*SADWindowSize\*SADWindowSize , respectively). + @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right + disparity check. Set it to a non-positive value to disable the check. + @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first + computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval. + The result values are passed to the Birchfield-Tomasi pixel cost function. + @param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function + value should "win" the second best value to consider the found match correct. Normally, a value + within the 5-15 range is good enough. + @param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles + and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the + 50-200 range. + @param speckleRange Maximum disparity variation within each connected component. If you do speckle + filtering, set the parameter to a positive value, it will be implicitly multiplied by 16. + Normally, 1 or 2 is good enough. + @param mode Set it to StereoSGBM::MODE\_HH to run the full-scale two-pass dynamic programming + algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and + huge for HD-size pictures. By default, it is set to false . + + The first constructor initializes StereoSGBM with all the default parameters. So, you only have to + set StereoSGBM::numDisparities at minimum. 
The second constructor enables you to set each parameter + to a custom value. + */ CV_WRAP static Ptr create(int minDisparity, int numDisparities, int blockSize, int P1 = 0, int P2 = 0, int disp12MaxDiff = 0, int preFilterCap = 0, int uniquenessRatio = 0, @@ -422,8 +1605,15 @@ public: int mode = StereoSGBM::MODE_SGBM); }; + +/** @brief The methods in this namespace use a so-called fisheye camera model. + @ingroup calib3d_fisheye +*/ namespace fisheye { +//! @addtogroup calib3d_fisheye +//! @{ + enum{ CALIB_USE_INTRINSIC_GUESS = 1, CALIB_RECOMPUTE_EXTRINSIC = 2, @@ -436,52 +1626,233 @@ namespace fisheye CALIB_FIX_INTRINSIC = 256 }; - //! projects 3D points using fisheye model + /** @brief Projects points using fisheye model + + @param objectPoints Array of object points, 1xN/Nx1 3-channel (or vector\<Point3f\> ), where N is + the number of points in the view. + @param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or + vector\<Point2f\>. + @param affine + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param alpha The skew coefficient. + @param jacobian Optional output 2Nx15 jacobian matrix of derivatives of image points with respect + to components of the focal lengths, coordinates of the principal point, distortion coefficients, + rotation vector, translation vector, and the skew. In the old interface different components of + the jacobian are returned via different output parameters. + + The function computes projections of 3D points to the image plane given intrinsic and extrinsic + camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of + image points coordinates (as functions of all the input parameters) with respect to the particular + parameters, intrinsic and/or extrinsic. 
+ */ CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, const Affine3d& affine, InputArray K, InputArray D, double alpha = 0, OutputArray jacobian = noArray()); - //! projects points using fisheye model + /** @overload */ CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, InputArray rvec, InputArray tvec, InputArray K, InputArray D, double alpha = 0, OutputArray jacobian = noArray()); - //! distorts 2D points using fisheye model + /** @brief Distorts 2D points using fisheye model. + + @param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\ ), where N is + the number of points in the view. + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param alpha The skew coefficient. + @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . + */ CV_EXPORTS void distortPoints(InputArray undistorted, OutputArray distorted, InputArray K, InputArray D, double alpha = 0); - //! undistorts 2D points using fisheye model + /** @brief Undistorts 2D points using fisheye model + + @param distorted Array of object points, 1xN/Nx1 2-channel (or vector\ ), where N is the + number of points in the view. + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 + 1-channel or 1x1 3-channel + @param P New camera matrix (3x3) or new projection matrix (3x4) + @param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . + */ CV_EXPORTS void undistortPoints(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray()); - //! 
computing undistortion and rectification maps for image transform by cv::remap() - //! If D is empty zero distortion is used, if R or P is empty identity matrixes are used + /** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero + distortion is used, if R or P is empty identity matrixes are used. + + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 + 1-channel or 1x1 3-channel + @param P New camera matrix (3x3) or new projection matrix (3x4) + @param size Undistorted image size. + @param m1type Type of the first output map that can be CV\_32FC1 or CV\_16SC2 . See convertMaps() + for details. + @param map1 The first output map. + @param map2 The second output map. + */ CV_EXPORTS void initUndistortRectifyMap(InputArray K, InputArray D, InputArray R, InputArray P, const cv::Size& size, int m1type, OutputArray map1, OutputArray map2); - //! undistorts image, optionally changes resolution and camera matrix. If Knew zero identity matrix is used + /** @brief Transforms an image to compensate for fisheye lens distortion. + + @param distorted image with fisheye lens distortion. + @param undistorted Output image with compensated fisheye lens distortion. + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you + may additionally scale and shift the result by using a different matrix. + @param new_size + + The function transforms an image to compensate radial and tangential lens distortion. 
+ + The function is simply a combination of fisheye::initUndistortRectifyMap (with unity R ) and remap + (with bilinear interpolation). See the former function for details of the transformation being + performed. + + See below the results of undistortImage. + - a\) result of undistort of perspective camera model (all possible coefficients (k\_1, k\_2, k\_3, + k\_4, k\_5, k\_6) of distortion were optimized under calibration) + - b\) result of fisheye::undistortImage of fisheye camera model (all possible coefficients (k\_1, k\_2, + k\_3, k\_4) of fisheye distortion were optimized under calibration) + - c\) original image was captured with fisheye lens + + Pictures a) and b) almost the same. But if we consider points of image located far from the center + of image, we can notice that on image a) these points are distorted. + + ![image](pics/fisheye_undistorted.jpg) + */ CV_EXPORTS void undistortImage(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray Knew = cv::noArray(), const Size& new_size = Size()); - //! estimates new camera matrix for undistortion or rectification + /** @brief Estimates new camera matrix for undistortion or rectification. + + @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$. + @param image_size + @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 + 1-channel or 1x1 3-channel + @param P New camera matrix (3x3) or new projection matrix (3x4) + @param balance Sets the new focal length in range between the min focal length and the max focal + length. Balance is in range of [0, 1]. + @param new_size + @param fov_scale Divisor for new focal length. 
+ */ CV_EXPORTS void estimateNewCameraMatrixForUndistortRectify(InputArray K, InputArray D, const Size &image_size, InputArray R, OutputArray P, double balance = 0.0, const Size& new_size = Size(), double fov_scale = 1.0); - //! performs camera calibaration + /** @brief Performs camera calibration + + @param objectPoints vector of vectors of calibration pattern points in the calibration pattern + coordinate space. + @param imagePoints vector of vectors of the projections of calibration pattern points. + imagePoints.size() must be equal to objectPoints.size(), and imagePoints[i].size() must be equal to + objectPoints[i].size() for each i. + @param image_size Size of the image used only to initialize the intrinsic camera matrix. + @param K Output 3x3 floating-point camera matrix + \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If + fisheye::CALIB\_USE\_INTRINSIC\_GUESS is specified, some or all of fx, fy, cx, cy must be + initialized before calling the function. + @param D Output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$. + @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view. + That is, each k-th rotation vector together with the corresponding k-th translation vector (see + the next output parameter description) brings the calibration pattern from the model coordinate + space (in which object points are specified) to the world coordinate space, that is, a real + position of the calibration pattern in the k-th pattern view (k=0.. *M* -1). + @param tvecs Output vector of translation vectors estimated for each pattern view. + @param flags Different flags that may be zero or a combination of the following values: + - **fisheye::CALIB\_USE\_INTRINSIC\_GUESS** K contains valid initial values of + fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image + center ( image_size is used), and focal distances are computed in a least-squares fashion. 
+ - **fisheye::CALIB\_RECOMPUTE\_EXTRINSIC** Extrinsic will be recomputed after each iteration + of intrinsic optimization. + - **fisheye::CALIB\_CHECK\_COND** The functions will check validity of condition number. + - **fisheye::CALIB\_FIX\_SKEW** Skew coefficient (alpha) is set to zero and stay zero. + - **fisheye::CALIB\_FIX\_K1..4** Selected distortion coefficients are set to zeros and stay + zero. + @param criteria Termination criteria for the iterative optimization algorithm. + */ CV_EXPORTS double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size, InputOutputArray K, InputOutputArray D, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags = 0, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON)); - //! stereo rectification estimation + /** @brief Stereo rectification for fisheye camera model + + @param K1 First camera matrix. + @param D1 First camera distortion parameters. + @param K2 Second camera matrix. + @param D2 Second camera distortion parameters. + @param imageSize Size of the image used for stereo calibration. + @param R Rotation matrix between the coordinate systems of the first and the second + cameras. + @param tvec Translation vector between coordinate systems of the cameras. + @param R1 Output 3x3 rectification transform (rotation matrix) for the first camera. + @param R2 Output 3x3 rectification transform (rotation matrix) for the second camera. + @param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first + camera. + @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second + camera. + @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). + @param flags Operation flags that may be zero or CV\_CALIB\_ZERO\_DISPARITY . 
If the flag is set, + the function makes the principal points of each camera have the same pixel coordinates in the + rectified views. And if the flag is not set, the function may still shift the images in the + horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the + useful image area. + @param newImageSize New image resolution after rectification. The same size should be passed to + initUndistortRectifyMap (see the stereo\_calib.cpp sample in OpenCV samples directory). When (0,0) + is passed (default), it is set to the original imageSize . Setting it to larger value can help you + preserve details in the original image, especially when there is a big radial distortion. + @param balance Sets the new focal length in range between the min focal length and the max focal + length. Balance is in range of [0, 1]. + @param fov_scale Divisor for new focal length. + */ CV_EXPORTS void stereoRectify(InputArray K1, InputArray D1, InputArray K2, InputArray D2, const Size &imageSize, InputArray R, InputArray tvec, OutputArray R1, OutputArray R2, OutputArray P1, OutputArray P2, OutputArray Q, int flags, const Size &newImageSize = Size(), double balance = 0.0, double fov_scale = 1.0); - //! performs stereo calibaration + /** @brief Performs stereo calibration + + @param objectPoints Vector of vectors of the calibration pattern points. + @param imagePoints1 Vector of vectors of the projections of the calibration pattern points, + observed by the first camera. + @param imagePoints2 Vector of vectors of the projections of the calibration pattern points, + observed by the second camera. + @param K1 Input/output first camera matrix: + \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If + any of fisheye::CALIB\_USE\_INTRINSIC\_GUESS , fisheye::CV\_CALIB\_FIX\_INTRINSIC are specified, + some or all of the matrix components must be initialized. 
+ @param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements. + @param K2 Input/output second camera matrix. The parameter is similar to K1 . + @param D2 Input/output lens distortion coefficients for the second camera. The parameter is + similar to D1 . + @param imageSize Size of the image used only to initialize intrinsic camera matrix. + @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. + @param T Output translation vector between the coordinate systems of the cameras. + @param flags Different flags that may be zero or a combination of the following values: + - **fisheye::CV\_CALIB\_FIX\_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices + are estimated. + - **fisheye::CALIB\_USE\_INTRINSIC\_GUESS** K1, K2 contains valid initial values of + fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image + center (imageSize is used), and focal distances are computed in a least-squares fashion. + - **fisheye::CALIB\_RECOMPUTE\_EXTRINSIC** Extrinsic will be recomputed after each iteration + of intrinsic optimization. + - **fisheye::CALIB\_CHECK\_COND** The functions will check validity of condition number. + - **fisheye::CALIB\_FIX\_SKEW** Skew coefficient (alpha) is set to zero and stay zero. + - **fisheye::CALIB\_FIX\_K1..4** Selected distortion coefficients are set to zeros and stay + zero. + @param criteria Termination criteria for the iterative optimization algorithm. + */ CV_EXPORTS double stereoCalibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputOutputArray K1, InputOutputArray D1, InputOutputArray K2, InputOutputArray D2, Size imageSize, OutputArray R, OutputArray T, int flags = CALIB_FIX_INTRINSIC, TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON)); +//! @} calib3d_fisheye } +//! 
@} calib3d + } // cv #endif diff --git a/modules/calib3d/include/opencv2/calib3d/calib3d_c.h b/modules/calib3d/include/opencv2/calib3d/calib3d_c.h index c99c25a4d9..2392692389 100644 --- a/modules/calib3d/include/opencv2/calib3d/calib3d_c.h +++ b/modules/calib3d/include/opencv2/calib3d/calib3d_c.h @@ -50,6 +50,10 @@ extern "C" { #endif +/** @addtogroup calib3d_c + @{ + */ + /****************************************************************************************\ * Camera Calibration, Pose Estimation and Stereo * \****************************************************************************************/ @@ -371,6 +375,8 @@ CVAPI(void) cvReprojectImageTo3D( const CvArr* disparityImage, CvArr* _3dImage, const CvMat* Q, int handleMissingValues CV_DEFAULT(0) ); +/** @} calib3d_c */ + #ifdef __cplusplus } // extern "C" diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index a60a93bddf..31ad66fc16 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -46,18 +46,54 @@ #include "opencv2/core.hpp" #include "opencv2/flann/miniflann.hpp" +/** + @defgroup features2d 2D Features Framework + @{ + @defgroup features2d_main Feature Detection and Description + @defgroup features2d_match Descriptor Matchers + +Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that enables you to +easily switch between different algorithms solving the same problem. This section is devoted to +matching descriptors that are represented as vectors in a multidimensional space. All objects that +implement vector descriptor matchers inherit the DescriptorMatcher interface. 
+ +@note + - An example explaining keypoint matching can be found at + opencv\_source\_code/samples/cpp/descriptor\_extractor\_matcher.cpp + - An example on descriptor matching evaluation can be found at + opencv\_source\_code/samples/cpp/detector\_descriptor\_matcher\_evaluation.cpp + - An example on one to many image matching can be found at + opencv\_source\_code/samples/cpp/matching\_to\_many\_images.cpp + + @defgroup features2d_draw Drawing Function of Keypoints and Matches + @defgroup features2d_category Object Categorization + +This section describes approaches based on local 2D features and used to categorize objects. + +@note + - A complete Bag-Of-Words sample can be found at + opencv\_source\_code/samples/cpp/bagofwords\_classification.cpp + - (Python) An example using the features2D framework to perform object categorization can be + found at opencv\_source\_code/samples/python2/find\_obj.py + + @} + */ + namespace cv { +//! @addtogroup features2d +//! @{ + // //! writes vector of keypoints to the file storage // CV_EXPORTS void write(FileStorage& fs, const String& name, const std::vector& keypoints); // //! reads vector of keypoints from the specified file storage node // CV_EXPORTS void read(const FileNode& node, CV_OUT std::vector& keypoints); -/* - * A class filters a vector of keypoints. - * Because now it is difficult to provide a convenient interface for all usage scenarios of the keypoints filter class, - * it has only several needed by now static methods. +/** @brief A class filters a vector of keypoints. + + Because now it is difficult to provide a convenient interface for all usage scenarios of the + keypoints filter class, it has only several needed by now static methods. 
*/ class CV_EXPORTS KeyPointsFilter { @@ -91,44 +127,66 @@ public: /************************************ Base Classes ************************************/ -/* - * Abstract base class for 2D image feature detectors and descriptor extractors - */ +/** @brief Abstract base class for 2D image feature detectors and descriptor extractors +*/ class CV_EXPORTS_W Feature2D : public virtual Algorithm { public: virtual ~Feature2D(); - /* - * Detect keypoints in an image. - * image The image. - * keypoints The detected keypoints. - * mask Mask specifying where to look for keypoints (optional). Must be a char - * matrix with non-zero values in the region of interest. + /** @brief Detects keypoints in an image (first variant) or image set (second variant). + + @param image Image. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i] . + @param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer + matrix with non-zero values in the region of interest. */ CV_WRAP virtual void detect( InputArray image, CV_OUT std::vector& keypoints, InputArray mask=noArray() ); + /** @overload + @param images Image set. + @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set + of keypoints detected in images[i] . + @param masks Masks for each input image specifying where to look for keypoints (optional). + masks[i] is a mask for images[i]. + */ virtual void detect( InputArrayOfArrays images, std::vector >& keypoints, InputArrayOfArrays masks=noArray() ); - /* - * Compute the descriptors for a set of keypoints in an image. - * image The image. - * keypoints The input keypoints. Keypoints for which a descriptor cannot be computed are removed. - * descriptors Copmputed descriptors. Row i is the descriptor for keypoint i. 
+ /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set + (second variant). + + @param image Image. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates a keypoint + with several dominant orientations (for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for keypoints[i]. Row j of descriptors (or descriptors[i]) is the + descriptor for the j-th keypoint. */ CV_WRAP virtual void compute( InputArray image, CV_OUT CV_IN_OUT std::vector& keypoints, OutputArray descriptors ); + /** @overload + + @param images Image set. + @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be + computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates a keypoint + with several dominant orientations (for each orientation). + @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are + descriptors computed for keypoints[i]. Row j of descriptors (or descriptors[i]) is the + descriptor for the j-th keypoint. + */ virtual void compute( InputArrayOfArrays images, std::vector >& keypoints, OutputArrayOfArrays descriptors ); - /* Detects keypoints and computes the descriptors */ + /** Detects keypoints and computes the descriptors */ CV_WRAP virtual void detectAndCompute( InputArray image, InputArray mask, CV_OUT std::vector& keypoints, OutputArray descriptors, @@ -138,33 +196,96 @@ public: CV_WRAP virtual int descriptorType() const; CV_WRAP virtual int defaultNorm() const; - // Return true if detector object is empty + //!
Return true if detector object is empty CV_WRAP virtual bool empty() const; }; +/** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch +between different algorithms solving the same problem. All objects that implement keypoint detectors +inherit the FeatureDetector interface. */ typedef Feature2D FeatureDetector; + +/** Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that enables you +to easily switch between different algorithms solving the same problem. This section is devoted to +computing descriptors represented as vectors in a multidimensional space. All objects that implement +the vector descriptor extractors inherit the DescriptorExtractor interface. + */ typedef Feature2D DescriptorExtractor; -/*! - BRISK implementation -*/ +//! @addtogroup features2d_main +//! @{ + +/** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11. + */ class CV_EXPORTS_W BRISK : public Feature2D { public: + /** @brief The BRISK constructor + + @param thresh FAST/AGAST detection threshold score. + @param octaves detection octaves. Use 0 to do single scale. + @param patternScale apply this scale to the pattern used for sampling the neighbourhood of a + keypoint. + */ CV_WRAP static Ptr create(int thresh=30, int octaves=3, float patternScale=1.0f); - // custom setup + + /** @brief The BRISK constructor for a custom pattern + + @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for + keypoint scale 1). + @param numberList defines the number of sampling points on the sampling circle. Must be the same + size as radiusList.. + @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint + scale 1). + @param dMin threshold for the long pairings used for orientation determination (in pixels for + keypoint scale 1). + @param indexChange index remapping of the bits. 
*/ CV_WRAP static Ptr create(const std::vector &radiusList, const std::vector &numberList, float dMax=5.85f, float dMin=8.2f, const std::vector& indexChange=std::vector()); }; -/*! - ORB implementation. -*/ +/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor + +described in @cite RRKB11. The algorithm uses FAST in pyramids to detect stable keypoints, selects +the strongest features using FAST or Harris response, finds their orientation using first-order +moments and computes the descriptors using BRIEF (where the coordinates of random point pairs (or +k-tuples) are rotated according to the measured orientation). + */ class CV_EXPORTS_W ORB : public Feature2D { public: enum { kBytes = 32, HARRIS_SCORE=0, FAST_SCORE=1 }; + /** @brief The ORB constructor + + @param nfeatures The maximum number of features to retain. + @param scaleFactor Pyramid decimation ratio, greater than 1. scaleFactor==2 means the classical + pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor + will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor + will mean that to cover certain scale range you will need more pyramid levels and so the speed + will suffer. + @param nlevels The number of pyramid levels. The smallest level will have linear size equal to + input\_image\_linear\_size/pow(scaleFactor, nlevels). + @param edgeThreshold This is size of the border where the features are not detected. It should + roughly match the patchSize parameter. + @param firstLevel It should be 0 in the current implementation. + @param WTA\_K The number of points that produce each element of the oriented BRIEF descriptor. The + default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, + so we get 0/1 response. Other possible values are 3 and 4. 
For example, 3 means that we take 3 + random points (of course, those point coordinates are random, but they are generated from the + pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel + rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such + output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, + denoted as NORM\_HAMMING2 (2 bits per bin). When WTA\_K=4, we take 4 random points to compute each + bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3). + @param scoreType The default HARRIS\_SCORE means that Harris algorithm is used to rank features + (the score is written to KeyPoint::score and is used to retain best nfeatures features); + FAST\_SCORE is alternative value of the parameter that produces slightly less stable keypoints, + but it is a little faster to compute. + @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller + pyramid layers the perceived image area covered by a feature will be larger. + @param fastThreshold + */ CV_WRAP static Ptr create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31, int firstLevel=0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20); @@ -196,15 +317,16 @@ public: CV_WRAP virtual int getFastThreshold() const = 0; }; -/*! - Maximal Stable Extremal Regions class. +/** @brief Maximally stable extremal region extractor. : - The class implements MSER algorithm introduced by J. Matas. - Unlike SIFT, SURF and many other detectors in OpenCV, this is salient region detector, - not the salient point detector. +The class encapsulates all the parameters of the MSER extraction algorithm (see +). Also see + for useful comments and parameters description. - It returns the regions, each of those is encoded as a contour. 
-*/ +@note + - (Python) A complete example showing the use of the MSER detector can be found at + opencv\_source\_code/samples/python2/mser.py + */ class CV_EXPORTS_W MSER : public Feature2D { public: @@ -231,13 +353,38 @@ public: CV_WRAP virtual bool getPass2Only() const = 0; }; -//! detects corners using FAST algorithm by E. Rosten +/** @overload */ CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, int threshold, bool nonmaxSuppression=true ); +/** @brief Detects corners using the FAST algorithm + +@param image grayscale image where keypoints (corners) are detected. +@param keypoints keypoints detected on the image. +@param threshold threshold on difference between intensity of the central pixel and pixels of a +circle around this pixel. +@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners +(keypoints). +@param type one of the three neighborhoods as defined in the paper: +FastFeatureDetector::TYPE\_9\_16, FastFeatureDetector::TYPE\_7\_12, +FastFeatureDetector::TYPE\_5\_8 + +Detects corners using the FAST algorithm by @cite Rosten06. + +@note In Python API, types are given as cv2.FAST\_FEATURE\_DETECTOR\_TYPE\_5\_8, +cv2.FAST\_FEATURE\_DETECTOR\_TYPE\_7\_12 and cv2.FAST\_FEATURE\_DETECTOR\_TYPE\_9\_16. For corner +detection, use cv2.FAST.detect() method. + */ CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector& keypoints, int threshold, bool nonmaxSuppression, int type ); +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ + +/** @brief Wrapping class for feature detection using the FAST method. : + */ class CV_EXPORTS_W FastFeatureDetector : public Feature2D { public: @@ -261,7 +408,8 @@ public: CV_WRAP virtual int getType() const = 0; }; - +/** @brief Wrapping class for feature detection using the goodFeaturesToTrack function. 
: + */ class CV_EXPORTS_W GFTTDetector : public Feature2D { public: @@ -286,7 +434,37 @@ public: CV_WRAP virtual double getK() const = 0; }; - +/** @brief Class for extracting blobs from an image. : + +The class implements a simple algorithm for extracting blobs from an image: + +1. Convert the source image to binary images by applying thresholding with several thresholds from + minThreshold (inclusive) to maxThreshold (exclusive) with distance thresholdStep between + neighboring thresholds. +2. Extract connected components from every binary image by findContours and calculate their + centers. +3. Group centers from several binary images by their coordinates. Close centers form one group that + corresponds to one blob, which is controlled by the minDistBetweenBlobs parameter. +4. From the groups, estimate final centers of blobs and their radiuses and return as locations and + sizes of keypoints. + +This class performs several filtrations of returned blobs. You should set filterBy\* to true/false +to turn on/off corresponding filtration. Available filtrations: + +- **By color**. This filter compares the intensity of a binary image at the center of a blob to +blobColor. If they differ, the blob is filtered out. Use blobColor = 0 to extract dark blobs +and blobColor = 255 to extract light blobs. +- **By area**. Extracted blobs have an area between minArea (inclusive) and maxArea (exclusive). +- **By circularity**. Extracted blobs have circularity +(\f$\frac{4*\pi*Area}{perimeter * perimeter}\f$) between minCircularity (inclusive) and +maxCircularity (exclusive). +- **By ratio of the minimum inertia to maximum inertia**. Extracted blobs have this ratio +between minInertiaRatio (inclusive) and maxInertiaRatio (exclusive). +- **By convexity**. Extracted blobs have convexity (area / area of blob convex hull) between +minConvexity (inclusive) and maxConvexity (exclusive). + +Default values of parameters are tuned to extract dark circular blobs. 
+ */ class CV_EXPORTS_W SimpleBlobDetector : public Feature2D { public: @@ -322,9 +500,16 @@ public: create(const SimpleBlobDetector::Params &parameters = SimpleBlobDetector::Params()); }; +//! @} features2d_main + +//! @addtogroup features2d_main +//! @{ -/*! -KAZE implementation +/** @brief Class implementing the KAZE keypoint detector and descriptor extractor, described in @cite ABD12. + +@note AKAZE descriptor can only be used with KAZE or AKAZE keypoints. [ABD12] KAZE Features. Pablo +F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on Computer Vision +(ECCV), Florence, Italy, October 2012. */ class CV_EXPORTS_W KAZE : public Feature2D { @@ -337,6 +522,16 @@ public: DIFF_CHARBONNIER = 3 }; + /** @brief The KAZE constructor + + @param extended Set to enable extraction of extended (128-byte) descriptor. + @param upright Set to enable use of upright descriptors (non rotation-invariant). + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF\_PM\_G1, DIFF\_PM\_G2, DIFF\_WEICKERT or + DIFF\_CHARBONNIER + */ CV_WRAP static Ptr create(bool extended=false, bool upright=false, float threshold = 0.001f, int nOctaves = 4, int nOctaveLayers = 4, @@ -361,9 +556,13 @@ public: CV_WRAP virtual int getDiffusivity() const = 0; }; -/*! -AKAZE implementation -*/ +/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13. + +@note AKAZE descriptors can only be used with KAZE or AKAZE keypoints. Try to avoid using *extract* +and *detect* instead of *operator()* due to performance reasons. [ANB13] Fast Explicit Diffusion +for Accelerated Features in Nonlinear Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien +Bartoli. In British Machine Vision Conference (BMVC), Bristol, UK, September 2013.
+ */ class CV_EXPORTS_W AKAZE : public Feature2D { public: @@ -376,6 +575,18 @@ public: DESCRIPTOR_MLDB = 5 }; + /** @brief The AKAZE constructor + + @param descriptor\_type Type of the extracted descriptor: DESCRIPTOR\_KAZE, + DESCRIPTOR\_KAZE\_UPRIGHT, DESCRIPTOR\_MLDB or DESCRIPTOR\_MLDB\_UPRIGHT. + @param descriptor\_size Size of the descriptor in bits. 0 -\> Full size + @param descriptor\_channels Number of channels in the descriptor (1, 2, 3) + @param threshold Detector response threshold to accept point + @param nOctaves Maximum octave evolution of the image + @param nOctaveLayers Default number of sublevels per scale level + @param diffusivity Diffusivity type. DIFF\_PM\_G1, DIFF\_PM\_G2, DIFF\_WEICKERT or + DIFF\_CHARBONNIER + */ CV_WRAP static Ptr create(int descriptor_type=AKAZE::DESCRIPTOR_MLDB, int descriptor_size = 0, int descriptor_channels = 3, float threshold = 0.001f, int nOctaves = 4, @@ -403,6 +614,8 @@ public: CV_WRAP virtual int getDiffusivity() const = 0; }; +//! @} features2d_main + /****************************************************************************************\ * Distance * \****************************************************************************************/ @@ -501,76 +714,153 @@ template struct HammingMultilevel /****************************************************************************************\ * DescriptorMatcher * \****************************************************************************************/ -/* - * Abstract base class for matching two sets of descriptors. + +//! @addtogroup features2d_match +//! @{ + +/** @brief Abstract base class for matching keypoint descriptors. + +It has two groups of match methods: for matching descriptors of an image with another image or with +an image set. */ class CV_EXPORTS_W DescriptorMatcher : public Algorithm { public: virtual ~DescriptorMatcher(); - /* - * Add descriptors to train descriptor collection. - * descriptors Descriptors to add. 
Each descriptors[i] is a descriptors set from one image. + /** @brief Adds descriptors to the CPU (trainDescCollection) or GPU (utrainDescCollection) train descriptor + collection. + + If the collection is not empty, the new descriptors are added to existing train descriptors. + + @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same + train image. */ CV_WRAP virtual void add( InputArrayOfArrays descriptors ); - /* - * Get train descriptors collection. + + /** @brief Returns a constant reference to the train descriptor collection trainDescCollection. */ CV_WRAP const std::vector& getTrainDescriptors() const; - /* - * Clear train descriptors collection. + + /** @brief Clears the train descriptor collections. */ CV_WRAP virtual void clear(); - /* - * Return true if there are not train descriptors in collection. + /** @brief Returns true if there are no train descriptors in both collections. */ CV_WRAP virtual bool empty() const; - /* - * Return true if the matcher supports mask in match methods. + + /** @brief Returns true if the descriptor matcher supports masking permissible matches. */ CV_WRAP virtual bool isMaskSupported() const = 0; - /* - * Train matcher (e.g. train flann index). - * In all methods to match the method train() is run every time before matching. - * Some descriptor matchers (e.g. BruteForceMatcher) have empty implementation - * of this method, other matchers really train their inner structures - * (e.g. FlannBasedMatcher trains flann::Index). So nonempty implementation - * of train() should check the class object state and do traing/retraining - * only if the state requires that (e.g. FlannBasedMatcher trains flann::Index - * if it has not trained yet or if new descriptors have been added to the train - * collection). + /** @brief Trains a descriptor matcher + + Trains a descriptor matcher (for example, the flann index). In all methods to match, the method + train() is run every time before matching.
Some descriptor matchers (for example, BruteForceMatcher) + have an empty implementation of this method. Other matchers really train their inner structures (for + example, FlannBasedMatcher trains flann::Index). */ CV_WRAP virtual void train(); - /* - * Group of methods to match descriptors from image pair. - * Method train() is run in this methods. + + /** @brief Finds the best match for each descriptor from a query set. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. If a query descriptor is masked out in mask, no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\<uchar\>(i,j) is non-zero. */ - // Find one best match for each query descriptor (if mask is empty). CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors, CV_OUT std::vector& matches, InputArray mask=noArray() ) const; - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. + + /** @brief Finds the k best matches for each descriptor from a query set.
+ + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + These extended variants of DescriptorMatcher::match methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match + for the details about query and train descriptors. + */ CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors, CV_OUT std::vector >& matches, int k, InputArray mask=noArray(), bool compactResult=false ) const; - // Find best matches for each query descriptor which have distance less than - // maxDistance (in increasing order of distances). + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance. + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Found matches. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. 
If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. + */ void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors, std::vector >& matches, float maxDistance, InputArray mask=noArray(), bool compactResult=false ) const; - /* - * Group of methods to match descriptors from one image to image set. - * See description of similar methods for matching image pair above. - */ + + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + */ CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector& matches, InputArrayOfArrays masks=noArray() ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param masks Set of masks. 
Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector >& matches, int k, InputArrayOfArrays masks=noArray(), bool compactResult=false ); + /** @overload + @param queryDescriptors Query set of descriptors. + @param matches Found matches. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param masks Set of masks. Each masks[i] specifies permissible matches between the input query + descriptors and stored train descriptors from the i-th image trainDescCollection[i]. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ void radiusMatch( InputArray queryDescriptors, std::vector >& matches, float maxDistance, InputArrayOfArrays masks=noArray(), bool compactResult=false ); @@ -579,14 +869,28 @@ public: // Writes matcher object to a file storage virtual void write( FileStorage& ) const; - // Clone the matcher. If emptyTrainData is false the method create deep copy of the object, i.e. copies - // both parameters and train data. If emptyTrainData is true the method create object copy with current parameters - // but with empty train data. + /** @brief Clones the matcher. 
+ + @param emptyTrainData If emptyTrainData is false, the method creates a deep copy of the object, + that is, copies both parameters and train data. If emptyTrainData is true, the method creates an + object copy with the current parameters but with empty train data. + */ virtual Ptr clone( bool emptyTrainData=false ) const = 0; + /** @brief Creates a descriptor matcher of a given type with the default parameters (using default + constructor). + + @param descriptorMatcherType Descriptor matcher type. Now the following matcher types are + supported: + - `BruteForce` (it uses L2 ) + - `BruteForce-L1` + - `BruteForce-Hamming` + - `BruteForce-Hamming(2)` + - `FlannBased` + */ CV_WRAP static Ptr create( const String& descriptorMatcherType ); protected: - /* + /** * Class to work with descriptors from several images as with one merged matrix. * It is used e.g. in FlannBasedMatcher. */ @@ -613,9 +917,9 @@ protected: std::vector startIdxs; }; - // In fact the matching is implemented only by the following two methods. These methods suppose - // that the class object has been trained already. Public match methods call these methods - // after calling train(). + //! In fact the matching is implemented only by the following two methods. These methods suppose + //! that the class object has been trained already. Public match methods call these methods + //! after calling train(). virtual void knnMatchImpl( InputArray queryDescriptors, std::vector >& matches, int k, InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0; virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector >& matches, float maxDistance, @@ -627,23 +931,33 @@ protected: static Mat clone_op( Mat m ) { return m.clone(); } void checkMasks( InputArrayOfArrays masks, int queryDescriptorsCount ) const; - // Collection of descriptors from train images. + //! Collection of descriptors from train images. 
std::vector trainDescCollection; std::vector utrainDescCollection; }; -/* - * Brute-force descriptor matcher. - * - * For each descriptor in the first set, this matcher finds the closest - * descriptor in the second set by trying each one. - * - * For efficiency, BruteForceMatcher is templated on the distance metric. - * For float descriptors, a common choice would be cv::L2. +/** @brief Brute-force descriptor matcher. + +For each descriptor in the first set, this matcher finds the closest descriptor in the second set +by trying each one. This descriptor matcher supports masking permissible matches of descriptor +sets. */ class CV_EXPORTS_W BFMatcher : public DescriptorMatcher { public: + /** @brief Brute-force matcher constructor. + + @param normType One of NORM\_L1, NORM\_L2, NORM\_HAMMING, NORM\_HAMMING2. L1 and L2 norms are + preferable choices for SIFT and SURF descriptors, NORM\_HAMMING should be used with ORB, BRISK and + BRIEF, NORM\_HAMMING2 should be used with ORB when WTA\_K==3 or 4 (see ORB::ORB constructor + description). + @param crossCheck If it is false, this will be the default BFMatcher behaviour when it finds the k + nearest neighbors for each query descriptor. If crossCheck==true, then the knnMatch() method with + k=1 will only return pairs (i,j) such that for i-th query descriptor the j-th descriptor in the + matcher's collection is the nearest and vice versa, i.e. the BFMatcher will only return consistent + pairs. Such a technique usually produces the best results with a minimal number of outliers when there are + enough matches. This is an alternative to the ratio test, used by D. Lowe in the SIFT paper. + */ CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false ); virtual ~BFMatcher() {} @@ -661,8 +975,12 @@ protected: }; -/* - * Flann based matcher +/** @brief Flann-based descriptor matcher. + +This matcher trains flann::Index\_ on a train descriptor collection and calls its nearest search +methods to find the best matches. 
So, this matcher may be faster when matching a large train +collection than the brute force matcher. FlannBasedMatcher does not support masking permissible +matches of descriptor sets because flann::Index does not support this. */ class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher { @@ -700,42 +1018,85 @@ protected: int addedDescCount; }; +//! @} features2d_match /****************************************************************************************\ * Drawing functions * \****************************************************************************************/ + +//! @addtogroup features2d_draw +//! @{ + struct CV_EXPORTS DrawMatchesFlags { - enum{ DEFAULT = 0, // Output image matrix will be created (Mat::create), - // i.e. existing memory of output image may be reused. - // Two source image, matches and single keypoints will be drawn. - // For each keypoint only the center point will be drawn (without - // the circle around keypoint with keypoint size and orientation). - DRAW_OVER_OUTIMG = 1, // Output image matrix will not be created (Mat::create). - // Matches will be drawn on existing content of output image. - NOT_DRAW_SINGLE_POINTS = 2, // Single keypoints will not be drawn. - DRAW_RICH_KEYPOINTS = 4 // For each keypoint the circle around keypoint with keypoint size and - // orientation will be drawn. + enum{ DEFAULT = 0, //!< Output image matrix will be created (Mat::create), + //!< i.e. existing memory of output image may be reused. + //!< Two source images, matches and single keypoints will be drawn. + //!< For each keypoint only the center point will be drawn (without + //!< the circle around keypoint with keypoint size and orientation). + DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create). + //!< Matches will be drawn on existing content of output image. + NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn. 
+ DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and + //!< orientation will be drawn. }; }; -// Draw keypoints. +/** @brief Draws keypoints. + +@param image Source image. +@param keypoints Keypoints from the source image. +@param outImage Output image. Its content depends on the flags value defining what is drawn in the +output image. See the possible flags bit values in DrawMatchesFlags. +@param color Color of keypoints. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. See details in drawMatches. + +@note +For Python API, flags are modified as cv2.DRAW\_MATCHES\_FLAGS\_DEFAULT, +cv2.DRAW\_MATCHES\_FLAGS\_DRAW\_RICH\_KEYPOINTS, cv2.DRAW\_MATCHES\_FLAGS\_DRAW\_OVER\_OUTIMG, +cv2.DRAW\_MATCHES\_FLAGS\_NOT\_DRAW\_SINGLE\_POINTS + */ CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector& keypoints, InputOutputArray outImage, const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT ); -// Draws matches of keypints from two images on output image. +/** @brief Draws the found matches of keypoints from two images. + +@param img1 First source image. +@param keypoints1 Keypoints from the first source image. +@param img2 Second source image. +@param keypoints2 Keypoints from the second source image. +@param matches1to2 Matches from the first image to the second one, which means that keypoints1[i] +has a corresponding point in keypoints2[matches1to2[i]]. +@param outImg Output image. Its content depends on the flags value defining what is drawn in the +output image. See the possible flags bit values in DrawMatchesFlags. +@param matchColor Color of matches (lines and connected keypoints). If matchColor==Scalar::all(-1), +the color is generated randomly. +@param singlePointColor Color of single keypoints (circles), which means that keypoints do not +have the matches. If singlePointColor==Scalar::all(-1), the color is generated randomly. 
+@param matchesMask Mask determining which matches are drawn. If the mask is empty, all matches are +drawn. +@param flags Flags setting drawing features. Possible flags bit values are defined by +DrawMatchesFlags. + +This function draws matches of keypoints from two images in the output image. Match is a line +connecting two keypoints (circles). See cv::DrawMatchesFlags. + */ CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector& matches1to2, InputOutputArray outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), const std::vector& matchesMask=std::vector(), int flags=DrawMatchesFlags::DEFAULT ); +/** @overload */ CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector >& matches1to2, InputOutputArray outImg, const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1), const std::vector >& matchesMask=std::vector >(), int flags=DrawMatchesFlags::DEFAULT ); +//! @} features2d_draw + /****************************************************************************************\ * Functions to evaluate the feature detectors and [generic] descriptor extractors * \****************************************************************************************/ @@ -755,8 +1116,14 @@ CV_EXPORTS int getNearestPoint( const std::vector& recallPrecisionCurve /****************************************************************************************\ * Bag of visual words * \****************************************************************************************/ -/* - * Abstract base class for training of a 'bag of visual words' vocabulary from a set of descriptors + +//! @addtogroup features2d_category +//! @{ + +/** @brief Abstract base class for training the *bag of visual words* vocabulary from a set of descriptors. 
+ +For details, see, for example, *Visual Categorization with Bags of Keypoints* by Gabriella Csurka, +Christopher R. Dance, Lixin Fan, Jutta Willamowski, Cedric Bray, 2004. */ class CV_EXPORTS_W BOWTrainer { @@ -764,20 +1131,37 @@ public: BOWTrainer(); virtual ~BOWTrainer(); + /** @brief Adds descriptors to a training set. + + @param descriptors Descriptors to add to a training set. Each row of the descriptors matrix is a + descriptor. + + The training set is clustered using the cluster() method to construct the vocabulary. + */ CV_WRAP void add( const Mat& descriptors ); + + /** @brief Returns a training set of descriptors. + */ CV_WRAP const std::vector& getDescriptors() const; + + /** @brief Returns the count of all descriptors stored in the training set. + */ CV_WRAP int descriptorsCount() const; CV_WRAP virtual void clear(); - /* - * Train visual words vocabulary, that is cluster training descriptors and - * compute cluster centers. - * Returns cluster centers. - * - * descriptors Training descriptors computed on images keypoints. - */ + /** @overload */ CV_WRAP virtual Mat cluster() const = 0; + + /** @brief Clusters train descriptors. + + @param descriptors Descriptors to cluster. Each row of the descriptors matrix is a descriptor. + Descriptors are not added to the inner train descriptor set. + + The vocabulary consists of cluster centers. So, this method returns the vocabulary. In the first + variant of the method, train descriptors stored in the object are clustered. In the second variant, + input descriptors are clustered. + */ CV_WRAP virtual Mat cluster( const Mat& descriptors ) const = 0; protected: @@ -785,12 +1169,15 @@ protected: int size; }; -/* - * This is BOWTrainer using cv::kmeans to get vocabulary. +/** @brief kmeans-based class to train visual vocabulary using the *bag of visual words* approach. */ class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer { public: + /** @brief The constructor. 
+ + @see cv::kmeans + */ CV_WRAP BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(), int attempts=3, int flags=KMEANS_PP_CENTERS ); virtual ~BOWKMeansTrainer(); @@ -807,21 +1194,62 @@ protected: int flags; }; -/* - * Class to compute image descriptor using bag of visual words. +/** @brief Class to compute an image descriptor using the *bag of visual words*. + +Such a computation consists of the following steps: + +1. Compute descriptors for a given image and its keypoints set. +2. Find the nearest visual words from the vocabulary for each keypoint descriptor. +3. Compute the bag-of-words image descriptor as a normalized histogram of vocabulary words +encountered in the image. The i-th bin of the histogram is the frequency of the i-th word of the +vocabulary in the given image. */ class CV_EXPORTS_W BOWImgDescriptorExtractor { public: + /** @brief The constructor. + + @param dextractor Descriptor extractor that is used to compute descriptors for an input image and + its keypoints. + @param dmatcher Descriptor matcher that is used to find the nearest word of the trained vocabulary + for each keypoint descriptor of the image. + */ CV_WRAP BOWImgDescriptorExtractor( const Ptr& dextractor, const Ptr& dmatcher ); + /** @overload */ BOWImgDescriptorExtractor( const Ptr& dmatcher ); virtual ~BOWImgDescriptorExtractor(); + /** @brief Sets a visual vocabulary. + + @param vocabulary Vocabulary (can be trained using the inheritor of BOWTrainer). Each row of the + vocabulary is a visual word (cluster center). + */ CV_WRAP void setVocabulary( const Mat& vocabulary ); + + /** @brief Returns the set vocabulary. + */ CV_WRAP const Mat& getVocabulary() const; + + /** @brief Computes an image descriptor using the set visual vocabulary. + + @param image Image, for which the descriptor is computed. + @param keypoints Keypoints detected in the input image. + @param imgDescriptor Computed output image descriptor. 
+ @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i-th cluster (word of vocabulary) + returned if the pointer is non-zero. + @param descriptors Descriptors of the image keypoints that are returned if the pointer is non-zero. + */ void compute( InputArray image, std::vector& keypoints, OutputArray imgDescriptor, std::vector >* pointIdxsOfClusters=0, Mat* descriptors=0 ); + /** @overload + @param keypointDescriptors Computed descriptors to match with vocabulary. + @param imgDescriptor Computed output image descriptor. + @param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that + pointIdxsOfClusters[i] are keypoint indices that belong to the i-th cluster (word of vocabulary) + returned if the pointer is non-zero. + */ void compute( InputArray keypointDescriptors, OutputArray imgDescriptor, std::vector >* pointIdxsOfClusters=0 ); // compute() is not constant because DescriptorMatcher::match is not constant @@ -829,7 +1257,12 @@ public: CV_WRAP_AS(compute) void compute2( const Mat& image, std::vector& keypoints, CV_OUT Mat& imgDescriptor ) { compute(image,keypoints,imgDescriptor); } + /** @brief Returns an image descriptor size if the vocabulary is set. Otherwise, it returns 0. + */ CV_WRAP int descriptorSize() const; + + /** @brief Returns an image descriptor type. + */ CV_WRAP int descriptorType() const; protected: @@ -838,6 +1271,10 @@ protected: Ptr dmatcher; }; +//! @} features2d_category + +//! @} features2d + } /* namespace cv */ #endif