Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/19153/head
Alexander Alekhin 4 years ago
commit 624d532000
  1. cmake/OpenCVDetectInferenceEngine.cmake (4 changes)
  2. doc/js_tutorials/js_setup/js_usage/js_usage.markdown (2 changes)
  3. modules/calib3d/include/opencv2/calib3d.hpp (223 changes)
  4. modules/calib3d/test/test_fisheye.cpp (8 changes)
  5. modules/core/include/opencv2/core/ocl.hpp (5 changes)
  6. modules/core/src/convert.dispatch.cpp (2 changes)
  7. modules/core/src/ocl.cpp (50 changes)
  8. modules/core/src/opencl/halfconvert.cl (13 changes)
  9. modules/core/src/opengl.cpp (33 changes)
  10. modules/core/test/test_misc.cpp (2 changes)
  11. modules/dnn/perf/perf_net.cpp (10 changes)
  12. modules/dnn/src/darknet/darknet_io.cpp (2 changes)
  13. modules/dnn/src/layers/convolution_layer.cpp (26 changes)
  14. modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp (2 changes)
  15. modules/dnn/src/ocl4dnn/src/math_functions.cpp (78 changes)
  16. modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (88 changes)
  17. modules/dnn/src/op_inf_engine.hpp (5 changes)
  18. modules/dnn/src/opencl/conv_spatial_helper.cl (7 changes)
  19. modules/dnn/src/opencl/gemm_buffer.cl (76 changes)
  20. modules/dnn/src/tensorflow/tf_importer.cpp (2 changes)
  21. modules/dnn/test/test_darknet_importer.cpp (2 changes)
  22. modules/dnn/test/test_onnx_importer.cpp (3 changes)
  23. modules/imgproc/src/clahe.cpp (4 changes)
  24. modules/imgproc/src/imgwarp.cpp (3 changes)
  25. modules/imgproc/src/pyramids.cpp (6 changes)
  26. modules/videoio/test/test_main.cpp (17 changes)

@@ -129,9 +129,9 @@ endif()
 if(INF_ENGINE_TARGET)
   if(NOT INF_ENGINE_RELEASE)
-    message(WARNING "InferenceEngine version has not been set, 2021.1 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+    message(WARNING "InferenceEngine version has not been set, 2021.2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
   endif()
-  set(INF_ENGINE_RELEASE "2021010000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
+  set(INF_ENGINE_RELEASE "2021020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
     INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )
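For reference, the INTERFACE_COMPILE_DEFINITIONS exported here are what downstream C++ code keys on; a minimal sketch of gating on the bumped default, assuming a build configured against this target:
@code
#include <iostream>

int main()
{
    // HAVE_INF_ENGINE and INF_ENGINE_RELEASE come from the target's
    // INTERFACE_COMPILE_DEFINITIONS set above (undefined macros read as 0 here).
#if defined(HAVE_INF_ENGINE) && INF_ENGINE_RELEASE >= 2021020000
    std::cout << "InferenceEngine 2021.2+: " << INF_ENGINE_RELEASE << std::endl;
#else
    std::cout << "InferenceEngine 2021.2+ not available" << std::endl;
#endif
    return 0;
}
@endcode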

@@ -4,7 +4,7 @@ Using OpenCV.js {#tutorial_js_usage}
 Steps
 -----
-In this tutorial, you will learn how to include and start to use `opencv.js` inside a web page. You can get a copy of `opencv.js` from `opencv-{VERSION_NUMBER}-docs.zip` in each [release](https://github.com/opencv/opencv/releases), or simply download the prebuilt script from the online documentations at "https://docs.opencv.org/{VERISON_NUMBER}/opencv.js" (For example, [https://docs.opencv.org/3.4.0/opencv.js](https://docs.opencv.org/3.4.0/opencv.js). Use `master` if you want the latest build). You can also build your own copy by following the tutorial on Build Opencv.js.
+In this tutorial, you will learn how to include and start to use `opencv.js` inside a web page. You can get a copy of `opencv.js` from `opencv-{VERSION_NUMBER}-docs.zip` in each [release](https://github.com/opencv/opencv/releases), or simply download the prebuilt script from the online documentations at "https://docs.opencv.org/{VERSION_NUMBER}/opencv.js" (For example, [https://docs.opencv.org/3.4.0/opencv.js](https://docs.opencv.org/3.4.0/opencv.js). Use `master` if you want the latest build). You can also build your own copy by following the tutorial on Build Opencv.js.
 ### Create a web page

@@ -674,15 +674,15 @@ or vector\<Point2f\> .
 a vector\<Point2f\> .
 @param method Method used to compute a homography matrix. The following methods are possible:
 - **0** - a regular method using all the points, i.e., the least squares method
-- **RANSAC** - RANSAC-based robust method
-- **LMEDS** - Least-Median robust method
-- **RHO** - PROSAC-based robust method
+- @ref RANSAC - RANSAC-based robust method
+- @ref LMEDS - Least-Median robust method
+- @ref RHO - PROSAC-based robust method
 @param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier
 (used in the RANSAC and RHO methods only). That is, if
 \f[\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|_2 > \texttt{ransacReprojThreshold}\f]
 then the point \f$i\f$ is considered as an outlier. If srcPoints and dstPoints are measured in pixels,
 it usually makes sense to set this parameter somewhere in the range of 1 to 10.
-@param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input
+@param mask Optional output mask set by a robust method ( RANSAC or LMeDS ). Note that the input
 mask values are ignored.
 @param maxIters The maximum number of RANSAC iterations.
 @param confidence Confidence level, between 0 and 1.
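For context, a minimal sketch of driving the documented method parameter with @ref RANSAC (the wrapper function name is illustrative, not part of the patch):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

cv::Mat estimateH(const std::vector<cv::Point2f>& srcPoints,
                  const std::vector<cv::Point2f>& dstPoints)
{
    cv::Mat inlierMask; // set by the robust method: 1 = inlier, 0 = outlier
    // 3.0 = ransacReprojThreshold in pixels, inside the suggested 1..10 range
    return cv::findHomography(srcPoints, dstPoints, cv::RANSAC, 3.0, inlierMask);
}
@endcode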
@@ -917,37 +917,37 @@ the model coordinate system to the camera coordinate system.
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
 @param flags Method for solving a PnP problem:
-- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
+- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
 this case the function finds such a pose that minimizes reprojection error, that is the sum
 of squared distances between the observed projections imagePoints and the projected (using
 @ref projectPoints ) objectPoints .
-- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
 "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
 In this case the function requires exactly four object and image points.
-- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
 "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
 In this case the function requires exactly four object and image points.
-- **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
+- @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
 paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
-- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
+- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of J. Hesch and S. Roumeliotis.
 "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
-- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
+- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
 F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
 Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
 assuming that both have the same value. Then the cameraMatrix is updated with the estimated
 focal length.
-- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli.
+- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
-- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli.
+- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
 It requires 4 coplanar object points defined in the following order:
 - point 0: [-squareLength / 2, squareLength / 2, 0]
 - point 1: [ squareLength / 2, squareLength / 2, 0]
 - point 2: [ squareLength / 2, -squareLength / 2, 0]
 - point 3: [-squareLength / 2, -squareLength / 2, 0]
-- **SOLVEPNP_SQPNP** Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
+- @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
 Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points.
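A minimal sketch of selecting one of the flags above (everything except the cv:: calls is illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

bool estimatePose(const std::vector<cv::Point3f>& objectPoints,
                  const std::vector<cv::Point2f>& imagePoints,
                  const cv::Mat& cameraMatrix, const cv::Mat& distCoeffs,
                  cv::Mat& rvec, cv::Mat& tvec)
{
    // Levenberg-Marquardt minimization of the reprojection error,
    // starting from scratch (useExtrinsicGuess = false)
    return cv::solvePnP(objectPoints, imagePoints, cameraMatrix, distCoeffs,
                        rvec, tvec, false, cv::SOLVEPNP_ITERATIVE);
}
@endcode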
@@ -1057,23 +1057,23 @@ a 3D point expressed in the world frame into the camera frame:
 - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
 it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
 np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
-- The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+- The methods @ref SOLVEPNP_DLS and @ref SOLVEPNP_UPNP cannot be used as the current implementations are
 unstable and sometimes give completely wrong results. If you pass one of these two
-flags, **SOLVEPNP_EPNP** method will be used instead.
-- The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+flags, @ref SOLVEPNP_EPNP method will be used instead.
+- The minimum number of points is 4 in the general case. In the case of @ref SOLVEPNP_P3P and @ref SOLVEPNP_AP3P
 methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
 of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
-- With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+- With @ref SOLVEPNP_ITERATIVE method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
 are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
 global solution to converge.
-- With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar.
-- With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation.
+- With @ref SOLVEPNP_IPPE input points must be >= 4 and object points must be coplanar.
+- With @ref SOLVEPNP_IPPE_SQUARE this is a special case suitable for marker pose estimation.
 Number of input points must be 4. Object points must be defined in the following order:
 - point 0: [-squareLength / 2, squareLength / 2, 0]
 - point 1: [ squareLength / 2, squareLength / 2, 0]
 - point 2: [ squareLength / 2, -squareLength / 2, 0]
 - point 3: [-squareLength / 2, -squareLength / 2, 0]
-- With **SOLVEPNP_SQPNP** input points must be >= 3
+- With @ref SOLVEPNP_SQPNP input points must be >= 3
 */
 CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
                             InputArray cameraMatrix, InputArray distCoeffs,
@@ -1152,9 +1152,9 @@ assumed.
 the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions.
 @param tvecs Output translation vectors.
 @param flags Method for solving a P3P problem:
-- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
 "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
-- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke and S. Roumeliotis.
+- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke and S. Roumeliotis.
 "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
 The function estimates the object pose given 3 object points, their corresponding image
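A minimal sketch of the P3P entry point these flags select (wrapper name illustrative); the call returns the number of candidate solutions:
@code
#include <opencv2/calib3d.hpp>
#include <vector>

int poseFromThreePoints(const std::vector<cv::Point3f>& objectPoints, // exactly 3
                        const std::vector<cv::Point2f>& imagePoints,  // exactly 3
                        const cv::Mat& cameraMatrix, const cv::Mat& distCoeffs,
                        std::vector<cv::Mat>& rvecs, std::vector<cv::Mat>& tvecs)
{
    // Up to 4 candidate poses are returned; the caller disambiguates them,
    // e.g. with an extra point.
    return cv::solveP3P(objectPoints, imagePoints, cameraMatrix, distCoeffs,
                        rvecs, tvecs, cv::SOLVEPNP_AP3P);
}
@endcode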
@@ -1254,39 +1254,39 @@ the model coordinate system to the camera coordinate system.
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
 @param flags Method for solving a PnP problem:
-- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
+- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
 this case the function finds such a pose that minimizes reprojection error, that is the sum
 of squared distances between the observed projections imagePoints and the projected (using
 projectPoints ) objectPoints .
-- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
 "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
 In this case the function requires exactly four object and image points.
-- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
 "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
 In this case the function requires exactly four object and image points.
-- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
+- @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
 paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
-- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
+- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
 "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
-- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
+- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
 Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
 F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
 Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
 assuming that both have the same value. Then the cameraMatrix is updated with the estimated
 focal length.
-- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli.
+- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
-- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli.
+- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
 "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
 It requires 4 coplanar object points defined in the following order:
 - point 0: [-squareLength / 2, squareLength / 2, 0]
 - point 1: [ squareLength / 2, squareLength / 2, 0]
 - point 2: [ squareLength / 2, -squareLength / 2, 0]
 - point 3: [-squareLength / 2, -squareLength / 2, 0]
-@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE
+@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
 and useExtrinsicGuess is set to true.
-@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE
+@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
 and useExtrinsicGuess is set to true.
 @param reprojectionError Optional vector of reprojection error, that is the RMS error
 (\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
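A minimal sketch of the initialization path the rvec/tvec parameters describe, here via the plain solvePnP overload with useExtrinsicGuess = true (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

void refinePose(const std::vector<cv::Point3f>& objectPoints,
                const std::vector<cv::Point2f>& imagePoints,
                const cv::Mat& cameraMatrix, const cv::Mat& distCoeffs,
                cv::Mat& rvec, cv::Mat& tvec) // in: initial guess, out: refined pose
{
    // With SOLVEPNP_ITERATIVE and useExtrinsicGuess = true, rvec/tvec seed the
    // optimization; the guess should be close to the global solution.
    cv::solvePnP(objectPoints, imagePoints, cameraMatrix, distCoeffs,
                 rvec, tvec, true, cv::SOLVEPNP_ITERATIVE);
}
@endcode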
@@ -1398,17 +1398,17 @@ a 3D point expressed in the world frame into the camera frame:
 - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
 it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
 np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
-- The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+- The methods @ref SOLVEPNP_DLS and @ref SOLVEPNP_UPNP cannot be used as the current implementations are
 unstable and sometimes give completely wrong results. If you pass one of these two
-flags, **SOLVEPNP_EPNP** method will be used instead.
-- The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+flags, @ref SOLVEPNP_EPNP method will be used instead.
+- The minimum number of points is 4 in the general case. In the case of @ref SOLVEPNP_P3P and @ref SOLVEPNP_AP3P
 methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
 of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
-- With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+- With @ref SOLVEPNP_ITERATIVE method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
 are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
 global solution to converge.
-- With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar.
-- With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation.
+- With @ref SOLVEPNP_IPPE input points must be >= 4 and object points must be coplanar.
+- With @ref SOLVEPNP_IPPE_SQUARE this is a special case suitable for marker pose estimation.
 Number of input points must be 4. Object points must be defined in the following order:
 - point 0: [-squareLength / 2, squareLength / 2, 0]
 - point 1: [ squareLength / 2, squareLength / 2, 0]
@@ -1448,13 +1448,13 @@ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints,
 ( patternSize = cv::Size(points_per_row,points_per_colum) = cv::Size(columns,rows) ).
 @param corners Output array of detected corners.
 @param flags Various operation flags that can be zero or a combination of the following values:
-- **CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black
+- @ref CALIB_CB_ADAPTIVE_THRESH Use adaptive thresholding to convert the image to black
 and white, rather than a fixed threshold level (computed from the average image brightness).
-- **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before
+- @ref CALIB_CB_NORMALIZE_IMAGE Normalize the image gamma with equalizeHist before
 applying fixed or adaptive thresholding.
-- **CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter,
+- @ref CALIB_CB_FILTER_QUADS Use additional criteria (like contour area, perimeter,
 square-like shape) to filter out false quads extracted at the contour retrieval stage.
-- **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners,
+- @ref CALIB_CB_FAST_CHECK Run a fast check on the image that looks for chessboard corners,
 and shortcut the call if none is found. This can drastically speed up the call in the
 degenerate condition when no chessboard is observed.
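A minimal sketch of combining the chessboard-detection flags above (the board size is hypothetical):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

bool detectBoard(const cv::Mat& gray, std::vector<cv::Point2f>& corners)
{
    cv::Size patternSize(9, 6); // inner corners per row / column (example board)
    // CALIB_CB_FAST_CHECK bails out early when no board is visible
    int flags = cv::CALIB_CB_ADAPTIVE_THRESH | cv::CALIB_CB_NORMALIZE_IMAGE
              | cv::CALIB_CB_FAST_CHECK;
    return cv::findChessboardCorners(gray, patternSize, corners, flags);
}
@endcode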
@@ -1665,9 +1665,9 @@ typedef CirclesGridFinderParameters CirclesGridFinderParameters2;
 ( patternSize = Size(points_per_row, points_per_colum) ).
 @param centers output array of detected centers.
 @param flags various operation flags that can be one of the following values:
-- **CALIB_CB_SYMMETRIC_GRID** uses symmetric pattern of circles.
-- **CALIB_CB_ASYMMETRIC_GRID** uses asymmetric pattern of circles.
-- **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to
+- @ref CALIB_CB_SYMMETRIC_GRID uses symmetric pattern of circles.
+- @ref CALIB_CB_ASYMMETRIC_GRID uses asymmetric pattern of circles.
+- @ref CALIB_CB_CLUSTERING uses a special algorithm for grid detection. It is more robust to
 perspective distortions but much more sensitive to background clutter.
 @param blobDetector feature detector that finds blobs like dark circles on light background.
 If `blobDetector` is NULL then `image` represents Point2f array of candidates.
@@ -1681,7 +1681,7 @@ row). Otherwise, if the function fails to find all the corners or reorder them,
 Sample usage of detecting and drawing the centers of circles: :
 @code
     Size patternsize(7,7); //number of centers
-    Mat gray = ....; //source image
+    Mat gray = ...; //source image
     vector<Point2f> centers; //this will be filled by the detected centers
     bool patternfound = findCirclesGrid(gray, patternsize, centers);
@@ -1720,8 +1720,8 @@ respectively. In the old interface all the vectors of object points from differe
 concatenated together.
 @param imageSize Size of the image used only to initialize the camera intrinsic matrix.
 @param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix
-\f$\cameramatrix{A}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
-and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
+\f$\cameramatrix{A}\f$ . If @ref CALIB_USE_INTRINSIC_GUESS
+and/or @ref CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
 initialized before calling the function.
 @param distCoeffs Input/output vector of distortion coefficients
 \f$\distcoeffs\f$.
@@ -1744,40 +1744,40 @@ parameters. Order of deviations values: \f$(R_0, T_0, \dotsc , R_{M - 1}, T_{M -
 the number of pattern views. \f$R_i, T_i\f$ are concatenated 1x3 vectors.
 @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
-- **CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+- @ref CALIB_USE_INTRINSIC_GUESS cameraMatrix contains valid initial values of
 fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
 center ( imageSize is used), and focal distances are computed in a least-squares fashion.
 Note, that if intrinsic parameters are known, there is no need to use this function just to
 estimate extrinsic parameters. Use solvePnP instead.
-- **CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
+- @ref CALIB_FIX_PRINCIPAL_POINT The principal point is not changed during the global
 optimization. It stays at the center or at a different location specified when
-CALIB_USE_INTRINSIC_GUESS is set too.
-- **CALIB_FIX_ASPECT_RATIO** The functions consider only fy as a free parameter. The
+@ref CALIB_USE_INTRINSIC_GUESS is set too.
+- @ref CALIB_FIX_ASPECT_RATIO The functions consider only fy as a free parameter. The
 ratio fx/fy stays the same as in the input cameraMatrix . When
-CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
+@ref CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
 ignored, only their ratio is computed and used further.
-- **CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
+- @ref CALIB_ZERO_TANGENT_DIST Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
 to zeros and stay zero.
-- **CALIB_FIX_K1,...,CALIB_FIX_K6** The corresponding radial distortion
-coefficient is not changed during the optimization. If CALIB_USE_INTRINSIC_GUESS is
+- @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 The corresponding radial distortion
+coefficient is not changed during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is
 set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
-- **CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the
+- @ref CALIB_RATIONAL_MODEL Coefficients k4, k5, and k6 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the rational model and return 8 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
-- **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the
+- @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the thin prism model and return 12 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
-- **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+- @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
-- **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
+- @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
-- **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+- @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 @param criteria Termination criteria for the iterative optimization algorithm.
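A minimal sketch of passing a flag combination from the list above to calibrateCamera (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

double runCalibration(const std::vector<std::vector<cv::Point3f>>& objectPoints,
                      const std::vector<std::vector<cv::Point2f>>& imagePoints,
                      cv::Size imageSize, cv::Mat& cameraMatrix, cv::Mat& distCoeffs)
{
    std::vector<cv::Mat> rvecs, tvecs;
    // Rational model enables k4..k6 (8 coefficients returned); tangential
    // distortion (p1, p2) is pinned to zero.
    int flags = cv::CALIB_RATIONAL_MODEL | cv::CALIB_ZERO_TANGENT_DIST;
    return cv::calibrateCamera(objectPoints, imagePoints, imageSize,
                               cameraMatrix, distCoeffs, rvecs, tvecs, flags);
}
@endcode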
@@ -1789,7 +1789,7 @@ points and their corresponding 2D projections in each view must be specified. Th
 by using an object with known geometry and easily detectable feature points. Such an object is
 called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as
 a calibration rig (see @ref findChessboardCorners). Currently, initialization of intrinsic
-parameters (when CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
+parameters (when @ref CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
 patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also
 be used as long as initial cameraMatrix is provided.
@@ -1972,39 +1972,39 @@ second camera coordinate system.
 @param F Output fundamental matrix.
 @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
-- **CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F
+- @ref CALIB_FIX_INTRINSIC Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F
 matrices are estimated.
-- **CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters
+- @ref CALIB_USE_INTRINSIC_GUESS Optimize some or all of the intrinsic parameters
 according to the specified flags. Initial values are provided by the user.
-- **CALIB_USE_EXTRINSIC_GUESS** R and T contain valid initial values that are optimized further.
+- @ref CALIB_USE_EXTRINSIC_GUESS R and T contain valid initial values that are optimized further.
 Otherwise R and T are initialized to the median value of the pattern views (each dimension separately).
-- **CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization.
-- **CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
-- **CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$
+- @ref CALIB_FIX_PRINCIPAL_POINT Fix the principal points during the optimization.
+- @ref CALIB_FIX_FOCAL_LENGTH Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
+- @ref CALIB_FIX_ASPECT_RATIO Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$
 .
-- **CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
-- **CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to
+- @ref CALIB_SAME_FOCAL_LENGTH Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
+- @ref CALIB_ZERO_TANGENT_DIST Set tangential distortion coefficients for each camera to
 zeros and fix there.
-- **CALIB_FIX_K1,...,CALIB_FIX_K6** Do not change the corresponding radial
-distortion coefficient during the optimization. If CALIB_USE_INTRINSIC_GUESS is set,
+- @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 Do not change the corresponding radial
+distortion coefficient during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set,
 the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
-- **CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward
+- @ref CALIB_RATIONAL_MODEL Enable coefficients k4, k5, and k6. To provide the backward
 compatibility, this extra flag should be explicitly specified to make the calibration
 function use the rational model and return 8 coefficients. If the flag is not set, the
 function computes and returns only 5 distortion coefficients.
-- **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the
+- @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the thin prism model and return 12 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
-- **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+- @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
-- **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
+- @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the
 backward compatibility, this extra flag should be explicitly specified to make the
 calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
 set, the function computes and returns only 5 distortion coefficients.
-- **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
-the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+- @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during
+the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
 supplied distCoeffs matrix is used. Otherwise, it is set to 0.
 @param criteria Termination criteria for the iterative optimization algorithm.
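A minimal sketch of fixing precomputed intrinsics with @ref CALIB_FIX_INTRINSIC, per the recommendation in the next hunk (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

double stereoCalib(const std::vector<std::vector<cv::Point3f>>& objectPoints,
                   const std::vector<std::vector<cv::Point2f>>& imagePoints1,
                   const std::vector<std::vector<cv::Point2f>>& imagePoints2,
                   cv::Mat& K1, cv::Mat& D1, cv::Mat& K2, cv::Mat& D2,
                   cv::Size imageSize, cv::Mat& R, cv::Mat& T)
{
    cv::Mat E, F;
    // Intrinsics were estimated per camera beforehand, so only R, T, E, F move.
    return cv::stereoCalibrate(objectPoints, imagePoints1, imagePoints2,
                               K1, D1, K2, D2, imageSize, R, T, E, F,
                               cv::CALIB_FIX_INTRINSIC);
}
@endcode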
@@ -2052,10 +2052,10 @@ Besides the stereo-related information, the function can also perform a full cal
 the two cameras. However, due to the high dimensionality of the parameter space and noise in the
 input data, the function can diverge from the correct solution. If the intrinsic parameters can be
 estimated with high accuracy for each of the cameras individually (for example, using
-calibrateCamera ), you are recommended to do so and then pass CALIB_FIX_INTRINSIC flag to the
+calibrateCamera ), you are recommended to do so and then pass @ref CALIB_FIX_INTRINSIC flag to the
 function along with the computed intrinsic parameters. Otherwise, if all the parameters are
 estimated at once, it makes sense to restrict some parameters, for example, pass
-CALIB_SAME_FOCAL_LENGTH and CALIB_ZERO_TANGENT_DIST flags, which is usually a
+@ref CALIB_SAME_FOCAL_LENGTH and @ref CALIB_ZERO_TANGENT_DIST flags, which is usually a
 reasonable assumption.
 Similarly to calibrateCamera, the function minimizes the total re-projection error for all the
@@ -2105,7 +2105,7 @@ rectified first camera's image.
 camera, i.e. it projects points given in the rectified first camera coordinate system into the
 rectified second camera's image.
 @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see @ref reprojectImageTo3D).
-@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
+@param flags Operation flags that may be zero or @ref CALIB_ZERO_DISPARITY . If the flag is set,
 the function makes the principal points of each camera have the same pixel coordinates in the
 rectified views. And if the flag is not set, the function may still shift the images in the
 horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
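A minimal sketch of passing @ref CALIB_ZERO_DISPARITY through the flags parameter documented above (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>

void rectifyPair(const cv::Mat& K1, const cv::Mat& D1,
                 const cv::Mat& K2, const cv::Mat& D2,
                 cv::Size imageSize, const cv::Mat& R, const cv::Mat& T)
{
    cv::Mat R1, R2, P1, P2, Q;
    // CALIB_ZERO_DISPARITY makes both principal points share pixel coordinates
    cv::stereoRectify(K1, D1, K2, D2, imageSize, R, T,
                      R1, R2, P1, P2, Q, cv::CALIB_ZERO_DISPARITY);
}
@endcode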
@@ -2152,7 +2152,7 @@ coordinates. The function distinguishes the following two cases:
 \end{bmatrix} ,\f]
 where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if
-CALIB_ZERO_DISPARITY is set.
+@ref CALIB_ZERO_DISPARITY is set.
 - **Vertical stereo**: the first and the second camera views are shifted relative to each other
 mainly in the vertical direction (and probably a bit in the horizontal direction too). The epipolar
@@ -2171,7 +2171,7 @@ coordinates. The function distinguishes the following two cases:
 \end{bmatrix},\f]
 where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if
-CALIB_ZERO_DISPARITY is set.
+@ref CALIB_ZERO_DISPARITY is set.
 As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera
 matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to
@@ -2680,8 +2680,8 @@ same camera intrinsic matrix. If this assumption does not hold for your use case
 to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When
 passing these coordinates, pass the identity matrix for this parameter.
 @param method Method for computing an essential matrix.
-- **RANSAC** for the RANSAC algorithm.
-- **LMEDS** for the LMedS algorithm.
+- @ref RANSAC for the RANSAC algorithm.
+- @ref LMEDS for the LMedS algorithm.
 @param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of
 confidence (probability) that the estimated matrix is correct.
 @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
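A minimal sketch with the @ref RANSAC method and the prob/threshold parameters described above (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

cv::Mat essentialFromMatches(const std::vector<cv::Point2f>& points1,
                             const std::vector<cv::Point2f>& points2,
                             const cv::Mat& cameraMatrix)
{
    // prob = 0.999 confidence; threshold = 1 px point-to-epipolar-line distance
    return cv::findEssentialMat(points1, points2, cameraMatrix,
                                cv::RANSAC, 0.999, 1.0);
}
@endcode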
@@ -2713,8 +2713,8 @@ be floating-point (single or double precision).
 are feature points from cameras with same focal length and principal point.
 @param pp principal point of the camera.
 @param method Method for computing a fundamental matrix.
-- **RANSAC** for the RANSAC algorithm.
-- **LMEDS** for the LMedS algorithm.
+- @ref RANSAC for the RANSAC algorithm.
+- @ref LMEDS for the LMedS algorithm.
 @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
 line in pixels, beyond which the point is considered an outlier and is not used for computing the
 final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the
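A minimal sketch of the fundamental-matrix counterpart, with RANSAC and a threshold in the suggested 1-3 px range (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

cv::Mat fundamentalFromMatches(const std::vector<cv::Point2f>& points1,
                               const std::vector<cv::Point2f>& points2)
{
    // 3.0 px epipolar-line distance threshold, 0.99 confidence
    return cv::findFundamentalMat(points1, points2, cv::FM_RANSAC, 3.0, 0.99);
}
@endcode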
@@ -3221,8 +3221,8 @@ b_2\\
 @param to Second input 2D point set containing \f$(x,y)\f$.
 @param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
 @param method Robust method used to compute transformation. The following methods are possible:
-- cv::RANSAC - RANSAC-based robust method
-- cv::LMEDS - Least-Median robust method
+- @ref RANSAC - RANSAC-based robust method
+- @ref LMEDS - Least-Median robust method
 RANSAC is the default method.
 @param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
 a point as an inlier. Applies only to RANSAC.
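A minimal sketch of selecting @ref RANSAC for estimateAffine2D and reading back the inlier mask (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

cv::Mat affineFromPoints(const std::vector<cv::Point2f>& from,
                         const std::vector<cv::Point2f>& to)
{
    std::vector<uchar> inliers; // one entry per point: 1 = inlier, 0 = outlier
    // RANSAC (the default) with a 3 px reprojection threshold
    return cv::estimateAffine2D(from, to, inliers, cv::RANSAC, 3.0);
}
@endcode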
@@ -3271,8 +3271,8 @@ two 2D point sets.
 @param to Second input 2D point set.
 @param inliers Output vector indicating which points are inliers.
 @param method Robust method used to compute transformation. The following methods are possible:
-- cv::RANSAC - RANSAC-based robust method
-- cv::LMEDS - Least-Median robust method
+- @ref RANSAC - RANSAC-based robust method
+- @ref LMEDS - Least-Median robust method
 RANSAC is the default method.
 @param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
 a point as an inlier. Applies only to RANSAC.
@@ -3772,7 +3772,8 @@ namespace fisheye
     CALIB_FIX_K3 = 1 << 6,
     CALIB_FIX_K4 = 1 << 7,
     CALIB_FIX_INTRINSIC = 1 << 8,
-    CALIB_FIX_PRINCIPAL_POINT = 1 << 9
+    CALIB_FIX_PRINCIPAL_POINT = 1 << 9,
+    CALIB_ZERO_DISPARITY = 1 << 10
 };
 /** @brief Projects points using fisheye model
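Since each enumerator is a distinct bit (1 << n), including the newly added CALIB_ZERO_DISPARITY, these flags combine with bitwise OR; a minimal sketch:
@code
#include <opencv2/calib3d.hpp>

// A typical combination for cv::fisheye::calibrate; any subset of bits is valid.
const int fisheyeFlags = cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC
                       | cv::fisheye::CALIB_FIX_SKEW
                       | cv::fisheye::CALIB_FIX_PRINCIPAL_POINT;
@endcode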
@@ -3905,7 +3906,7 @@ namespace fisheye
 @param image_size Size of the image used only to initialize the camera intrinsic matrix.
 @param K Output 3x3 floating-point camera intrinsic matrix
 \f$\cameramatrix{A}\f$ . If
-fisheye::CALIB_USE_INTRINSIC_GUESS/ is specified, some or all of fx, fy, cx, cy must be
+@ref fisheye::CALIB_USE_INTRINSIC_GUESS is specified, some or all of fx, fy, cx, cy must be
 initialized before calling the function.
 @param D Output vector of distortion coefficients \f$\distcoeffsfisheye\f$.
 @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view.
@@ -3915,17 +3916,17 @@ namespace fisheye
 position of the calibration pattern in the k-th pattern view (k=0.. *M* -1).
 @param tvecs Output vector of translation vectors estimated for each pattern view.
 @param flags Different flags that may be zero or a combination of the following values:
-- **fisheye::CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+- @ref fisheye::CALIB_USE_INTRINSIC_GUESS cameraMatrix contains valid initial values of
 fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
 center ( imageSize is used), and focal distances are computed in a least-squares fashion.
-- **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration
+- @ref fisheye::CALIB_RECOMPUTE_EXTRINSIC Extrinsic will be recomputed after each iteration
 of intrinsic optimization.
-- **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number.
-- **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero.
-- **fisheye::CALIB_FIX_K1..fisheye::CALIB_FIX_K4** Selected distortion coefficients
+- @ref fisheye::CALIB_CHECK_COND The functions will check validity of condition number.
+- @ref fisheye::CALIB_FIX_SKEW Skew coefficient (alpha) is set to zero and stay zero.
+- @ref fisheye::CALIB_FIX_K1,..., @ref fisheye::CALIB_FIX_K4 Selected distortion coefficients
 are set to zeros and stay zero.
-- **fisheye::CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
-optimization. It stays at the center or at a different location specified when CALIB_USE_INTRINSIC_GUESS is set too.
+- @ref fisheye::CALIB_FIX_PRINCIPAL_POINT The principal point is not changed during the global
+optimization. It stays at the center or at a different location specified when @ref fisheye::CALIB_USE_INTRINSIC_GUESS is set too.
 @param criteria Termination criteria for the iterative optimization algorithm.
 */
 CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size,
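A minimal sketch of the call, combining flags in the style sketched earlier (wrapper name illustrative):
@code
#include <opencv2/calib3d.hpp>
#include <vector>

double fisheyeCalibrate(const std::vector<std::vector<cv::Point3f>>& objectPoints,
                        const std::vector<std::vector<cv::Point2f>>& imagePoints,
                        cv::Size imageSize, cv::Mat& K, cv::Mat& D)
{
    std::vector<cv::Mat> rvecs, tvecs;
    int flags = cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC | cv::fisheye::CALIB_FIX_SKEW;
    return cv::fisheye::calibrate(objectPoints, imagePoints, imageSize,
                                  K, D, rvecs, tvecs, flags);
}
@endcode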
@@ -3949,7 +3950,7 @@ optimization. It stays at the center or at a different location specified when C
 @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second
 camera.
 @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ).
-@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
+@param flags Operation flags that may be zero or @ref fisheye::CALIB_ZERO_DISPARITY . If the flag is set,
 the function makes the principal points of each camera have the same pixel coordinates in the
 rectified views. And if the flag is not set, the function may still shift the images in the
 horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
@ -3975,7 +3976,7 @@ optimization. It stays at the center or at a different location specified when C
observed by the second camera. observed by the second camera.
@param K1 Input/output first camera intrinsic matrix: @param K1 Input/output first camera intrinsic matrix:
\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified, any of @ref fisheye::CALIB_USE_INTRINSIC_GUESS , @ref fisheye::CALIB_FIX_INTRINSIC are specified,
some or all of the matrix components must be initialized. some or all of the matrix components must be initialized.
@param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements. @param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements.
@param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 . @param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 .
@ -3985,16 +3986,16 @@ optimization. It stays at the center or at a different location specified when C
@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
@param T Output translation vector between the coordinate systems of the cameras. @param T Output translation vector between the coordinate systems of the cameras.
@param flags Different flags that may be zero or a combination of the following values: @param flags Different flags that may be zero or a combination of the following values:
- **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices - @ref fisheye::CALIB_FIX_INTRINSIC Fix K1, K2? and D1, D2? so that only R, T matrices
are estimated. are estimated.
- **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contains valid initial values of - @ref fisheye::CALIB_USE_INTRINSIC_GUESS K1, K2 contains valid initial values of
fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
center (imageSize is used), and focal distances are computed in a least-squares fashion. center (imageSize is used), and focal distances are computed in a least-squares fashion.
- **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration - @ref fisheye::CALIB_RECOMPUTE_EXTRINSIC Extrinsic will be recomputed after each iteration
of intrinsic optimization. of intrinsic optimization.
- **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number. - @ref fisheye::CALIB_CHECK_COND The functions will check validity of condition number.
- **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero. - @ref fisheye::CALIB_FIX_SKEW Skew coefficient (alpha) is set to zero and stay zero.
- **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay - @ref fisheye::CALIB_FIX_K1,..., @ref fisheye::CALIB_FIX_K4 Selected distortion coefficients are set to zeros and stay
zero. zero.
@param criteria Termination criteria for the iterative optimization algorithm. @param criteria Termination criteria for the iterative optimization algorithm.
*/ */
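
The stereo variant follows the same pattern; a minimal sketch with hypothetical matched point sets, fixing the intrinsics so that only R and T are refined:

    // Hypothetical inputs: matched per-view point sets from the left and right cameras.
    void runFisheyeStereoCalibration(const std::vector<std::vector<cv::Point3f> >& objectPoints,
                                     const std::vector<std::vector<cv::Point2f> >& leftPoints,
                                     const std::vector<std::vector<cv::Point2f> >& rightPoints,
                                     cv::Size imageSize,
                                     cv::Matx33d& K1, cv::Vec4d& D1,  // from prior per-camera calibrate runs
                                     cv::Matx33d& K2, cv::Vec4d& D2)
    {
        cv::Matx33d R;
        cv::Vec3d T;
        cv::fisheye::stereoCalibrate(objectPoints, leftPoints, rightPoints,
                                     K1, D1, K2, D2, imageSize, R, T,
                                     cv::fisheye::CALIB_FIX_INTRINSIC);  // only R and T are estimated
    }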

@ -492,6 +492,12 @@ TEST_F(fisheyeTest, EstimateUncertainties)
TEST_F(fisheyeTest, stereoRectify) TEST_F(fisheyeTest, stereoRectify)
{ {
// For consistency purposes
CV_StaticAssert(
static_cast<int>(cv::CALIB_ZERO_DISPARITY) == static_cast<int>(cv::fisheye::CALIB_ZERO_DISPARITY),
"For the purpose of continuity the following should be true: cv::CALIB_ZERO_DISPARITY == cv::fisheye::CALIB_ZERO_DISPARITY"
);
const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY"); const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY");
cv::Size calibration_size = this->imageSize, requested_size = calibration_size; cv::Size calibration_size = this->imageSize, requested_size = calibration_size;
@ -504,7 +510,7 @@ TEST_F(fisheyeTest, stereoRectify)
double balance = 0.0, fov_scale = 1.1; double balance = 0.0, fov_scale = 1.1;
cv::Mat R1, R2, P1, P2, Q; cv::Mat R1, R2, P1, P2, Q;
cv::fisheye::stereoRectify(K1, D1, K2, D2, calibration_size, theR, theT, R1, R2, P1, P2, Q, cv::fisheye::stereoRectify(K1, D1, K2, D2, calibration_size, theR, theT, R1, R2, P1, P2, Q,
cv::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale); cv::fisheye::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale);
// Collected with these CMake flags: -DWITH_IPP=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_DISABLE_OPTIMIZATION=ON -DCMAKE_BUILD_TYPE=Debug // Collected with these CMake flags: -DWITH_IPP=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_DISABLE_OPTIMIZATION=ON -DCMAKE_BUILD_TYPE=Debug
cv::Matx33d R1_ref( cv::Matx33d R1_ref(

@ -626,7 +626,12 @@ public:
String name() const; String name() const;
String vendor() const; String vendor() const;
/// See CL_PLATFORM_VERSION
String version() const; String version() const;
int versionMajor() const;
int versionMinor() const;
int deviceNumber() const; int deviceNumber() const;
void getDevice(Device& device, int d) const; void getDevice(Device& device, int d) const;
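
The new accessors expose the parsed CL_PLATFORM_VERSION numbers without string handling on the caller's side. A minimal sketch of querying them (getPlatfomsInfo is the historical spelling in ocl.hpp):

    #include <opencv2/core/ocl.hpp>
    #include <cstdio>

    void listPlatforms()
    {
        std::vector<cv::ocl::PlatformInfo> platforms;
        cv::ocl::getPlatfomsInfo(platforms);
        for (size_t i = 0; i < platforms.size(); i++)
        {
            const cv::ocl::PlatformInfo& pi = platforms[i];
            // OpenCL 1.2+ is the requirement checked later for CL-GL texture sharing.
            bool hasCL12 = pi.versionMajor() > 1 || (pi.versionMajor() == 1 && pi.versionMinor() >= 2);
            std::printf("%s: '%s' -> %d.%d (CL-GL sharing %s)\n",
                        pi.name().c_str(), pi.version().c_str(),
                        pi.versionMajor(), pi.versionMinor(),
                        hasCL12 ? "possible" : "unavailable, needs OpenCL 1.2+");
        }
    }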

@ -154,7 +154,7 @@ static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int sdepth, int
sdepth == CV_32F ? "half" : "float", sdepth == CV_32F ? "half" : "float",
rowsPerWI, rowsPerWI,
sdepth == CV_32F ? " -D FLOAT_TO_HALF " : ""); sdepth == CV_32F ? " -D FLOAT_TO_HALF " : "");
ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt); ocl::Kernel k(sdepth == CV_32F ? "convertFp16_FP32_to_FP16" : "convertFp16_FP16_to_FP32", ocl::core::halfconvert_oclsrc, build_opt);
if (k.empty()) if (k.empty())
return false; return false;

@ -1499,25 +1499,27 @@ Platform& Platform::getDefault()
/////////////////////////////////////// Device //////////////////////////////////////////// /////////////////////////////////////// Device ////////////////////////////////////////////
// deviceVersion has format // Version has format:
// OpenCL<space><major_version.minor_version><space><vendor-specific information> // OpenCL<space><major_version.minor_version><space><vendor-specific information>
// by specification // by specification
// http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor) // https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
static void parseOpenCLVersion(const String &version, int &major, int &minor)
{ {
major = minor = 0; major = minor = 0;
if (10 >= deviceVersion.length()) if (10 >= version.length())
return; return;
const char *pstr = deviceVersion.c_str(); const char *pstr = version.c_str();
if (0 != strncmp(pstr, "OpenCL ", 7)) if (0 != strncmp(pstr, "OpenCL ", 7))
return; return;
size_t ppos = deviceVersion.find('.', 7); size_t ppos = version.find('.', 7);
if (String::npos == ppos) if (String::npos == ppos)
return; return;
String temp = deviceVersion.substr(7, ppos - 7); String temp = version.substr(7, ppos - 7);
major = atoi(temp.c_str()); major = atoi(temp.c_str());
temp = deviceVersion.substr(ppos + 1); temp = version.substr(ppos + 1);
minor = atoi(temp.c_str()); minor = atoi(temp.c_str());
} }
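
The parser only commits to the "OpenCL <major>.<minor> <vendor-specific>" prefix mandated by the specification; anything else leaves 0.0. Illustrative inputs inside this translation unit (the vendor tails are invented for the example):

    int major = 0, minor = 0;
    parseOpenCLVersion("OpenCL 1.2 CUDA 11.0.228", major, minor);  // -> major=1, minor=2
    parseOpenCLVersion("OpenCL 3.0 (Build 0)", major, minor);      // -> major=3, minor=0
    parseOpenCLVersion("opencl 1.2", major, minor);                // too short / bad prefix -> stays 0, 0
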
@ -1555,7 +1557,7 @@ struct Device::Impl
addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS); addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
String deviceVersion_ = getStrProp(CL_DEVICE_VERSION); String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_); parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
size_t pos = 0; size_t pos = 0;
while (pos < extensions_.size()) while (pos < extensions_.size())
@ -3529,6 +3531,15 @@ bool Kernel::empty() const
return ptr() == 0; return ptr() == 0;
} }
static cv::String dumpValue(size_t sz, const void* p)
{
if (sz == 4)
return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
if (sz == 8)
return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
return cv::format("%p", p);
}
int Kernel::set(int i, const void* value, size_t sz) int Kernel::set(int i, const void* value, size_t sz)
{ {
if (!p || !p->handle) if (!p || !p->handle)
@ -3539,7 +3550,7 @@ int Kernel::set(int i, const void* value, size_t sz)
p->cleanupUMats(); p->cleanupUMats();
cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value); cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str()); CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
if (retval != CL_SUCCESS) if (retval != CL_SUCCESS)
return -1; return -1;
return i+1; return i+1;
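
With dumpValue in place, the debug check no longer logs an opaque pointer: a 4-byte argument is rendered as signed, unsigned, hex, and float at once. For a hypothetical argument holding the int 640, the message would look roughly like:

    // clSetKernelArg('convertFp16_FP32_to_FP16', arg_index=3, size=4,
    //                value=640 / 640u / 0x00000280 / 8.96831e-43)
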
@ -6566,6 +6577,9 @@ struct PlatformInfo::Impl
refcount = 1; refcount = 1;
handle = *(cl_platform_id*)id; handle = *(cl_platform_id*)id;
getDevices(devices, handle); getDevices(devices, handle);
version_ = getStrProp(CL_PLATFORM_VERSION);
parseOpenCLVersion(version_, versionMajor_, versionMinor_);
} }
String getStrProp(cl_platform_info prop) const String getStrProp(cl_platform_info prop) const
@ -6579,6 +6593,10 @@ struct PlatformInfo::Impl
IMPLEMENT_REFCOUNTABLE(); IMPLEMENT_REFCOUNTABLE();
std::vector<cl_device_id> devices; std::vector<cl_device_id> devices;
cl_platform_id handle; cl_platform_id handle;
String version_;
int versionMajor_;
int versionMinor_;
}; };
PlatformInfo::PlatformInfo() PlatformInfo::PlatformInfo()
@ -6641,7 +6659,19 @@ String PlatformInfo::vendor() const
String PlatformInfo::version() const String PlatformInfo::version() const
{ {
return p ? p->getStrProp(CL_PLATFORM_VERSION) : String(); return p ? p->version_ : String();
}
int PlatformInfo::versionMajor() const
{
CV_Assert(p);
return p->versionMajor_;
}
int PlatformInfo::versionMinor() const
{
CV_Assert(p);
return p->versionMinor_;
} }
static void getPlatforms(std::vector<cl_platform_id>& platforms) static void getPlatforms(std::vector<cl_platform_id>& platforms)

@ -47,8 +47,17 @@
#endif #endif
#endif #endif
__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset, __kernel void
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols) #ifdef FLOAT_TO_HALF
convertFp16_FP32_to_FP16
#else
convertFp16_FP16_to_FP32
#endif
(
__global const uchar * srcptr, int src_step, int src_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int dst_rows, int dst_cols
)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y0 = get_global_id(1) * rowsPerWI; int y0 = get_global_id(1) * rowsPerWI;
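
Host code keeps calling the same public entry point; only the OpenCL kernel name is now direction-specific, which makes the two conversions distinguishable in traces. A minimal round-trip sketch (the named kernels are dispatched only when the OpenCL path is taken, e.g. for UMat inputs):

    cv::UMat src = cv::UMat::ones(4, 4, CV_32F), half, restored;
    cv::convertFp16(src, half);      // FP32 -> FP16 (CV_16S storage); runs convertFp16_FP32_to_FP16
    cv::convertFp16(half, restored); // FP16 -> FP32; runs convertFp16_FP16_to_FP32
    CV_Assert(restored.type() == CV_32F);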

@ -1575,6 +1575,7 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
// CL-GL Interoperability // CL-GL Interoperability
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
# include "opencv2/core/opencl/runtime/opencl_core.hpp"
# include "opencv2/core/opencl/runtime/opencl_gl.hpp" # include "opencv2/core/opencl/runtime/opencl_gl.hpp"
# ifdef cl_khr_gl_sharing # ifdef cl_khr_gl_sharing
# define HAVE_OPENCL_OPENGL_SHARING # define HAVE_OPENCL_OPENGL_SHARING
@ -1595,6 +1596,34 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala
namespace cv { namespace ogl { namespace cv { namespace ogl {
#if defined(HAVE_OPENCL) && defined(HAVE_OPENGL) && defined(HAVE_OPENCL_OPENGL_SHARING)
// Check to avoid crash in OpenCL runtime: https://github.com/opencv/opencv/issues/5209
static void checkOpenCLVersion()
{
using namespace cv::ocl;
const Device& device = Device::getDefault();
//CV_Assert(!device.empty());
cl_device_id dev = (cl_device_id)device.ptr();
CV_Assert(dev);
cl_platform_id platform_id = 0;
size_t sz = 0;
cl_int status = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, &sz);
CV_Assert(status == CL_SUCCESS && sz == sizeof(cl_platform_id));
CV_Assert(platform_id);
PlatformInfo pi(&platform_id);
int versionMajor = pi.versionMajor();
int versionMinor = pi.versionMinor();
if (versionMajor < 1 || (versionMajor == 1 && versionMinor <= 1))
CV_Error_(cv::Error::OpenCLApiCallError,
("OpenCL: clCreateFromGLTexture requires OpenCL 1.2+ version: %d.%d - %s (%s)",
versionMajor, versionMinor, pi.name().c_str(), pi.version().c_str())
);
}
#endif
namespace ocl { namespace ocl {
Context& initializeContextFromGL() Context& initializeContextFromGL()
@ -1719,6 +1748,8 @@ void convertToGLTexture2D(InputArray src, Texture2D& texture)
Context& ctx = Context::getDefault(); Context& ctx = Context::getDefault();
cl_context context = (cl_context)ctx.ptr(); cl_context context = (cl_context)ctx.ptr();
checkOpenCLVersion(); // clCreateFromGLTexture requires OpenCL 1.2
UMat u = src.getUMat(); UMat u = src.getUMat();
// TODO Add support for roi // TODO Add support for roi
@ -1777,6 +1808,8 @@ void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst)
Context& ctx = Context::getDefault(); Context& ctx = Context::getDefault();
cl_context context = (cl_context)ctx.ptr(); cl_context context = (cl_context)ctx.ptr();
checkOpenCLVersion(); // clCreateFromGLTexture requires OpenCL 1.2
// TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying! // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
dst.create(texture.size(), textureType); dst.create(texture.size(), textureType);
UMat u = dst.getUMat(); UMat u = dst.getUMat();

@ -189,7 +189,7 @@ TEST(Core_OutputArrayCreate, _13772)
TEST(Core_String, find_last_of__with__empty_string) TEST(Core_String, find_last_of__with__empty_string)
{ {
cv::String s; cv::String s;
size_t p = s.find_last_of("q", 0); size_t p = s.find_last_of('q', 0);
// npos is not exported: EXPECT_EQ(cv::String::npos, p); // npos is not exported: EXPECT_EQ(cv::String::npos, p);
EXPECT_EQ(std::string::npos, p); EXPECT_EQ(std::string::npos, p);
} }

@ -206,7 +206,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("Test is disabled in OpenVINO 2020.4"); throw SkipTestException("Test is disabled in OpenVINO 2020.4");
#endif #endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure
if (target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_MYRIAD)
throw SkipTestException(""); throw SkipTestException("");
#endif #endif
@ -241,7 +241,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny)
{ {
if (backend == DNN_BACKEND_HALIDE) if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException(""); throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure
if (target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_MYRIAD)
throw SkipTestException(""); throw SkipTestException("");
#endif #endif
@ -276,9 +276,9 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
throw SkipTestException("Test is disabled in OpenVINO 2019R2"); throw SkipTestException("Test is disabled in OpenVINO 2019R2");
#endif #endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is disabled in OpenVINO 2021.1 / MYRIAD"); throw SkipTestException("Test is disabled in OpenVINO 2021.1+ / MYRIAD");
#endif #endif
if (backend == DNN_BACKEND_HALIDE || if (backend == DNN_BACKEND_HALIDE ||
(backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) || (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) ||

@ -620,7 +620,7 @@ namespace cv {
// read section // read section
read_net = false; read_net = false;
++layers_counter; ++layers_counter;
const size_t layer_type_size = line.find("]") - 1; const size_t layer_type_size = line.find(']') - 1;
CV_Assert(layer_type_size < line.size()); CV_Assert(layer_type_size < line.size());
std::string layer_type = line.substr(1, layer_type_size); std::string layer_type = line.substr(1, layer_type_size);
net->layers_cfg[layers_counter]["layer_type"] = layer_type; net->layers_cfg[layers_counter]["layer_type"] = layer_type;

@ -1688,16 +1688,7 @@ public:
umat_blobs.resize(n); umat_blobs.resize(n);
for (size_t i = 0; i < n; i++) for (size_t i = 0; i < n; i++)
{ {
if (use_half) inputs[i + 1].copyTo(umat_blobs[i]);
{
Mat matFP32;
convertFp16(inputs[i + 1], matFP32);
matFP32.copyTo(umat_blobs[i]);
}
else
{
inputs[i + 1].copyTo(umat_blobs[i]);
}
} }
inputs.resize(1); inputs.resize(1);
} }
@ -1708,7 +1699,10 @@ public:
umat_blobs.resize(n); umat_blobs.resize(n);
for (size_t i = 0; i < n; i++) for (size_t i = 0; i < n; i++)
{ {
blobs[i].copyTo(umat_blobs[i]); if (use_half)
convertFp16(blobs[i], umat_blobs[i]);
else
blobs[i].copyTo(umat_blobs[i]);
} }
} }
@ -1764,14 +1758,20 @@ public:
if (fusedWeights) if (fusedWeights)
{ {
weightsMat.copyTo(umat_blobs[0]); if (use_half)
convertFp16(weightsMat, umat_blobs[0]);
else
weightsMat.copyTo(umat_blobs[0]);
fusedWeights = false; fusedWeights = false;
} }
if (fusedBias) if (fusedBias)
{ {
if ( umat_blobs.size() < 2 ) if ( umat_blobs.size() < 2 )
umat_blobs.resize(2); umat_blobs.resize(2);
umat_blobs[1] = UMat(biasvec, true); if (use_half)
convertFp16(Mat(biasvec, true), umat_blobs[1]);
else
Mat(biasvec, true).copyTo(umat_blobs[1]);
convolutionOp->setBias(true); convolutionOp->setBias(true);
fusedBias = false; fusedBias = false;
} }

@ -274,8 +274,6 @@ class OCL4DNNConvSpatial
int32_t group_; int32_t group_;
bool bias_term_; bool bias_term_;
UMat swizzled_weights_umat; UMat swizzled_weights_umat;
UMat weights_half;
UMat bias_half;
UMat bottom_data2_; UMat bottom_data2_;
int32_t bottom_index_; int32_t bottom_index_;

@ -88,13 +88,13 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
size_t global_copy[2]; size_t global_copy[2];
global_copy[0] = width; global_copy[0] = width;
global_copy[1] = height; global_copy[1] = height;
oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer)); oclk_gemm_copy
oclk_gemm_copy.set(1, image); .args(
oclk_gemm_copy.set(2, offset); ocl::KernelArg::PtrReadOnly(buffer),
oclk_gemm_copy.set(3, width); image, offset,
oclk_gemm_copy.set(4, height); width, height,
oclk_gemm_copy.set(5, ld); ld)
oclk_gemm_copy.run(2, global_copy, NULL, false); .run(2, global_copy, NULL, false);
} }
} else { } else {
if (!padding) if (!padding)
@ -112,13 +112,13 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
global_copy[0] = padded_width; global_copy[0] = padded_width;
global_copy[1] = padded_height; global_copy[1] = padded_height;
oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer)); oclk_gemm_copy
oclk_gemm_copy.set(1, image); .args(
oclk_gemm_copy.set(2, offset); ocl::KernelArg::PtrReadOnly(buffer),
oclk_gemm_copy.set(3, width); image, offset,
oclk_gemm_copy.set(4, height); width, height,
oclk_gemm_copy.set(5, ld); ld)
.run(2, global_copy, NULL, false);
oclk_gemm_copy.run(2, global_copy, NULL, false); oclk_gemm_copy.run(2, global_copy, NULL, false);
} }
} }
@ -465,8 +465,12 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
kernel_name += "_float"; kernel_name += "_float";
} }
bool isBetaZero = beta == 0;
String opts = format("-DTYPE=%d", halfPrecisionMode ? TYPE_HALF : TYPE_FLOAT); String opts = format("-DTYPE=%d", halfPrecisionMode ? TYPE_HALF : TYPE_FLOAT);
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts); if (isBetaZero)
opts += " -DZERO_BETA=1";
size_t local[2] = {}; size_t local[2] = {};
size_t global[2] = {}; size_t global[2] = {};
if (TransA == CblasNoTrans && TransB != CblasNoTrans && is_small_batch) { if (TransA == CblasNoTrans && TransB != CblasNoTrans && is_small_batch) {
@ -496,27 +500,37 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
local[1] = ly; local[1] = ly;
} }
int arg_idx = 0;
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
oclk_gemm_float.set(arg_idx++, offA);
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(B));
oclk_gemm_float.set(arg_idx++, offB);
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrWriteOnly(C));
oclk_gemm_float.set(arg_idx++, offC);
oclk_gemm_float.set(arg_idx++, M);
oclk_gemm_float.set(arg_idx++, N);
oclk_gemm_float.set(arg_idx++, K);
oclk_gemm_float.set(arg_idx++, (float)alpha);
oclk_gemm_float.set(arg_idx++, (float)beta);
bool ret = true; bool ret = true;
if (TransB == CblasNoTrans || TransA != CblasNoTrans) { if (TransB == CblasNoTrans || TransA != CblasNoTrans)
{
// _NN_
int stride = 256; int stride = 256;
for (int start_index = 0; start_index < K; start_index += stride) { for (int start_index = 0; start_index < K; start_index += stride) {
oclk_gemm_float.set(arg_idx, start_index); ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts);
ret = oclk_gemm_float.run(2, global, local, false); oclk_gemm_float.args(
ocl::KernelArg::PtrReadOnly(A), offA,
ocl::KernelArg::PtrReadOnly(B), offB,
isBetaZero ? ocl::KernelArg::PtrWriteOnly(C) : ocl::KernelArg::PtrReadWrite(C), offC,
M, N, K,
(float)alpha, (float)beta,
start_index
);
ret &= oclk_gemm_float.run(2, global, local, false);
} }
} else { }
else
{
// _NT_
//C.reshape(1,1).setTo(0xfe00 /*FP16 NAN*/); // stable one-line reproducer for https://github.com/opencv/opencv/issues/18937
//C.reshape(1,1).setTo(0); // non-optimal fixup (and not accurate)
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts);
oclk_gemm_float.args(
ocl::KernelArg::PtrReadOnly(A), offA,
ocl::KernelArg::PtrReadOnly(B), offB,
isBetaZero ? ocl::KernelArg::PtrWriteOnly(C) : ocl::KernelArg::PtrReadWrite(C), offC,
M, N, K,
(float)alpha, (float)beta
);
ret = oclk_gemm_float.run(2, global, local, false); ret = oclk_gemm_float.run(2, global, local, false);
} }
return ret; return ret;
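
The ZERO_BETA build option exists because the shortcut "beta == 0 means C_out = alpha*A*B" only holds if C is never read: with an uninitialized output buffer that may contain NaN bit patterns, multiplying by zero does not scrub the garbage, which is the failure mode behind issue 18937 noted in the comment above. The kernels (see gemm_buffer.cl further down) use BETA_ZERO_CHECK to skip the read entirely. A minimal scalar illustration:

    #include <limits>

    void betaZeroIllustration()
    {
        float c_garbage = std::numeric_limits<float>::quiet_NaN(); // uninitialized C may hold NaN
        float alpha = 1.f, beta = 0.f, acc = 2.f;                  // acc stands in for the A*B dot product
        float wrong = alpha * acc + beta * c_garbage;              // NaN: 0 * NaN is NaN, not 0
        float fixed = alpha * acc;                                 // ZERO_BETA path: C is written, never read
        (void)wrong; (void)fixed;
    }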

@ -588,16 +588,16 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
fused_eltwise_ = false; fused_eltwise_ = false;
} }
if (use_half_ && bias_half.empty() && !bias.empty()) if (use_half_ && !bias.empty())
convertFp16(bias, bias_half); CV_CheckTypeEQ(bias.type(), CV_16SC1, "");
if (use_half_ && weights_half.empty()) if (use_half_)
convertFp16(weight, weights_half); CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
prepareKernel(bottom, top, weight, (use_half_) ? bias_half : bias, numImages); prepareKernel(bottom, top, weight, bias, numImages);
if (bestKernelConfig.empty()) if (bestKernelConfig.empty())
return false; return false;
return convolve(bottom, top, weight, (use_half_) ? bias_half : bias, numImages, bestKernelConfig); return convolve(bottom, top, weight, bias, numImages, bestKernelConfig);
} }
template<typename Dtype> template<typename Dtype>
@ -744,29 +744,26 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
kernel_h_ * (int)alignSize(kernel_w_, 2), kernel_h_ * (int)alignSize(kernel_w_, 2),
(use_half_) ? CV_16SC1 : CV_32FC1); (use_half_) ? CV_16SC1 : CV_32FC1);
UMat swizzled_weights_tmp;
if (use_half_)
swizzled_weights_tmp.create(shape(swizzled_weights_umat), CV_32F);
if (!interleave) { if (!interleave) {
cl_uint argIdx = 0;
int32_t channels = channels_ / group_; int32_t channels = channels_ / group_;
ocl::Kernel oclk_copy_weight(CL_KERNEL_SELECT("copyWeightsSwizzled"), ocl::Kernel oclk_copy_weight(
cv::ocl::dnn::conv_spatial_helper_oclsrc); use_half_ ? "copyWeightsSwizzled_half" : "copyWeightsSwizzled_float",
cv::ocl::dnn::conv_spatial_helper_oclsrc,
use_half_ ? "-DHALF_SUPPORT=1 -DDtype=half" : "-DDtype=float"
);
if (oclk_copy_weight.empty()) if (oclk_copy_weight.empty())
return false; return false;
oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); oclk_copy_weight.args(
if (use_half_) ocl::KernelArg::PtrReadOnly(weight),
oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_tmp)); ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat),
else kernel_w_,
oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat)); kernel_h_,
oclk_copy_weight.set(argIdx++, kernel_w_); channels,
oclk_copy_weight.set(argIdx++, kernel_h_); num_output_,
oclk_copy_weight.set(argIdx++, channels); swizzled_factor
oclk_copy_weight.set(argIdx++, num_output_); );
oclk_copy_weight.set(argIdx++, swizzled_factor);
size_t global_work_size_copy[3] = { size_t global_work_size_copy[3] = {
(size_t) (alignSize(num_output_, swizzled_factor) * channels * kernel_w_ * kernel_h_), 1, 1 }; (size_t) (alignSize(num_output_, swizzled_factor) * channels * kernel_w_ * kernel_h_), 1, 1 };
@ -778,13 +775,24 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
} }
} else { } else {
// assumption: kernel dimension is 2 // assumption: kernel dimension is 2
Mat weightMat = weight.getMat(ACCESS_READ); Mat weightMat;
Dtype* cpu_weight = (Dtype *)weightMat.ptr<float>();
Mat swizzledWeightMat; Mat swizzledWeightMat;
UMat weight_tmp; // FP32 in half mode, TODO implement FP16 repack
if (use_half_) if (use_half_)
swizzledWeightMat = swizzled_weights_tmp.getMat(ACCESS_WRITE); {
CV_CheckTypeEQ(weight.type(), CV_16SC1, "");
convertFp16(weight, weight_tmp);
weightMat = weight_tmp.getMat(ACCESS_READ);
swizzledWeightMat.create(shape(swizzled_weights_umat), CV_32F);
}
else else
{
weightMat = weight.getMat(ACCESS_READ);
swizzledWeightMat = swizzled_weights_umat.getMat(ACCESS_WRITE); swizzledWeightMat = swizzled_weights_umat.getMat(ACCESS_WRITE);
}
CV_CheckTypeEQ(weightMat.type(), CV_32FC1, "");
Dtype* cpu_weight = (Dtype *)weightMat.ptr<float>();
Dtype* cpu_swizzled_weight = (Dtype *)swizzledWeightMat.ptr<float>(); Dtype* cpu_swizzled_weight = (Dtype *)swizzledWeightMat.ptr<float>();
int interleavedRows = (kernel_w_ / 2) * 2; int interleavedRows = (kernel_w_ / 2) * 2;
@ -792,26 +800,28 @@ bool OCL4DNNConvSpatial<Dtype>::swizzleWeight(const UMat &weight,
int blockWidth = swizzled_factor; // should equal to simd size. int blockWidth = swizzled_factor; // should equal to simd size.
int rowAlignment = 32; int rowAlignment = 32;
size_t interleaved_filter_size = M_ * kernel_w_ * kernel_h_ * channels_ * sizeof(Dtype); size_t interleaved_filter_size = M_ * kernel_w_ * kernel_h_ * channels_ * sizeof(Dtype);
Dtype * tmpSwizzledWeight = reinterpret_cast<Dtype*>(malloc(interleaved_filter_size)); cv::AutoBuffer<Dtype, 0> tmpSwizzledWeight(interleaved_filter_size);
CHECK_EQ(tmpSwizzledWeight != NULL, true) << "Failed to allocate temporary swizzled weight";
for (int od = 0; od < M_; od++) for (int od = 0; od < M_; od++)
for (int id = 0; id < channels_; id++) for (int id = 0; id < channels_; id++)
for (int r = 0; r < kernel_h_; r++) for (int r = 0; r < kernel_h_; r++)
for (int c = 0; c < kernel_w_; c++) for (int c = 0; c < kernel_w_; c++)
tmpSwizzledWeight[((id * kernel_h_ + r)* kernel_w_ + c) * M_ + od] = tmpSwizzledWeight[((id * kernel_h_ + r)* kernel_w_ + c) * M_ + od] =
cpu_weight[((od * channels_ + id) * kernel_h_ + r)*kernel_w_+c]; cpu_weight[((od * channels_ + id) * kernel_h_ + r)*kernel_w_+c];
interleaveMatrix(cpu_swizzled_weight, interleaveMatrix(cpu_swizzled_weight,
tmpSwizzledWeight, tmpSwizzledWeight.data(),
kernel_w_ * kernel_h_ * channels_, M_, kernel_w_ * kernel_h_ * channels_, M_,
interleavedRows, interleavedRows,
nonInterleavedRows, nonInterleavedRows,
blockWidth, blockWidth,
rowAlignment); rowAlignment);
free(tmpSwizzledWeight);
}
if (use_half_) // unmap OpenCL buffers
convertFp16(swizzled_weights_tmp, swizzled_weights_umat); weightMat.release();
if (use_half_)
convertFp16(swizzledWeightMat, swizzled_weights_umat);
}
return true; return true;
} }
@ -1104,10 +1114,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
cl_uint argIdx = 0; cl_uint argIdx = 0;
setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom)); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
if (use_half_) kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weights_half));
else
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
if (bias_term_) if (bias_term_)
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias)); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top)); kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
@ -1148,10 +1155,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom)); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
kernel.set(argIdx++, image_offset); kernel.set(argIdx++, image_offset);
if (use_half_) kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weights_half));
else
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
kernel.set(argIdx++, kernel_offset); kernel.set(argIdx++, kernel_offset);
if (bias_term_) if (bias_term_)
kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias)); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias));
@ -1956,7 +1960,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1); UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1);
calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages); calculateBenchmark(bottom, benchData, weight, bias, numImages);
if (run_auto_tuning_ || force_auto_tuning_) if (run_auto_tuning_ || force_auto_tuning_)
{ {

@ -28,10 +28,11 @@
#define INF_ENGINE_RELEASE_2020_3 2020030000 #define INF_ENGINE_RELEASE_2020_3 2020030000
#define INF_ENGINE_RELEASE_2020_4 2020040000 #define INF_ENGINE_RELEASE_2020_4 2020040000
#define INF_ENGINE_RELEASE_2021_1 2021010000 #define INF_ENGINE_RELEASE_2021_1 2021010000
#define INF_ENGINE_RELEASE_2021_2 2021020000
#ifndef INF_ENGINE_RELEASE #ifndef INF_ENGINE_RELEASE
#warning("IE version have not been provided via command-line. Using 2021.1 by default") #warning("IE version have not been provided via command-line. Using 2021.2 by default")
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_1 #define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_2
#endif #endif
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
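
These guards compare only the YYYYAA prefix of the YYYYAABBCC release number, so bumping the default to 2021.2 is a minimal change. A hedged sketch of how a consumer gates on it (using the _GE companion macro seen in the test changes above):

    #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2021_2)
    // 2021020000 / 10000 == 202102, so this branch compiles for OpenVINO 2021.2 and newer
    #endif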

@ -39,9 +39,14 @@
// //
//M*/ //M*/
#ifdef HALF_SUPPORT
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16:enable
#endif
#endif
#define CONCAT(A,B) A##_##B #define CONCAT(A,B) A##_##B
#define TEMPLATE(name,type) CONCAT(name,type) #define TEMPLATE(name,type) CONCAT(name,type)
#define Dtype float
__kernel void TEMPLATE(copyWeightsSwizzled, Dtype) __kernel void TEMPLATE(copyWeightsSwizzled, Dtype)
(__global Dtype* weightIn, (__global Dtype* weightIn,

@ -90,6 +90,12 @@
#pragma OPENCL EXTENSION cl_intel_subgroups : enable #pragma OPENCL EXTENSION cl_intel_subgroups : enable
#endif #endif
#ifdef ZERO_BETA
#define BETA_ZERO_CHECK(b0, v) (b0)
#else
#define BETA_ZERO_CHECK(b0, v) (v)
#endif
#define VEC_SIZE 4 #define VEC_SIZE 4
#define LWG_HEIGHT 4 #define LWG_HEIGHT 4
#define TILE_M 8 #define TILE_M 8
@ -143,14 +149,14 @@ __kernel void TEMPLATE(gemm_buffer_NN, Dtype)(
int row6 = mad24(global_y, TILE_M, 6) < M ? 6 : border; int row6 = mad24(global_y, TILE_M, 6) < M ? 6 : border;
int row7 = mad24(global_y, TILE_M, 7) < M ? 7 : border; int row7 = mad24(global_y, TILE_M, 7) < M ? 7 : border;
Dtype4 dot00 = (start_index != 0) ? vload4(0, dst_write0) : beta * vload4(0, dst_write0); Dtype4 dot00 = (start_index != 0) ? vload4(0, dst_write0) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0));
Dtype4 dot01 = (start_index != 0) ? vload4(0, dst_write0 + 1 * N) : beta * vload4(0, dst_write0 + 1 * N); Dtype4 dot01 = (start_index != 0) ? vload4(0, dst_write0 + 1 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 1 * N));
Dtype4 dot02 = (start_index != 0) ? vload4(0, dst_write0 + 2 * N) : beta * vload4(0, dst_write0 + 2 * N); Dtype4 dot02 = (start_index != 0) ? vload4(0, dst_write0 + 2 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 2 * N));
Dtype4 dot03 = (start_index != 0) ? vload4(0, dst_write0 + 3 * N) : beta * vload4(0, dst_write0 + 3 * N); Dtype4 dot03 = (start_index != 0) ? vload4(0, dst_write0 + 3 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 3 * N));
Dtype4 dot04 = (start_index != 0) ? vload4(0, dst_write0 + 4 * N) : beta * vload4(0, dst_write0 + 4 * N); Dtype4 dot04 = (start_index != 0) ? vload4(0, dst_write0 + 4 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 4 * N));
Dtype4 dot05 = (start_index != 0) ? vload4(0, dst_write0 + 5 * N) : beta * vload4(0, dst_write0 + 5 * N); Dtype4 dot05 = (start_index != 0) ? vload4(0, dst_write0 + 5 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 5 * N));
Dtype4 dot06 = (start_index != 0) ? vload4(0, dst_write0 + 6 * N) : beta * vload4(0, dst_write0 + 6 * N); Dtype4 dot06 = (start_index != 0) ? vload4(0, dst_write0 + 6 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 6 * N));
Dtype4 dot07 = (start_index != 0) ? vload4(0, dst_write0 + 7 * N) : beta * vload4(0, dst_write0 + 7 * N); Dtype4 dot07 = (start_index != 0) ? vload4(0, dst_write0 + 7 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 7 * N));
int end_index = min(start_index + 256, K); int end_index = min(start_index + 256, K);
int w = start_index; int w = start_index;
@ -579,7 +585,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)(
output = (local_x == 5) ? _dot.s5 : output; \ output = (local_x == 5) ? _dot.s5 : output; \
output = (local_x == 6) ? _dot.s6 : output; \ output = (local_x == 6) ? _dot.s6 : output; \
output = (local_x == 7) ? _dot.s7 : output; \ output = (local_x == 7) ? _dot.s7 : output; \
dst_write0[0] = mad(output, alpha, beta * dst_write0[0]); \ dst_write0[0] = BETA_ZERO_CHECK(alpha * output, mad(output, alpha, beta * dst_write0[0])); \
dst_write0 += N; dst_write0 += N;
if(global_x < N && global_y * 8 < M) { if(global_x < N && global_y * 8 < M) {
@ -765,7 +771,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)(
output = (local_x == 5) ? _dot.s5 : output; \ output = (local_x == 5) ? _dot.s5 : output; \
output = (local_x == 6) ? _dot.s6 : output; \ output = (local_x == 6) ? _dot.s6 : output; \
output = (local_x == 7) ? _dot.s7 : output; \ output = (local_x == 7) ? _dot.s7 : output; \
dst_write0[0] = mad(output, alpha, beta * dst_write0[0]); \ dst_write0[0] = BETA_ZERO_CHECK(alpha * output, mad(output, alpha, beta * dst_write0[0])); \
dst_write0 += N; dst_write0 += N;
if(global_x < N && global_y * 8 < M) { if(global_x < N && global_y * 8 < M) {
@ -819,8 +825,9 @@ void TEMPLATE(gemm_buffer_NT_M_2_edgerows,Dtype)(
const Dtype4 b1 = {srca_read1[i*4], srca_read1[(i*4+1)], srca_read1[(i*4+2)], srca_read1[(i*4+3)]}; const Dtype4 b1 = {srca_read1[i*4], srca_read1[(i*4+1)], srca_read1[(i*4+2)], srca_read1[(i*4+3)]};
#pragma unroll #pragma unroll
for(int j = 0; j < rows; ++j) { for(int j = 0; j < rows; ++j) {
dot0[j] += b0 * vload4(i, srcb_read + j * K); Dtype4 a = vload4(i, srcb_read + j * K);
dot1[j] += b1 * vload4(i, srcb_read + j * K); dot0[j] += b0 * a;
dot1[j] += b1 * a;
} }
i += get_local_size(0); i += get_local_size(0);
@ -859,11 +866,19 @@ void TEMPLATE(gemm_buffer_NT_M_2_edgerows,Dtype)(
} }
} }
barrier(CLK_LOCAL_MEM_FENCE);
if(lid == 0) { if(lid == 0) {
#pragma unroll #pragma unroll
for(int j = 0; j < rows; ++j) { for(int j = 0; j < rows; ++j) {
dstc0[(x_gid * 4 + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)]; #ifdef ZERO_BETA
dstc1[(x_gid * 4 + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)]; Dtype a0 = alpha * work_each0[j];
Dtype a1 = alpha * work_each1[j];
#else
Dtype a0 = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)];
Dtype a1 = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)];
#endif
dstc0[(x_gid * 4 + j)] = a0;
dstc1[(x_gid * 4 + j)] = a1;
} }
} }
} }
@ -952,9 +967,15 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_2,Dtype)(
} }
} }
if(lid == 0) { if(lid == 0)
{
#ifdef ZERO_BETA
dstc0[x_gid] = alpha * work0[0];
dstc1[x_gid] = alpha * work1[0];
#else
dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid]; dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid];
dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid]; dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid];
#endif
} }
} }
} }
@ -1058,10 +1079,17 @@ void TEMPLATE(gemm_buffer_NT_M_4_edgerows,Dtype)(
if(lid == 0) { if(lid == 0) {
#pragma unroll #pragma unroll
for(int j = 0; j < rows; ++j) { for(int j = 0; j < rows; ++j) {
#ifdef ZERO_BETA
dstc0[(x_gid * 4 + j)] = alpha * work_each0[j];
dstc1[(x_gid * 4 + j)] = alpha * work_each1[j];
dstc2[(x_gid * 4 + j)] = alpha * work_each2[j];
dstc3[(x_gid * 4 + j)] = alpha * work_each3[j];
#else
dstc0[(x_gid * 4 + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)]; dstc0[(x_gid * 4 + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)];
dstc1[(x_gid * 4 + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)]; dstc1[(x_gid * 4 + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)];
dstc2[(x_gid * 4 + j)] = alpha * work_each2[j] + beta * dstc2[(x_gid * 4 + j)]; dstc2[(x_gid * 4 + j)] = alpha * work_each2[j] + beta * dstc2[(x_gid * 4 + j)];
dstc3[(x_gid * 4 + j)] = alpha * work_each3[j] + beta * dstc3[(x_gid * 4 + j)]; dstc3[(x_gid * 4 + j)] = alpha * work_each3[j] + beta * dstc3[(x_gid * 4 + j)];
#endif
} }
} }
} }
@ -1179,10 +1207,17 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_4,Dtype)(
} }
if(lid == 0) { if(lid == 0) {
#ifdef ZERO_BETA
dstc0[x_gid] = alpha * work0[0];
dstc1[x_gid] = alpha * work1[0];
dstc2[x_gid] = alpha * work2[0];
dstc3[x_gid] = alpha * work3[0];
#else
dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid]; dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid];
dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid]; dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid];
dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid]; dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid];
dstc3[x_gid] = alpha * work3[0] + beta * dstc3[x_gid]; dstc3[x_gid] = alpha * work3[0] + beta * dstc3[x_gid];
#endif
} }
} }
} }
@ -1320,6 +1355,16 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_8,Dtype)(
} }
if(lid == 0) { if(lid == 0) {
#ifdef ZERO_BETA
dstc0[x_gid] = alpha * work0[0];
dstc1[x_gid] = alpha * work1[0];
dstc2[x_gid] = alpha * work2[0];
dstc3[x_gid] = alpha * work3[0];
dstc4[x_gid] = alpha * work4[0];
dstc5[x_gid] = alpha * work5[0];
dstc6[x_gid] = alpha * work6[0];
dstc7[x_gid] = alpha * work7[0];
#else
dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid]; dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid];
dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid]; dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid];
dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid]; dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid];
@ -1328,6 +1373,7 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_8,Dtype)(
dstc5[x_gid] = alpha * work5[0] + beta * dstc5[x_gid]; dstc5[x_gid] = alpha * work5[0] + beta * dstc5[x_gid];
dstc6[x_gid] = alpha * work6[0] + beta * dstc6[x_gid]; dstc6[x_gid] = alpha * work6[0] + beta * dstc6[x_gid];
dstc7[x_gid] = alpha * work7[0] + beta * dstc7[x_gid]; dstc7[x_gid] = alpha * work7[0] + beta * dstc7[x_gid];
#endif
} }
} }
#undef SLM_SIZE #undef SLM_SIZE

@ -389,7 +389,7 @@ Pin parsePin(const std::string &name)
{ {
Pin pin(name); Pin pin(name);
size_t delimiter_pos = name.find_first_of(":"); size_t delimiter_pos = name.find_first_of(':');
if (delimiter_pos != std::string::npos) if (delimiter_pos != std::string::npos)
{ {
pin.name = name.substr(0, delimiter_pos); pin.name = name.substr(0, delimiter_pos);

@ -656,7 +656,7 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB
); );
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure
if (target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif #endif

@ -756,9 +756,6 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
TEST_P(Test_ONNX_layers, GatherMultiOutput) TEST_P(Test_ONNX_layers, GatherMultiOutput)
{ {
if (cvtest::skipUnstableTests && backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("Skip unstable test: https://github.com/opencv/opencv/issues/18937");
#if defined(INF_ENGINE_RELEASE) #if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD) if (target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE);

@ -162,7 +162,9 @@ namespace
// calc histogram // calc histogram
int tileHist[histSize] = {0, }; cv::AutoBuffer<int> _tileHist(histSize);
int* tileHist = _tileHist.data();
std::fill(tileHist, tileHist + histSize, 0);
int height = tileROI.height; int height = tileROI.height;
const size_t sstep = src_.step / sizeof(T); const size_t sstep = src_.step / sizeof(T);

@ -2167,7 +2167,8 @@ public:
virtual void operator() (const Range& range) const CV_OVERRIDE virtual void operator() (const Range& range) const CV_OVERRIDE
{ {
const int BLOCK_SZ = 64; const int BLOCK_SZ = 64;
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ]; AutoBuffer<short, 0> __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ);
short *XY = __XY.data(), *A = __A.data();
const int AB_BITS = MAX(10, (int)INTER_BITS); const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS; const int AB_SCALE = 1 << AB_BITS;
int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1; int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;

@ -750,9 +750,9 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
Size ssize = _src.size(), dsize = _dst.size(); Size ssize = _src.size(), dsize = _dst.size();
int cn = _src.channels(); int cn = _src.channels();
int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)]; AutoBuffer<int> _tabM(dsize.width * cn), _tabL(cn * (PD_SZ + 2)),
AutoBuffer<int> _tabM(dsize.width*cn); _tabR(cn * (PD_SZ + 2));
int* tabM = _tabM.data(); int *tabM = _tabM.data(), *tabL = _tabL.data(), *tabR = _tabR.data();
CV_Assert( ssize.width > 0 && ssize.height > 0 && CV_Assert( ssize.width > 0 && ssize.height > 0 &&
std::abs(dsize.width*2 - ssize.width) <= 2 && std::abs(dsize.width*2 - ssize.width) <= 2 &&
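
The clahe, imgwarp and pyramids hunks are all the same fix: large fixed-size stack arrays, whose size scales with histSize, BLOCK_SZ or CV_CN_MAX, are replaced with heap-backed cv::AutoBuffer to avoid stack overflows. A minimal sketch of the idiom, under no assumptions beyond the public AutoBuffer API:

    #include <opencv2/core.hpp>
    #include <algorithm>

    void process(int n)
    {
        cv::AutoBuffer<int> buf(n);   // heap allocation when n exceeds the inline capacity
        int* p = buf.data();
        std::fill(p, p + n, 0);       // explicit zeroing replaces the old "= {0,}" initializer
        // ... use p[0..n-1]; storage is released when buf leaves scope
    }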

@ -2,9 +2,24 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory // It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html. // of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp" #include "test_precomp.hpp"
#include <opencv2/core/utils/logger.hpp>
#if defined(HAVE_HPX) #if defined(HAVE_HPX)
#include <hpx/hpx_main.hpp> #include <hpx/hpx_main.hpp>
#endif #endif
CV_TEST_MAIN("highgui") static
void initTests()
{
#ifndef WINRT // missing getenv
const std::vector<cv::VideoCaptureAPIs> backends = cv::videoio_registry::getStreamBackends();
const char* requireFFmpeg = getenv("OPENCV_TEST_VIDEOIO_BACKEND_REQUIRE_FFMPEG");
if (requireFFmpeg && !isBackendAvailable(cv::CAP_FFMPEG, backends))
{
CV_LOG_FATAL(NULL, "OpenCV-Test: required FFmpeg backend is not available (broken plugin?). STOP.");
exit(1);
}
#endif
}
CV_TEST_MAIN("highgui", initTests())
