Merge pull request #3453 from mshabunin:doxygen-others

Vadim Pisarevsky 10 years ago
commit f61db41d25
  1. doc/CMakeLists.txt (72 changes)
  2. doc/Doxyfile.in (10 changes)
  3. doc/disabled_doc_warnings.txt (2 changes)
  4. doc/mymath.js (3 changes)
  5. doc/mymath.sty (1 change)
  6. doc/opencv.bib (1251 changes)
  7. doc/root.markdown.in (15 changes)
  8. modules/androidcamera/include/camera_activity.hpp (8 changes)
  9. modules/androidcamera/include/camera_properties.h (5 changes)
  10. modules/calib3d/include/opencv2/calib3d.hpp (1458 changes)
  11. modules/calib3d/include/opencv2/calib3d/calib3d_c.h (6 changes)
  12. modules/core/include/opencv2/core.hpp (3 changes)
  13. modules/core/include/opencv2/core/cuda.hpp (196 changes)
  14. modules/core/include/opencv2/core/cuda_stream_accessor.hpp (5 changes)
  15. modules/core/include/opencv2/core/cuda_types.hpp (11 changes)
  16. modules/cuda/doc/introduction.markdown (85 changes)
  17. modules/cuda/include/opencv2/cuda.hpp (186 changes)
  18. modules/cudaarithm/include/opencv2/cudaarithm.hpp (793 changes)
  19. modules/cudabgsegm/include/opencv2/cudabgsegm.hpp (114 changes)
  20. modules/cudacodec/include/opencv2/cudacodec.hpp (200 changes)
  21. modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp (260 changes)
  22. modules/cudafilters/include/opencv2/cudafilters.hpp (208 changes)
  23. modules/cudaimgproc/include/opencv2/cudaimgproc.hpp (486 changes)
  24. modules/cudalegacy/include/opencv2/cudalegacy.hpp (7 changes)
  25. modules/cudalegacy/include/opencv2/cudalegacy/NCV.hpp (6 changes)
  26. modules/cudalegacy/include/opencv2/cudalegacy/NCVBroxOpticalFlow.hpp (6 changes)
  27. modules/cudalegacy/include/opencv2/cudalegacy/NCVHaarObjectDetection.hpp (4 changes)
  28. modules/cudalegacy/include/opencv2/cudalegacy/NCVPyramid.hpp (4 changes)
  29. modules/cudalegacy/include/opencv2/cudalegacy/NPP_staging.hpp (15 changes)
  30. modules/cudalegacy/include/opencv2/cudalegacy/private.hpp (4 changes)
  31. modules/cudaoptflow/include/opencv2/cudaoptflow.hpp (101 changes)
  32. modules/cudastereo/include/opencv2/cudastereo.hpp (171 changes)
  33. modules/cudawarping/include/opencv2/cudawarping.hpp (158 changes)
  34. modules/cudev/include/opencv2/cudev.hpp (7 changes)
  35. modules/cudev/include/opencv2/cudev/block/block.hpp (6 changes)
  36. modules/cudev/include/opencv2/cudev/block/dynamic_smem.hpp (5 changes)
  37. modules/cudev/include/opencv2/cudev/block/reduce.hpp (5 changes)
  38. modules/cudev/include/opencv2/cudev/block/scan.hpp (5 changes)
  39. modules/cudev/include/opencv2/cudev/block/vec_distance.hpp (5 changes)
  40. modules/cudev/include/opencv2/cudev/common.hpp (5 changes)
  41. modules/cudev/include/opencv2/cudev/expr/binary_func.hpp (5 changes)
  42. modules/cudev/include/opencv2/cudev/expr/binary_op.hpp (5 changes)
  43. modules/cudev/include/opencv2/cudev/expr/color.hpp (5 changes)
  44. modules/cudev/include/opencv2/cudev/expr/deriv.hpp (5 changes)
  45. modules/cudev/include/opencv2/cudev/expr/expr.hpp (5 changes)
  46. modules/cudev/include/opencv2/cudev/expr/per_element_func.hpp (5 changes)
  47. modules/cudev/include/opencv2/cudev/expr/reduction.hpp (5 changes)
  48. modules/cudev/include/opencv2/cudev/expr/unary_func.hpp (5 changes)
  49. modules/cudev/include/opencv2/cudev/expr/unary_op.hpp (5 changes)
  50. modules/cudev/include/opencv2/cudev/expr/warping.hpp (5 changes)
  51. modules/cudev/include/opencv2/cudev/functional/color_cvt.hpp (5 changes)
  52. modules/cudev/include/opencv2/cudev/functional/functional.hpp (5 changes)
  53. modules/cudev/include/opencv2/cudev/functional/tuple_adapter.hpp (5 changes)
  54. modules/cudev/include/opencv2/cudev/grid/copy.hpp (5 changes)
  55. modules/cudev/include/opencv2/cudev/grid/histogram.hpp (5 changes)
  56. modules/cudev/include/opencv2/cudev/grid/integral.hpp (5 changes)
  57. modules/cudev/include/opencv2/cudev/grid/pyramids.hpp (5 changes)
  58. modules/cudev/include/opencv2/cudev/grid/reduce.hpp (5 changes)
  59. modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp (5 changes)
  60. modules/cudev/include/opencv2/cudev/grid/split_merge.hpp (5 changes)
  61. modules/cudev/include/opencv2/cudev/grid/transform.hpp (5 changes)
  62. modules/cudev/include/opencv2/cudev/grid/transpose.hpp (5 changes)
  63. modules/cudev/include/opencv2/cudev/ptr2d/constant.hpp (5 changes)
  64. modules/cudev/include/opencv2/cudev/ptr2d/deriv.hpp (5 changes)
  65. modules/cudev/include/opencv2/cudev/ptr2d/extrapolation.hpp (5 changes)
  66. modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp (5 changes)
  67. modules/cudev/include/opencv2/cudev/ptr2d/gpumat.hpp (5 changes)
  68. modules/cudev/include/opencv2/cudev/ptr2d/interpolation.hpp (5 changes)
  69. modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp (5 changes)
  70. modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp (5 changes)
  71. modules/cudev/include/opencv2/cudev/ptr2d/remap.hpp (5 changes)
  72. modules/cudev/include/opencv2/cudev/ptr2d/resize.hpp (5 changes)
  73. modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp (5 changes)
  74. modules/cudev/include/opencv2/cudev/ptr2d/traits.hpp (5 changes)
  75. modules/cudev/include/opencv2/cudev/ptr2d/transform.hpp (5 changes)
  76. modules/cudev/include/opencv2/cudev/ptr2d/warping.hpp (5 changes)
  77. modules/cudev/include/opencv2/cudev/ptr2d/zip.hpp (5 changes)
  78. modules/cudev/include/opencv2/cudev/util/atomic.hpp (5 changes)
  79. modules/cudev/include/opencv2/cudev/util/limits.hpp (5 changes)
  80. modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp (5 changes)
  81. modules/cudev/include/opencv2/cudev/util/simd_functions.hpp (5 changes)
  82. modules/cudev/include/opencv2/cudev/util/tuple.hpp (5 changes)
  83. modules/cudev/include/opencv2/cudev/util/type_traits.hpp (5 changes)
  84. modules/cudev/include/opencv2/cudev/util/vec_math.hpp (5 changes)
  85. modules/cudev/include/opencv2/cudev/util/vec_traits.hpp (5 changes)
  86. modules/cudev/include/opencv2/cudev/warp/reduce.hpp (5 changes)
  87. modules/cudev/include/opencv2/cudev/warp/scan.hpp (5 changes)
  88. modules/cudev/include/opencv2/cudev/warp/shuffle.hpp (5 changes)
  89. modules/cudev/include/opencv2/cudev/warp/warp.hpp (5 changes)
  90. modules/features2d/include/opencv2/features2d.hpp (681 changes)
  91. modules/flann/include/opencv2/flann.hpp (143 changes)
  92. modules/highgui/include/opencv2/highgui.hpp (476 changes)
  93. modules/highgui/include/opencv2/highgui/highgui_c.h (6 changes)
  94. modules/imgcodecs/include/opencv2/imgcodecs.hpp (158 changes)
  95. modules/imgcodecs/include/opencv2/imgcodecs/imgcodecs_c.h (5 changes)
  96. modules/imgcodecs/include/opencv2/imgcodecs/ios.h (5 changes)
  97. modules/imgproc/include/opencv2/imgproc.hpp (12 changes)
  98. modules/ml/include/opencv2/ml.hpp (1502 changes)
  99. modules/objdetect/include/opencv2/objdetect.hpp (167 changes)
  100. modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp (7 changes)
Some files were not shown because too many files have changed in this diff.

@@ -146,46 +146,76 @@ if(BUILD_DOCS AND HAVE_SPHINX)
endif()
# ========= Doxygen docs =========
macro(make_reference result modules_list black_list)
set(_res)
foreach(m ${${modules_list}})
list(FIND ${black_list} ${m} _pos)
if(${_pos} EQUAL -1)
set(_res "${_res} @ref ${m} | ${m} \n")
endif()
endforeach()
set(${result} ${_res})
endmacro()
if(BUILD_DOCS AND HAVE_DOXYGEN)
# documented modules list
set(candidates)
list(APPEND candidates ${BASE_MODULES} ${EXTRA_MODULES})
# blacklisted modules
ocv_list_filterout(candidates "^ts$")
# not documented modules list
list(APPEND blacklist "ts" "java" "python2" "python3" "world")
# gathering headers
set(all_headers) # files and dirs to process
set(all_images) # image search paths
set(reflist) # modules reference
foreach(m ${candidates})
set(reflist "${reflist} \n- @subpage ${m}")
set(all_headers ${all_headers} "${OPENCV_MODULE_opencv_${m}_HEADERS}")
set(docs_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/doc")
if(EXISTS ${docs_dir})
set(all_images ${all_images} ${docs_dir})
set(all_headers ${all_headers} ${docs_dir})
set(paths_include)
set(paths_doc)
set(paths_bib)
set(deps)
foreach(m ${BASE_MODULES} ${EXTRA_MODULES})
list(FIND blacklist ${m} _pos)
if(${_pos} EQUAL -1)
# include folder
set(header_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/include")
if(EXISTS "${header_dir}")
list(APPEND paths_include "${header_dir}")
list(APPEND deps ${header_dir})
endif()
# doc folder
set(docs_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/doc")
if(EXISTS "${docs_dir}")
list(APPEND paths_doc "${docs_dir}")
list(APPEND deps ${docs_dir})
endif()
# BiBTeX file
set(bib_file "${docs_dir}/${m}.bib")
if(EXISTS "${bib_file}")
set(paths_bib "${paths_bib} ${bib_file}")
list(APPEND deps ${bib_file})
endif()
endif()
endforeach()
# additional config
set(doxyfile "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile")
set(rootfile "${CMAKE_CURRENT_BINARY_DIR}/root.markdown")
set(all_headers ${all_headers} ${rootfile})
string(REGEX REPLACE ";" " \\\\\\n" CMAKE_DOXYGEN_INPUT_LIST "${all_headers}")
string(REGEX REPLACE ";" " \\\\\\n" CMAKE_DOXYGEN_IMAGE_PATH "${all_images}")
set(bibfile "${CMAKE_CURRENT_SOURCE_DIR}/opencv.bib")
string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${paths_include} ; ${paths_doc}")
string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc}")
string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${CMAKE_SOURCE_DIR}/samples/cpp ; ${paths_doc}")
set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_SOURCE_DIR}/DoxygenLayout.xml")
set(CMAKE_DOXYGEN_OUTPUT_PATH "doxygen")
set(CMAKE_DOXYGEN_MODULES_REFERENCE "${reflist}")
set(CMAKE_DOXYGEN_EXAMPLE_PATH "${CMAKE_SOURCE_DIR}/samples/cpp")
set(CMAKE_EXTRA_BIB_FILES "${bibfile} ${paths_bib}")
# generate references
make_reference(CMAKE_DOXYGEN_MAIN_REFERENCE BASE_MODULES blacklist)
make_reference(CMAKE_DOXYGEN_EXTRA_REFERENCE EXTRA_MODULES blacklist)
# writing file
configure_file(Doxyfile.in ${doxyfile} @ONLY)
configure_file(root.markdown.in ${rootfile} @ONLY)
configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/html/mymath.sty" @ONLY)
configure_file(mymath.sty "${CMAKE_DOXYGEN_OUTPUT_PATH}/latex/mymath.sty" @ONLY)
add_custom_target(doxygen
COMMAND ${DOXYGEN_BUILD} ${doxyfile}
DEPENDS ${doxyfile} ${all_headers} ${all_images})
DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps})
endif()
if(HAVE_DOC_GENERATOR)

@@ -85,7 +85,7 @@ SHOW_FILES = YES
SHOW_NAMESPACES = YES
FILE_VERSION_FILTER =
LAYOUT_FILE = @CMAKE_DOXYGEN_LAYOUT@
CITE_BIB_FILES = @CMAKE_CURRENT_SOURCE_DIR@/opencv.bib
CITE_BIB_FILES = @CMAKE_EXTRA_BIB_FILES@
QUIET = YES
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
@@ -99,7 +99,7 @@ FILE_PATTERNS =
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_PATTERNS = *.inl.hpp *.impl.hpp *_detail.hpp */cudev/**/detail/*.hpp
EXCLUDE_SYMBOLS = cv::DataType<*> int
EXAMPLE_PATH = @CMAKE_DOXYGEN_EXAMPLE_PATH@
EXAMPLE_PATTERNS = *
@@ -119,7 +119,7 @@ REFERENCES_LINK_SOURCE = YES
SOURCE_TOOLTIPS = YES
USE_HTAGS = NO
VERBATIM_HEADERS = NO
ALPHABETICAL_INDEX = NO
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
GENERATE_HTML = YES
@@ -222,6 +222,7 @@ INCLUDE_FILE_PATTERNS =
PREDEFINED = __cplusplus=1 \
HAVE_IPP_A=1 \
CVAPI(x)=x \
CV_DOXYGEN= \
CV_EXPORTS= \
CV_EXPORTS_W= \
CV_EXPORTS_W_SIMPLE= \
@@ -241,7 +242,8 @@ PREDEFINED = __cplusplus=1 \
CV_INLINE= \
CV_NORETURN= \
CV_DEFAULT(x)=" = x" \
CV_NEON=1
CV_NEON=1 \
FLANN_DEPRECATED=
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
TAGFILES =

@@ -0,0 +1,2 @@
# doxygen citelist build workaround
citelist : .*Unexpected new line character.*

@@ -8,7 +8,8 @@ MathJax.Hub.Config(
forkthree: ["\\left\\{ \\begin{array}{l l} #1 & \\mbox{#2}\\\\ #3 & \\mbox{#4}\\\\ #5 & \\mbox{#6}\\\\ \\end{array} \\right.", 6],
vecthree: ["\\begin{bmatrix} #1\\\\ #2\\\\ #3 \\end{bmatrix}", 3],
vecthreethree: ["\\begin{bmatrix} #1 & #2 & #3\\\\ #4 & #5 & #6\\\\ #7 & #8 & #9 \\end{bmatrix}", 9],
hdotsfor: ["\\dots", 1]
hdotsfor: ["\\dots", 1],
mathbbm: ["\\mathbb{#1}", 1]
}
}
}

@@ -3,6 +3,7 @@
\usepackage{euler}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{bbm}
\newcommand{\matTT}[9]{
\[

File diff suppressed because it is too large.

@@ -3,9 +3,14 @@ OpenCV modules {#mainpage}
@subpage intro
Module name | Folder
------------- | -------------
@ref core | core
@ref imgproc | imgproc
### Main modules
<!-- @CMAKE_DOXYGEN_MODULES_REFERENCE@ -->
Module name | Folder
-------------- | -------------
@CMAKE_DOXYGEN_MAIN_REFERENCE@
### Extra modules
Module name | Folder
-------------- | -------------
@CMAKE_DOXYGEN_EXTRA_REFERENCE@

@@ -3,6 +3,12 @@
#include <camera_properties.h>
/** @defgroup androidcamera Android Camera Support
*/
//! @addtogroup androidcamera
//! @{
class CameraActivity
{
public:
@@ -44,4 +50,6 @@ private:
int frameHeight;
};
//! @}
#endif

@@ -1,6 +1,9 @@
#ifndef CAMERA_PROPERTIES_H
#define CAMERA_PROPERTIES_H
//! @addtogroup androidcamera
//! @{
enum {
ANDROID_CAMERA_PROPERTY_FRAMEWIDTH = 0,
ANDROID_CAMERA_PROPERTY_FRAMEHEIGHT = 1,
@@ -67,4 +70,6 @@ enum {
ANDROID_CAMERA_FOCUS_DISTANCE_FAR_INDEX
};
//! @}
#endif // CAMERA_PROPERTIES_H

File diff suppressed because it is too large.

@@ -50,6 +50,10 @@
extern "C" {
#endif
/** @addtogroup calib3d_c
@{
*/
/****************************************************************************************\
* Camera Calibration, Pose Estimation and Stereo *
\****************************************************************************************/
@@ -371,6 +375,8 @@ CVAPI(void) cvReprojectImageTo3D( const CvArr* disparityImage,
CvArr* _3dImage, const CvMat* Q,
int handleMissingValues CV_DEFAULT(0) );
/** @} calib3d_c */
#ifdef __cplusplus
} // extern "C"

@@ -75,6 +75,9 @@
@defgroup core_opengl OpenGL interoperability
@defgroup core_ipp Intel IPP Asynchronous C/C++ Converters
@defgroup core_optim Optimization Algorithms
@defgroup core_directx DirectX interoperability
@defgroup core_eigen Eigen support
@defgroup core_opencl OpenCL support
@}
*/

@@ -51,13 +51,6 @@
#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"
/**
@defgroup cuda CUDA-accelerated Computer Vision
@{
@defgroup cuda_struct Data structures
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cuda_struct
@@ -65,8 +58,28 @@ namespace cv { namespace cuda {
//////////////////////////////// GpuMat ///////////////////////////////
//! Smart pointer for GPU memory with reference counting.
//! Its interface is mostly similar to cv::Mat.
/** @brief Base storage class for GPU memory with reference counting.
Its interface matches the Mat interface with the following limitations:
- no arbitrary dimensions support (only 2D)
- no functions that return references to their data (because references on GPU are not valid for
CPU)
- no expression templates technique support
Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.
@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
on their destructors. The destruction order of such variables and of the CUDA context is undefined, and the
GPU memory release function returns an error if the CUDA context has been destroyed beforehand.
@sa Mat
*/
class CV_EXPORTS GpuMat
{
public:
@@ -277,11 +290,28 @@ public:
Allocator* allocator;
};
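To make the upload/process/download round trip concrete, here is a minimal host-side sketch (not part of this diff; the file name is a placeholder):
@code
#include <opencv2/core/cuda.hpp>
#include <opencv2/imgcodecs.hpp>

cv::Mat host = cv::imread("input.png", cv::IMREAD_GRAYSCALE); // placeholder file
cv::cuda::GpuMat dev;
dev.upload(host);              // host -> device copy
// ... run CUDA-accelerated functions on dev ...
cv::Mat result;
dev.download(result);          // device -> host copy
@endcode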
//! creates continuous matrix
/** @brief Creates a continuous matrix.
@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).
Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
*/
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
//! ensures that the size of the given matrix is not less than (rows, cols)
//! and that the matrix type matches the specified one
/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.
The function does not reallocate memory if the matrix has proper attributes already.
*/
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
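A short sketch contrasting the two helpers, assuming a GpuMat can be passed as the OutputArray argument (as elsewhere in this module); buffer reuse across loop iterations is the typical motivation:
@code
#include <opencv2/core/cuda.hpp>

cv::cuda::GpuMat cont;
cv::cuda::createContinuous(480, 640, CV_8UC1, cont);     // rows stored without gaps
CV_Assert(cont.isContinuous());

cv::cuda::GpuMat buf;
for (int i = 0; i < 10; ++i)
{
    // allocates on the first iteration only; later calls reuse the buffer
    cv::cuda::ensureSizeIsEnough(480, 640, CV_8UC1, buf);
    // ... use buf as scratch memory ...
}
@endcode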
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
@@ -292,10 +322,21 @@ CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCou
//////////////////////////////// CudaMem ////////////////////////////////
//! CudaMem is a limited cv::Mat with page locked memory allocation.
//! Page locked memory is only needed for async and faster copying to GPU.
//! It is convertible to cv::Mat header without reference counting
//! so you can use it with other opencv functions.
/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
Its interface is also Mat-like but with additional memory type parameters.
- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
uploading/downloading data from/to GPU.
- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
address space, if supported.
- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
utilization.
@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
*/
class CV_EXPORTS CudaMem
{
public:
@@ -335,7 +376,13 @@ public:
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;
//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
/** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
for it.
This can be done only if memory was allocated with the SHARED flag and if it is supported by the
hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
eliminates an extra copy.
*/
GpuMat createGpuMatHeader() const;
// Please see cv::Mat for descriptions
@@ -363,17 +410,28 @@ public:
AllocType alloc_type;
};
//! page-locks the matrix m memory and maps it for the device(s)
/** @brief Page-locks the memory of a matrix and maps it for the device(s).
@param m Input matrix.
*/
CV_EXPORTS void registerPageLocked(Mat& m);
//! unmaps the memory of matrix m, and makes it pageable again
/** @brief Unmaps the memory of a matrix and makes it pageable again.
@param m Input matrix.
*/
CV_EXPORTS void unregisterPageLocked(Mat& m);
///////////////////////////////// Stream //////////////////////////////////
//! Encapsulates CUDA Stream. Provides interface for async copying.
//! Passed to each function that supports async kernel execution.
//! Reference counting is enabled.
/** @brief This class encapsulates a queue of asynchronous calls.
@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
*/
class CV_EXPORTS Stream
{
typedef void (Stream::*bool_type)() const;
@@ -385,16 +443,26 @@ public:
//! creates a new asynchronous stream
Stream();
//! queries an asynchronous stream for completion status
/** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
*/
bool queryIfComplete() const;
//! waits for stream tasks to complete
/** @brief Blocks the current CPU thread until all operations in the stream are complete.
*/
void waitForCompletion();
//! makes a compute stream wait on an event
/** @brief Makes a compute stream wait on an event.
*/
void waitEvent(const Event& event);
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
/** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
completed.
@note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
Callbacks without a mandated order (in independent streams) execute in undefined order and may be
serialized.
*/
void enqueueHostCallback(StreamCallback callback, void* userData);
//! return Stream object for default CUDA stream
@@ -446,21 +514,41 @@ private:
friend struct EventAccessor;
};
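Combining CudaMem and Stream, a hedged sketch of the intended asynchronous pattern; the CudaMem constructor arguments and the stream-taking upload overload are assumed from the Mat-like interface described above:
@code
#include <opencv2/core/cuda.hpp>

cv::cuda::CudaMem pinned(480, 640, CV_8UC1);   // assumed ctor; PAGE_LOCKED by default
cv::Mat host = pinned.createMatHeader();       // Mat header, no reference counting
// ... fill host ...

cv::cuda::Stream stream;
cv::cuda::GpuMat dev;
dev.upload(host, stream);                      // async copy from page-locked memory
// ... enqueue kernels on the same stream ...
stream.waitForCompletion();                    // block until all queued work is done
@endcode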
//! @} cuda_struct
//////////////////////////////// Initialization & Info ////////////////////////
//! this is the only function that does not throw exceptions if the library is compiled without CUDA
//! @addtogroup cuda_init
//! @{
/** @brief Returns the number of installed CUDA-enabled devices.
Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
this function returns 0.
*/
CV_EXPORTS int getCudaEnabledDeviceCount();
//! set device to be used for GPU executions for the calling host thread
/** @brief Sets a device and initializes it for the current thread.
@param device System index of a CUDA device starting with 0.
If the call of this function is omitted, a default device is initialized at the first CUDA usage.
*/
CV_EXPORTS void setDevice(int device);
//! returns which device is currently being used for the calling host thread
/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
*/
CV_EXPORTS int getDevice();
//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.
Any subsequent API call to this device will reinitialize the device.
*/
CV_EXPORTS void resetDevice();
/** @brief Enumeration providing CUDA computing features.
*/
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
@@ -482,12 +570,27 @@ enum FeatureSet
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
//! information about what GPU archs this OpenCV CUDA module was compiled for
/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
*/
class CV_EXPORTS TargetArchs
{
public:
/** @brief The following method checks whether the module was built with the support of the given feature:
@param feature_set Features to be checked. See cuda::FeatureSet.
*/
static bool builtWith(FeatureSet feature_set);
/** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
code for the given architecture(s):
@param major Major compute capability version.
@param minor Minor compute capability version.
*/
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
@@ -498,17 +601,25 @@ public:
static bool hasEqualOrGreaterBin(int major, int minor);
};
//! information about the given GPU.
/** @brief Class providing functionality for querying the specified GPU properties.
*/
class CV_EXPORTS DeviceInfo
{
public:
//! creates DeviceInfo object for the current GPU
DeviceInfo();
//! creates DeviceInfo object for the given GPU
/** @brief The constructors.
@param device_id System index of the CUDA device starting with 0.
Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
constructs an object for the current device.
*/
DeviceInfo(int device_id);
//! device number.
/** @brief Returns system index of the CUDA device starting with 0.
*/
int deviceID() const;
//! ASCII string identifying device
@@ -680,10 +791,19 @@ public:
size_t freeMemory() const;
size_t totalMemory() const;
//! checks whether device supports the given feature
/** @brief Provides information on CUDA feature support.
@param feature_set Features to be checked. See cuda::FeatureSet.
This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
*/
bool supports(FeatureSet feature_set) const;
//! checks whether the CUDA module can be run on the given device
/** @brief Checks the CUDA module and device compatibility.
This function returns true if the CUDA module can be run on the specified device. Otherwise, it
returns false.
*/
bool isCompatible() const;
private:
@@ -693,7 +813,7 @@ private:
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
//! @}
//! @} cuda_init
}} // namespace cv { namespace cuda {
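A device-selection sketch built only from the functions declared above (the helper name is made up):
@code
#include <opencv2/core/cuda.hpp>

int chooseCudaDevice()                             // hypothetical helper
{
    const int count = cv::cuda::getCudaEnabledDeviceCount(); // 0 if built without CUDA
    for (int id = 0; id < count; ++id)
    {
        cv::cuda::DeviceInfo info(id);
        if (info.isCompatible())                   // module binaries/PTX match this GPU
        {
            cv::cuda::setDevice(id);
            return id;
        }
    }
    return -1;                                     // no usable device: fall back to CPU
}
@endcode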

@@ -66,6 +66,11 @@ namespace cv
class Stream;
class Event;
/** @brief Class that enables getting cudaStream_t from cuda::Stream.
@note This is the only public header that depends on the CUDA Runtime API. Including it
brings a dependency to your code.
*/
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);

@@ -89,6 +89,11 @@ namespace cv
size_t size;
};
/** @brief Structure similar to cuda::PtrStepSz but containing only a pointer and row step.
Width and height fields are excluded for performance reasons. The structure is intended
for internal use or for users who write device code.
*/
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
@@ -104,6 +109,12 @@ namespace cv
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
/** @brief Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA
kernels).
Typically, it is used internally by OpenCV and by users who write device code. You can call
its members from both host and device code.
*/
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
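To illustrate the GpuMat-to-PtrStepSz conversion mentioned above, a minimal kernel sketch (CUDA C++, compiled with nvcc; the kernel itself is illustrative, not part of OpenCV):
@code
#include <opencv2/core/cuda.hpp>

__global__ void binarize(const cv::cuda::PtrStepSz<uchar> src,
                         cv::cuda::PtrStep<uchar> dst, uchar thresh)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < src.cols && y < src.rows)
        dst(y, x) = src(y, x) > thresh ? 255 : 0;  // operator()(y, x) from PtrStep
}

void callBinarize(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst)
{
    const dim3 block(32, 8);
    const dim3 grid((src.cols + block.x - 1) / block.x,
                    (src.rows + block.y - 1) / block.y);
    binarize<<<grid, block>>>(src, dst, 128);      // GpuMat converts implicitly
}
@endcode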

@@ -0,0 +1,85 @@
CUDA Module Introduction {#cuda_intro}
========================
General Information
-------------------
The OpenCV CUDA module is a set of classes and functions to utilize CUDA computational capabilities.
It is implemented using NVIDIA\* CUDA\* Runtime API and supports only NVIDIA GPUs. The OpenCV CUDA
module includes utility functions, low-level vision primitives, and high-level algorithms. The
utility functions and low-level primitives provide a powerful infrastructure for developing fast
vision algorithms taking advantage of CUDA whereas the high-level functionality includes some
state-of-the-art algorithms (such as stereo correspondence, face and people detectors, and others)
ready to be used by the application developers.
The CUDA module is designed as a host-level API. This means that if you have pre-compiled OpenCV
CUDA binaries, you are not required to have the CUDA Toolkit installed or write any extra code to
make use of CUDA.
The OpenCV CUDA module is designed for ease of use and does not require any knowledge of CUDA.
Such knowledge, however, will certainly be useful for handling non-trivial cases or achieving the highest
performance. It is helpful to understand the cost of various operations, what the GPU does, what the
preferred data formats are, and so on. The CUDA module is an effective instrument for quick
implementation of CUDA-accelerated computer vision algorithms. However, if your algorithm involves
many simple operations, then, for the best possible performance, you may still need to write your
own kernels to avoid extra write and read operations on the intermediate results.
To enable CUDA support, configure OpenCV using CMake with WITH\_CUDA=ON . When the flag is set and
if CUDA is installed, the full-featured OpenCV CUDA module is built. Otherwise, the module is still
built but at runtime all functions from the module throw Exception with CV\_GpuNotSupported error
code, except for cuda::getCudaEnabledDeviceCount(). The latter function returns zero GPU count in
this case. Building OpenCV without CUDA support does not perform device code compilation, so it does
not require the CUDA Toolkit installed. Therefore, using the cuda::getCudaEnabledDeviceCount()
function, you can implement a high-level algorithm that will detect GPU presence at runtime and
choose an appropriate implementation (CPU or GPU) accordingly.
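For example, a runtime dispatch sketch (the GPU branch body is a placeholder for real CUDA-accelerated calls):
@code
#include <opencv2/core/cuda.hpp>

void process(const cv::Mat& src, cv::Mat& dst)
{
    if (cv::cuda::getCudaEnabledDeviceCount() > 0)
    {
        cv::cuda::GpuMat d_src(src);
        // ... replace with real calls, e.g. from the cudaarithm module ...
        cv::cuda::GpuMat d_dst = d_src;            // placeholder: identity
        d_dst.download(dst);
    }
    else
    {
        dst = src.clone();                         // CPU fallback path
    }
}
@endcode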
Compilation for Different NVIDIA\* Platforms
--------------------------------------------
NVIDIA\* compiler enables generating binary code (cubin and fatbin) and intermediate code (PTX).
Binary code often implies a specific GPU architecture and generation, so the compatibility with
other GPUs is not guaranteed. PTX is targeted for a virtual platform that is defined entirely by the
set of capabilities or features. Depending on the selected virtual platform, some of the
instructions are emulated or disabled, even if the real hardware supports all the features.
At the first call, the PTX code is compiled to binary code for the particular GPU using a JIT
compiler. When the target GPU has a compute capability (CC) lower than the PTX code, JIT fails. By
default, the OpenCV CUDA module includes:
- Binaries for compute capabilities 1.3 and 2.0 (controlled by CUDA\_ARCH\_BIN in CMake)
- PTX code for compute capabilities 1.1 and 1.3 (controlled by CUDA\_ARCH\_PTX in CMake)
This means that for devices with CC 1.3 and 2.0 binary images are ready to run. For all newer
platforms, the PTX code for 1.3 is JIT'ed to a binary image. For devices with CC 1.1 and 1.2, the
PTX for 1.1 is JIT'ed. For devices with CC 1.0, no code is available and the functions throw
Exception. For platforms where JIT compilation is performed first, the run is slow.
On a GPU with CC 1.0, you can still compile the CUDA module and most of the functions will run
flawlessly. To achieve this, add "1.0" to the list of binaries, for example,
CUDA\_ARCH\_BIN="1.0 1.3 2.0" . The functions that cannot be run on CC 1.0 GPUs throw an exception.
You can always determine at runtime whether the OpenCV GPU-built binaries (or PTX code) are
compatible with your GPU. The function cuda::DeviceInfo::isCompatible returns the compatibility
status (true/false).
Utilizing Multiple GPUs
-----------------------
In the current version, each of the OpenCV CUDA algorithms can use only a single GPU. So, to utilize
multiple GPUs, you have to manually distribute the work between GPUs. Switching the active device can be
done using the cuda::setDevice() function. For more details, please read the CUDA C Programming Guide.
While developing algorithms for multiple GPUs, note the data passing overhead. For primitive functions
and small images, it can be significant, which may eliminate all the advantages of having multiple
GPUs. But for high-level algorithms, consider using multi-GPU acceleration. For example, the Stereo
Block Matching algorithm has been successfully parallelized using the following algorithm:
1. Split each image of the stereo pair into two horizontal overlapping stripes.
2. Process each pair of stripes (from the left and right images) on a separate Fermi\* GPU.
3. Merge the results into a single disparity map.
With this algorithm, a dual GPU gave a 180% performance increase compared to a single Fermi\* GPU.
For a source code example, see <https://github.com/Itseez/opencv/tree/master/samples/gpu/>.
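A hedged sketch of that stripe scheme; cuda::setDevice() binds the calling host thread to a GPU, so each stripe is processed in its own thread (the stereo call is a placeholder):
@code
#include <opencv2/core/cuda.hpp>
#include <thread>

void processStripe(int deviceId, const cv::Mat& left, const cv::Mat& right, cv::Mat& disp)
{
    cv::cuda::setDevice(deviceId);                 // per host thread
    cv::cuda::GpuMat d_left(left), d_right(right), d_disp;
    // ... run block matching on this stripe ...
    d_left.copyTo(d_disp);                         // placeholder for the real disparity
    d_disp.download(disp);
}

// std::thread t0(processStripe, 0, std::cref(l0), std::cref(r0), std::ref(d0));
// std::thread t1(processStripe, 1, std::cref(l1), std::cref(r1), std::ref(d1));
// t0.join(); t1.join();  // then merge d0/d1 into one disparity map
@endcode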

@@ -49,10 +49,25 @@
#include "opencv2/core/cuda.hpp"
/**
@defgroup cuda CUDA-accelerated Computer Vision
@ref cuda_intro "Introduction page"
@{
@defgroup cuda_init Initialization and Information
@defgroup cuda_struct Data Structures
@defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
@defgroup cuda_objdetect Object Detection
@}
*/
namespace cv { namespace cuda {
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
//! @addtogroup cuda_objdetect
//! @{
struct CV_EXPORTS HOGConfidence
{
double scale;
@@ -61,31 +76,92 @@ struct CV_EXPORTS HOGConfidence
std::vector<double> part_scores[4];
};
/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
as possible.
@note
- An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/cpp/peopledetect.cpp
- A CUDA example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/gpu/hog.cpp
- (Python) An example applying the HOG descriptor for people detection can be found at
opencv_source_code/samples/python2/peopledetect.py
*/
struct CV_EXPORTS HOGDescriptor
{
enum { DEFAULT_WIN_SIGMA = -1 };
enum { DEFAULT_NLEVELS = 64 };
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
/** @brief Creates the HOG descriptor and detector.
@param win_size Detection window size. Align to block size and block stride.
@param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
@param block_stride Block stride. It must be a multiple of cell size.
@param cell_size Cell size. Only (8, 8) is supported for now.
@param nbins Number of bins. Only 9 bins per cell are supported for now.
@param win_sigma Gaussian smoothing window parameter.
@param threshold_L2hys L2-Hys normalization method shrinkage.
@param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
not.
@param nlevels Maximum number of detection window increases.
*/
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
double threshold_L2hys=0.2, bool gamma_correction=true,
int nlevels=DEFAULT_NLEVELS);
/** @brief Returns the number of coefficients required for the classification.
*/
size_t getDescriptorSize() const;
/** @brief Returns the block histogram size.
*/
size_t getBlockHistogramSize() const;
/** @brief Sets coefficients for the linear SVM classifier.
*/
void setSVMDetector(const std::vector<float>& detector);
/** @brief Returns coefficients of the classifier trained for people detection (for default window size).
*/
static std::vector<float> getDefaultPeopleDetector();
/** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
*/
static std::vector<float> getPeopleDetector48x96();
/** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
*/
static std::vector<float> getPeopleDetector64x128();
/** @brief Performs object detection without a multi-scale window.
@param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
@param found_locations Left-top corner points of detected objects boundaries.
@param hit_threshold Threshold for the distance between features and SVM classifying plane.
Usually it is 0 and should be specified in the detector coefficients (as the last free
coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
manually here.
@param win_stride Window stride. It must be a multiple of block stride.
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
*/
void detect(const GpuMat& img, std::vector<Point>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size());
/** @brief Performs object detection with a multi-scale window.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param found_locations Detected objects boundaries.
@param hit_threshold Threshold for the distance between features and SVM classifying plane. See
cuda::HOGDescriptor::detect for details.
@param win_stride Window stride. It must be a multiple of block stride.
@param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
@param scale0 Coefficient of the detection window increase.
@param group_threshold Coefficient to regulate the similarity threshold. When detected, some
objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles.
*/
void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size(), double scale0=1.05,
@@ -98,6 +174,17 @@ struct CV_EXPORTS HOGDescriptor
double hit_threshold, Size win_stride, Size padding,
std::vector<HOGConfidence> &conf_out, int group_threshold);
/** @brief Returns block descriptors computed for the whole image.
@param img Source image. See cuda::HOGDescriptor::detect for type limitations.
@param win_stride Window stride. It must be a multiple of block stride.
@param descriptors 2D array of descriptors.
@param descr_format Descriptor storage format:
- **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
- **DESCR_FORMAT_COL_BY_COL** - Column-major order.
The function is mainly used to learn the classifier.
*/
void getDescriptors(const GpuMat& img, Size win_stride,
GpuMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL);
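A people-detection sketch assembled from the methods above (the image path is a placeholder; note the conversion to CV_8UC4 required by detect/detectMultiScale):
@code
#include <opencv2/cuda.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>

cv::Mat frame = cv::imread("people.png");          // placeholder image
cv::Mat bgra;
cv::cvtColor(frame, bgra, cv::COLOR_BGR2BGRA);     // only CV_8UC1/CV_8UC4 supported

cv::cuda::HOGDescriptor hog;
hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());

cv::cuda::GpuMat d_img(bgra);
std::vector<cv::Rect> found;
hog.detectMultiScale(d_img, found);

for (size_t i = 0; i < found.size(); ++i)
    cv::rectangle(frame, found[i], cv::Scalar(0, 255, 0), 2);
@endcode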
@@ -145,20 +232,82 @@ protected:
//////////////////////////// CascadeClassifier ////////////////////////////
// The cascade classifier class for object detection: supports old haar and new lbp xml formats and nvbin for haar cascades only.
/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
@note
- A cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier.cpp
- An NVIDIA API specific cascade classifier example can be found at
opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
*/
class CV_EXPORTS CascadeClassifier_CUDA
{
public:
CascadeClassifier_CUDA();
/** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR, and only the new
type of OpenCV XML cascade is supported for LBP.
*/
CascadeClassifier_CUDA(const String& filename);
~CascadeClassifier_CUDA();
/** @brief Checks whether the classifier is loaded or not.
*/
bool empty() const;
/** @brief Loads the classifier from a file. The previous content is destroyed.
@param filename Name of the file from which the classifier is loaded. Only the old haar classifier
(trained by the haar training application) and NVIDIA's nvbin are supported for HAAR, and only the new
type of OpenCV XML cascade is supported for LBP.
*/
bool load(const String& filename);
/** @brief Destroys the loaded classifier.
*/
void release();
/* returns number of detected objects */
/** @overload */
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
/** @brief Detects objects of different sizes in the input image.
@param image Matrix of type CV_8U containing an image where objects should be detected.
@param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
with the default size. If not empty, the function searches not more than N objects, where
N = sizeof(objectsBuf's data)/sizeof(cv::Rect).
@param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
second signature and supported only for LBP cascades.
@param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
@param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
to retain it.
@param minSize Minimum possible object size. Objects smaller than that are ignored.
The detected objects are returned as a list of rectangles.
The function returns the number of detected objects, so you can retrieve them as in the following
example:
@code
cuda::CascadeClassifier_CUDA cascade_gpu(...);
Mat image_cpu = imread(...);
GpuMat image_gpu(image_cpu);
GpuMat objbuf;
int detections_number = cascade_gpu.detectMultiScale( image_gpu,
objbuf, 1.2, minNeighbors);
Mat obj_host;
// download only detected number of rectangles
objbuf.colRange(0, detections_number).download(obj_host);
Rect* faces = obj_host.ptr<Rect>();
for(int i = 0; i < detections_number; ++i)
cv::rectangle(image_cpu, faces[i], Scalar(255));
imshow("Faces", image_cpu);
@endcode
@sa CascadeClassifier::detectMultiScale
*/
int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
bool findLargestObject;
@@ -174,8 +323,13 @@ private:
friend class CascadeClassifier_CUDA_LBP;
};
//! @} cuda_objdetect
//////////////////////////// Labeling ////////////////////////////
//! @addtogroup cuda
//! @{
//! performs labeling via graph cuts of a 2D regular 4-connected graph.
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
GpuMat& buf, Stream& stream = Stream::Null());
@@ -192,8 +346,13 @@ CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Sc
//! performs connected components labeling.
CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
//! @}
//////////////////////////// Calib3d ////////////////////////////
//! @addtogroup cuda_calib3d
//! @{
CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
GpuMat& dst, Stream& stream = Stream::Null());
@@ -201,13 +360,34 @@ CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tve
const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
Stream& stream = Stream::Null());
/** @brief Finds the object pose from 3D-2D point correspondences.
@param object Single-row matrix of object points.
@param image Single-row matrix of image points.
@param camera_mat 3x3 matrix of intrinsic camera parameters.
@param dist_coef Distortion coefficients. See undistortPoints for details.
@param rvec Output 3D rotation vector.
@param tvec Output 3D translation vector.
@param use_extrinsic_guess Flag to indicate that the function must use rvec and tvec as an
initial transformation guess. It is not supported for now.
@param num_iters Maximum number of RANSAC iterations.
@param max_dist Euclidean distance threshold to detect whether a point is an inlier or not.
@param min_inlier_count Flag to indicate that the function must stop if greater or equal number
of inliers is achieved. It is not supported for now.
@param inliers Output vector of inlier indices.
*/
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
std::vector<int>* inliers=NULL);
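A minimal calling sketch; the single-row point layout follows the parameter descriptions above, the intrinsics are made-up values, and passing an empty dist_coef (no distortion) is assumed to be accepted as in the CPU counterpart:
@code
#include <opencv2/cuda.hpp>

std::vector<cv::Point3f> objPts;   // known 3D model points
std::vector<cv::Point2f> imgPts;   // matching 2D detections
// ... fill correspondences (at least 4 points) ...

cv::Mat object = cv::Mat(objPts).reshape(3, 1);    // 1 x N, CV_32FC3
cv::Mat image  = cv::Mat(imgPts).reshape(2, 1);    // 1 x N, CV_32FC2
cv::Mat camera = (cv::Mat_<float>(3, 3) << 800.f, 0.f, 320.f,
                                             0.f, 800.f, 240.f,
                                             0.f,   0.f,   1.f); // made-up intrinsics
cv::Mat rvec, tvec;
std::vector<int> inliers;
cv::cuda::solvePnPRansac(object, image, camera, cv::Mat(), rvec, tvec,
                         false, 200, 8.0f, 100, &inliers);
@endcode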
//! @}
//////////////////////////// VStab ////////////////////////////
//! @addtogroup cuda
//! @{
//! removes points (CV_32FC2, single row matrix) with zero mask value
CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
@@ -215,6 +395,8 @@ CV_EXPORTS void calcWobbleSuppressionMaps(
int left, int idx, int right, Size size, const Mat &ml, const Mat &mr,
GpuMat &mapx, GpuMat &mapy);
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDA_HPP__ */

File diff suppressed because it is too large.

@@ -50,11 +50,33 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/video/background_segm.hpp"
/**
@addtogroup cuda
@{
@defgroup cudabgsegm Background Segmentation
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudabgsegm
//! @{
////////////////////////////////////////////////////
// MOG
/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
The class discriminates between foreground and background pixels by building and maintaining a model
of the background. Any pixel which does not fit this model is then deemed to be foreground. The
class implements the algorithm described in @cite MOG2001.
@sa BackgroundSubtractorMOG
@note
- An example on Gaussian mixture based background/foreground segmentation can be found at
opencv_source_code/samples/gpu/bgfg_segm.cpp
*/
class CV_EXPORTS BackgroundSubtractorMOG : public cv::BackgroundSubtractor
{
public:
@@ -78,6 +100,14 @@ public:
virtual void setNoiseSigma(double noiseSigma) = 0;
};
/** @brief Creates mixture-of-gaussian background subtractor
@param history Length of the history.
@param nmixtures Number of Gaussian mixtures.
@param backgroundRatio Background ratio.
@param noiseSigma Noise strength (standard deviation of the brightness or each color channel). 0
means some automatic value.
*/
CV_EXPORTS Ptr<cuda::BackgroundSubtractorMOG>
createBackgroundSubtractorMOG(int history = 200, int nmixtures = 5,
double backgroundRatio = 0.7, double noiseSigma = 0);
@@ -85,6 +115,14 @@ CV_EXPORTS Ptr<cuda::BackgroundSubtractorMOG>
////////////////////////////////////////////////////
// MOG2
/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
The class discriminates between foreground and background pixels by building and maintaining a model
of the background. Any pixel which does not fit this model is then deemed to be foreground. The
class implements the algorithm described in @cite Zivkovic2004.
@sa BackgroundSubtractorMOG2
*/
class CV_EXPORTS BackgroundSubtractorMOG2 : public cv::BackgroundSubtractorMOG2
{
public:
@@ -96,6 +134,15 @@ public:
virtual void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const = 0;
};
/** @brief Creates MOG2 Background Subtractor
@param history Length of the history.
@param varThreshold Threshold on the squared Mahalanobis distance between the pixel and the model
to decide whether a pixel is well described by the background model. This parameter does not
affect the background update.
@param detectShadows If true, the algorithm will detect shadows and mark them. It decreases the
speed a bit, so if you do not need this feature, set the parameter to false.
*/
CV_EXPORTS Ptr<cuda::BackgroundSubtractorMOG2>
createBackgroundSubtractorMOG2(int history = 500, double varThreshold = 16,
bool detectShadows = true);
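A background-subtraction loop sketch (the video path is a placeholder; apply() with its default learning rate is assumed from the cv::BackgroundSubtractorMOG2 base class):
@code
#include <opencv2/cudabgsegm.hpp>
#include <opencv2/videoio.hpp>

cv::VideoCapture cap("video.avi");                 // placeholder input
cv::Ptr<cv::cuda::BackgroundSubtractorMOG2> mog2 =
    cv::cuda::createBackgroundSubtractorMOG2(500, 16, true);

cv::Mat frame;
cv::cuda::GpuMat d_frame, d_fgmask;
while (cap.read(frame))
{
    d_frame.upload(frame);
    mog2->apply(d_frame, d_fgmask);                // default learning rate
    // ... use d_fgmask ...
}
@endcode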
@@ -103,6 +150,12 @@ CV_EXPORTS Ptr<cuda::BackgroundSubtractorMOG2>
////////////////////////////////////////////////////
// GMG
/** @brief Background/Foreground Segmentation Algorithm.
The class discriminates between foreground and background pixels by building and maintaining a model
of the background. Any pixel which does not fit this model is then deemed to be foreground. The
class implements the algorithm described in @cite Gold2012.
*/
class CV_EXPORTS BackgroundSubtractorGMG : public cv::BackgroundSubtractor
{
public:
@@ -140,54 +193,71 @@ public:
virtual void setMaxVal(double val) = 0;
};
/** @brief Creates GMG Background Subtractor
@param initializationFrames Number of frames of video to use to initialize histograms.
@param decisionThreshold Value above which a pixel is determined to be foreground (FG).
*/
CV_EXPORTS Ptr<cuda::BackgroundSubtractorGMG>
createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8);
////////////////////////////////////////////////////
// FGD
/**
* Foreground Object Detection from Videos Containing Complex Background.
* Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
* ACM MM2003 9p
/** @brief The class discriminates between foreground and background pixels by building and maintaining a model
of the background.
Any pixel which does not fit this model is then deemed to be foreground. The class implements the
algorithm described in @cite FGD2003.
@sa BackgroundSubtractor
*/
class CV_EXPORTS BackgroundSubtractorFGD : public cv::BackgroundSubtractor
{
public:
/** @brief Returns the output foreground regions calculated by findContours.
@param foreground_regions Output array (CPU memory).
*/
virtual void getForegroundRegions(OutputArrayOfArrays foreground_regions) = 0;
};
struct CV_EXPORTS FGDParams
{
int Lc; // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
int N1c; // Number of color vectors used to model normal background color variation at a given pixel.
int N2c; // Number of color vectors retained at given pixel. Must be > N1c, typically ~ 5/3 of N1c.
// Used to allow the first N1c vectors to adapt over time to changing background.
int Lc; //!< Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
int N1c; //!< Number of color vectors used to model normal background color variation at a given pixel.
int N2c; //!< Number of color vectors retained at given pixel. Must be > N1c, typically ~ 5/3 of N1c.
//!< Used to allow the first N1c vectors to adapt over time to changing background.
int Lcc; // Quantized levels per 'color co-occurrence' component. Power of two, typically 16, 32 or 64.
int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
int N2cc; // Number of color co-occurrence vectors retained at given pixel. Must be > N1cc, typically ~ 5/3 of N1cc.
// Used to allow the first N1cc vectors to adapt over time to changing background.
int Lcc; //!< Quantized levels per 'color co-occurrence' component. Power of two, typically 16, 32 or 64.
int N1cc; //!< Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
int N2cc; //!< Number of color co-occurrence vectors retained at given pixel. Must be > N1cc, typically ~ 5/3 of N1cc.
//!< Used to allow the first N1cc vectors to adapt over time to changing background.
bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
int perform_morphing; // Number of erode-dilate-erode foreground-blob cleanup iterations.
// These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
bool is_obj_without_holes; //!< If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
int perform_morphing; //!< Number of erode-dilate-erode foreground-blob cleanup iterations.
//!< These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1.
float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
float alpha1; //!< How quickly we forget old background pixel values seen. Typically set to 0.1.
float alpha2; //!< "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
float alpha3; //!< Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
float delta; // Affects color and color co-occurrence quantization, typically set to 2.
float T; // A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold.
float delta; //!< Affects color and color co-occurrence quantization, typically set to 2.
float T; //!< A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
float minArea; //!< Discard foreground blobs whose bounding box is smaller than this threshold.
// default Params
//! default Params
FGDParams();
};
/** @brief Creates FGD Background Subtractor
@param params Algorithm's parameters. See @cite FGD2003 for explanation.
*/
CV_EXPORTS Ptr<cuda::BackgroundSubtractorFGD>
createBackgroundSubtractorFGD(const FGDParams& params = FGDParams());
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDABGSEGM_HPP__ */

@@ -50,8 +50,18 @@
#include "opencv2/core/cuda.hpp"
/**
@addtogroup cuda
@{
@defgroup cudacodec Video Encoding/Decoding
@}
*/
namespace cv { namespace cudacodec {
//! @addtogroup cudacodec
//! @{
////////////////////////////////// Video Encoding //////////////////////////////////
// Works only under Windows.
@@ -68,35 +78,53 @@ enum SurfaceFormat
SF_GRAY = SF_BGR
};
/** @brief Different parameters for CUDA video encoder.
*/
struct CV_EXPORTS EncoderParams
{
int P_Interval; // NVVE_P_INTERVAL,
int IDR_Period; // NVVE_IDR_PERIOD,
int DynamicGOP; // NVVE_DYNAMIC_GOP,
int RCType; // NVVE_RC_TYPE,
int AvgBitrate; // NVVE_AVG_BITRATE,
int PeakBitrate; // NVVE_PEAK_BITRATE,
int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA,
int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P,
int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B,
int DeblockMode; // NVVE_DEBLOCK_MODE,
int ProfileLevel; // NVVE_PROFILE_LEVEL,
int ForceIntra; // NVVE_FORCE_INTRA,
int ForceIDR; // NVVE_FORCE_IDR,
int ClearStat; // NVVE_CLEAR_STAT,
int DIMode; // NVVE_SET_DEINTERLACE,
int Presets; // NVVE_PRESETS,
int DisableCabac; // NVVE_DISABLE_CABAC,
int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE
int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS
int P_Interval; //!< NVVE_P_INTERVAL,
int IDR_Period; //!< NVVE_IDR_PERIOD,
int DynamicGOP; //!< NVVE_DYNAMIC_GOP,
int RCType; //!< NVVE_RC_TYPE,
int AvgBitrate; //!< NVVE_AVG_BITRATE,
int PeakBitrate; //!< NVVE_PEAK_BITRATE,
int QP_Level_Intra; //!< NVVE_QP_LEVEL_INTRA,
int QP_Level_InterP; //!< NVVE_QP_LEVEL_INTER_P,
int QP_Level_InterB; //!< NVVE_QP_LEVEL_INTER_B,
int DeblockMode; //!< NVVE_DEBLOCK_MODE,
int ProfileLevel; //!< NVVE_PROFILE_LEVEL,
int ForceIntra; //!< NVVE_FORCE_INTRA,
int ForceIDR; //!< NVVE_FORCE_IDR,
int ClearStat; //!< NVVE_CLEAR_STAT,
int DIMode; //!< NVVE_SET_DEINTERLACE,
int Presets; //!< NVVE_PRESETS,
int DisableCabac; //!< NVVE_DISABLE_CABAC,
int NaluFramingType; //!< NVVE_CONFIGURE_NALU_FRAMING_TYPE
int DisableSPSPPS; //!< NVVE_DISABLE_SPS_PPS
EncoderParams();
/** @brief Constructors.
@param configFile Config file name.
Creates default parameters or reads parameters from config file.
*/
explicit EncoderParams(const String& configFile);
/** @brief Reads parameters from config file.
@param configFile Config file name.
*/
void load(const String& configFile);
/** @brief Saves parameters to config file.
@param configFile Config file name.
*/
void save(const String& configFile) const;
};
/** @brief Callbacks for CUDA video encoder.
*/
class CV_EXPORTS EncoderCallBack
{
public:
@@ -109,41 +137,109 @@ public:
virtual ~EncoderCallBack() {}
//! callback function to signal the start of bitstream that is to be encoded
//! callback must allocate host buffer for CUDA encoder and return pointer to it and its size
/** @brief Callback function to signal the start of bitstream that is to be encoded.
Callback must allocate a buffer for the CUDA encoder and return a pointer to it and its size.
*/
virtual uchar* acquireBitStream(int* bufferSize) = 0;
//! callback function to signal that the encoded bitstream is ready to be written to file
/** @brief Callback function to signal that the encoded bitstream is ready to be written to file.
*/
virtual void releaseBitStream(unsigned char* data, int size) = 0;
//! callback function to signal that the encoding operation on the frame has started
/** @brief Callback function to signal that the encoding operation on the frame has started.
@param frameNumber
@param picType Specify frame type (I-Frame, P-Frame or B-Frame).
*/
virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
//! callback function signals that the encoding operation on the frame has finished
/** @brief Callback function signals that the encoding operation on the frame has finished.
@param frameNumber
@param picType Specify frame type (I-Frame, P-Frame or B-Frame).
*/
virtual void onEndFrame(int frameNumber, PicType picType) = 0;
};
/** @brief Video writer interface.
The implementation uses H264 video codec.
@note Currently only Windows platform is supported.
@note
- An example on how to use the videoWriter class can be found at
opencv_source_code/samples/gpu/video_writer.cpp
*/
class CV_EXPORTS VideoWriter
{
public:
virtual ~VideoWriter() {}
//! writes the next frame from GPU memory
/** @brief Writes the next video frame.
@param frame The written frame.
@param lastFrame Indicates that it is end of stream. The parameter can be ignored.
The method writes the specified image to the video file. The image must have the same size and the same
surface format as specified when the video writer was opened.
*/
virtual void write(InputArray frame, bool lastFrame = false) = 0;
virtual EncoderParams getEncoderParams() const = 0;
};
//! create VideoWriter for specified output file (only AVI file format is supported)
/** @brief Creates video writer.
@param fileName Name of the output video file. Only AVI file format is supported.
@param frameSize Size of the input video frames.
@param fps Framerate of the created video stream.
@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
encoding, frames with other formats will be used as is.
The function initializes the video writer. FFMPEG is used to write videos. Users can implement their
own multiplexing with cudacodec::EncoderCallBack .
*/
CV_EXPORTS Ptr<VideoWriter> createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format = SF_BGR);
/** @overload
@param fileName Name of the output video file. Only AVI file format is supported.
@param frameSize Size of the input video frames.
@param fps Framerate of the created video stream.
@param params Encoder parameters. See cudacodec::EncoderParams .
@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
encoding, frames with other formats will be used as is.
*/
CV_EXPORTS Ptr<VideoWriter> createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
/** @overload
@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you
want to work with raw video stream.
@param frameSize Size of the input video frames.
@param fps Framerate of the created video stream.
@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
encoding, frames with other formats will be used as is.
*/
CV_EXPORTS Ptr<VideoWriter> createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR);
/** @overload
@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you
want to work with raw video stream.
@param frameSize Size of the input video frames.
@param fps Framerate of the created video stream.
@param params Encoder parameters. See cudacodec::EncoderParams .
@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
encoding, frames with other formats will be used as is.
*/
CV_EXPORTS Ptr<VideoWriter> createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
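/* A minimal encoding sketch for the factory functions above; the file name, frame size, fps and
   the way d_frame is filled are illustrative assumptions, not part of the API.
@code
    cv::cuda::GpuMat d_frame;   // frames prepared by the caller, matching frameSize and format
    cv::Ptr<cv::cudacodec::VideoWriter> d_writer =
        cv::cudacodec::createVideoWriter("output.avi", cv::Size(1920, 1080), 25.0);
    for (;;)
    {
        // ... upload or render the next frame into d_frame ...
        d_writer->write(d_frame);
    }
@endcode
*/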
////////////////////////////////// Video Decoding //////////////////////////////////////////
/** @brief Video codecs supported by cudacodec::VideoReader .
*/
enum Codec
{
MPEG1 = 0,
@@ -155,13 +251,15 @@ enum Codec
H264_SVC,
H264_MVC,
Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), //!< Y,U,V (4:2:0)
Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), //!< Y,V,U (4:2:0)
Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), //!< Y,UV (4:2:0)
Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), //!< YUYV/YUY2 (4:2:2)
Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) //!< UYVY (4:2:2)
};
/** @brief Chroma formats supported by cudacodec::VideoReader .
*/
enum ChromaFormat
{
Monochrome = 0,
@@ -170,6 +268,8 @@ enum ChromaFormat
YUV444
};
/** @brief Struct providing information about video file format.
*/
struct FormatInfo
{
Codec codec;
@@ -178,29 +278,65 @@ struct FormatInfo
int height;
};
/** @brief Video reader interface.
@note
- An example on how to use the videoReader class can be found at
opencv_source_code/samples/gpu/video_reader.cpp
*/
class CV_EXPORTS VideoReader
{
public:
virtual ~VideoReader() {}
/** @brief Grabs, decodes and returns the next video frame.
If no frames have been grabbed (there are no more frames in the video file), the method returns false.
The method throws an Exception if an error occurs.
*/
virtual bool nextFrame(OutputArray frame) = 0;
/** @brief Returns information about video file format.
*/
virtual FormatInfo format() const = 0;
};
/** @brief Interface for video demultiplexing.
Users can implement their own demultiplexing by implementing this interface.
*/
class CV_EXPORTS RawVideoSource
{
public:
virtual ~RawVideoSource() {}
/** @brief Returns next packet with RAW video frame.
@param data Pointer to frame data.
@param size Size in bytes of current frame.
@param endOfFile Indicates that it is the end of the stream.
*/
virtual bool getNextPacket(unsigned char** data, int* size, bool* endOfFile) = 0;
/** @brief Returns information about video file format.
*/
virtual FormatInfo format() const = 0;
};
/** @brief Creates video reader.
@param filename Name of the input video file.
FFMPEG is used to read videos. Users can implement their own demultiplexing with cudacodec::RawVideoSource .
*/
CV_EXPORTS Ptr<VideoReader> createVideoReader(const String& filename);
/** @overload
@param source RAW video source implemented by user.
*/
CV_EXPORTS Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source);
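/* A minimal decoding sketch for the factory functions above; the input file name is an
   illustrative assumption.
@code
    cv::cuda::GpuMat d_frame;
    cv::Ptr<cv::cudacodec::VideoReader> d_reader =
        cv::cudacodec::createVideoReader(cv::String("input.avi"));
    while (d_reader->nextFrame(d_frame))
    {
        // ... process the decoded frame on the GPU ...
    }
@endcode
*/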
//! @}
}} // namespace cv { namespace cudacodec {
#endif /* __OPENCV_CUDACODEC_HPP__ */

@@ -50,150 +50,175 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudafilters.hpp"
/**
@addtogroup cuda
@{
@defgroup cudafeatures2d Feature Detection and Description
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudafeatures2d
//! @{
/** @brief Brute-force descriptor matcher.
For each descriptor in the first set, this matcher finds the closest descriptor in the second set
by trying each one. This descriptor matcher supports masking permissible matches between descriptor
sets.
The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups
of match methods: for matching descriptors of one image with another image or with an image set.
Also, all functions have an alternative to save results either to the GPU memory or to the CPU
memory.
@sa DescriptorMatcher, BFMatcher
*/
class CV_EXPORTS BFMatcher_CUDA
{
public:
explicit BFMatcher_CUDA(int norm = cv::NORM_L2);
//! Add descriptors to train descriptor collection
void add(const std::vector<GpuMat>& descCollection);
//! Get train descriptors collection
const std::vector<GpuMat>& getTrainDescriptors() const;
//! Clear train descriptors collection
void clear();
//! Return true if there are no train descriptors in the collection
bool empty() const;
//! Return true if the matcher supports mask in match methods
bool isMaskSupported() const;
//! Find one best match for each query descriptor
void matchSingle(const GpuMat& query, const GpuMat& train,
GpuMat& trainIdx, GpuMat& distance,
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
//! Download trainIdx and distance and convert them to a CPU vector of DMatch
static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
//! Convert trainIdx and distance to a vector of DMatch
static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
//! Find one best match for each query descriptor
void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
//! Make a GPU collection of train descriptors and masks in a format suitable for the matchCollection function
void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
//! Find one best match from train collection for each query descriptor
void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
//! Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
//! Convert trainIdx, imgIdx and distance to a vector of DMatch
static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
//! Find one best match from train collection for each query descriptor.
void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
//! Find k best matches for each query descriptor (in increasing order of distances)
void knnMatchSingle(const GpuMat& query, const GpuMat& train,
GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
//! Download trainIdx and distance and convert them to a vector of DMatch.
//! compactResult is used when the mask is not empty. If compactResult is false, the matches
//! vector has the same size as queryDescriptors rows. If compactResult is true, the
//! matches vector does not contain matches for fully masked-out query descriptors.
static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Convert trainIdx and distance to a vector of DMatch
static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Find k best matches for each query descriptor (in increasing order of distances).
//! compactResult is used when the mask is not empty. If compactResult is false, the matches
//! vector has the same size as queryDescriptors rows. If compactResult is true, the
//! matches vector does not contain matches for fully masked-out query descriptors.
void knnMatch(const GpuMat& query, const GpuMat& train,
std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
bool compactResult = false);
//! Find k best matches from train collection for each query descriptor (in increasing order of distances)
void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
//! Download trainIdx and distance and convert them to a vector of DMatch.
//! compactResult is used when the mask is not empty. If compactResult is false, the matches
//! vector has the same size as queryDescriptors rows. If compactResult is true, the
//! matches vector does not contain matches for fully masked-out query descriptors.
//! @see BFMatcher_CUDA::knnMatchDownload
static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Convert trainIdx and distance to a vector of DMatch
//! @see BFMatcher_CUDA::knnMatchConvert
static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Find k best matches for each query descriptor (in increasing order of distances).
//! compactResult is used when the mask is not empty. If compactResult is false, the matches
//! vector has the same size as queryDescriptors rows. If compactResult is true, the
//! matches vector does not contain matches for fully masked-out query descriptors.
void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
//! Find best matches for each query descriptor that have distance less than maxDistance.
//! nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
//! Beware: nMatches can be greater than trainIdx.cols, which means that the matcher did not find
//! all matches because it did not have enough memory.
//! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
//! otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
//! Matches are not sorted.
void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
//! Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
//! Matches will be sorted in increasing order of distances.
//! compactResult is used when the mask is not empty. If compactResult is false, the matches
//! vector has the same size as queryDescriptors rows. If compactResult is true, the
//! matches vector does not contain matches for fully masked-out query descriptors.
static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Convert trainIdx, nMatches and distance to a vector of DMatch.
static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Find best matches for each query descriptor that have distance less than maxDistance
//! (in increasing order of distances).
void radiusMatch(const GpuMat& query, const GpuMat& train,
std::vector< std::vector<DMatch> >& matches, float maxDistance,
const GpuMat& mask = GpuMat(), bool compactResult = false);
//! Find best matches for each query descriptor that have distance less than maxDistance.
//! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
//! otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
//! Matches are not sorted.
void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
//! Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
//! Matches will be sorted in increasing order of distances.
//! compactResult is used when the mask is not empty. If compactResult is false, the matches
//! vector has the same size as queryDescriptors rows. If compactResult is true, the
//! matches vector does not contain matches for fully masked-out query descriptors.
static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Convert trainIdx, imgIdx, nMatches and distance to a vector of DMatch.
static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
//! Find best matches from train collection for each query descriptor that have distance less than
//! maxDistance (in increasing order of distances).
void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
@@ -203,6 +228,8 @@ private:
std::vector<GpuMat> trainDescCollection;
};
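/* A minimal matching sketch for the interface above; the descriptor matrices are assumed to be
   CV_32F GpuMat instances already uploaded by the caller.
@code
    cv::cuda::GpuMat d_query, d_train;            // uploaded descriptor matrices
    cv::cuda::BFMatcher_CUDA matcher(cv::NORM_L2);
    std::vector<cv::DMatch> matches;
    matcher.match(d_query, d_train, matches);     // one best match per query row
@endcode
*/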
/** @brief Class used for corner detection using the FAST algorithm.
*/
class CV_EXPORTS FAST_CUDA
{
public:
@@ -213,23 +240,45 @@ public:
ROWS_COUNT
};
//! all features have same size
static const int FEATURE_SIZE = 7;
/** @brief Constructor.
@param threshold Threshold on difference between intensity of the central pixel and pixels on a
circle around this pixel.
@param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners
(keypoints).
@param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \*
image_width \* image_height).
*/
explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05);
/** @brief Finds the keypoints using FAST detector.
@param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
supported.
@param mask Optional input mask that marks the regions where we should detect features.
@param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU
memory:
- keypoints.ptr\<Vec2s\>(LOCATION_ROW)[i] will contain location of i'th point
- keypoints.ptr\<float\>(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum
suppression is applied)
*/
void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
/** @overload */
void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
/** @brief Download keypoints from GPU to CPU memory.
*/
static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
/** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
*/
static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
/** @brief Releases inner buffer memory.
*/
void release();
bool nonmaxSuppression;
@@ -239,13 +288,22 @@ public:
//! max keypoints = keypointsRatio * img.size().area()
double keypointsRatio;
/** @brief Finds keypoints and computes their responses if nonmaxSuppression is true.
@param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
supported.
@param mask Optional input mask that marks the regions where we should detect features.
The function returns count of detected keypoints.
*/
int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
/** @brief Gets final array of keypoints.
@param keypoints The output vector of keypoints.
The function performs non-max suppression if needed and returns final count of keypoints.
*/
int getKeyPoints(GpuMat& keypoints);
private:
@@ -257,6 +315,8 @@ private:
GpuMat d_keypoints_;
};
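/* A minimal detection sketch for the class above; the threshold value is an illustrative
   assumption.
@code
    cv::cuda::GpuMat d_image;                         // CV_8UC1 image uploaded by the caller
    cv::cuda::FAST_CUDA detector(20);                 // intensity difference threshold
    std::vector<cv::KeyPoint> keypoints;
    detector(d_image, cv::cuda::GpuMat(), keypoints); // empty mask: detect everywhere
@endcode
*/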
/** @brief Class for extracting ORB features and descriptors from an image.
*/
class CV_EXPORTS ORB_CUDA
{
public:
@@ -276,28 +336,51 @@ public:
DEFAULT_FAST_THRESHOLD = 20
};
/** @brief Constructor.
@param nFeatures The number of desired features.
@param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
the next.
@param nLevels The number of levels in the scale pyramid.
@param edgeThreshold How far from the boundary the points should be.
@param firstLevel The level at which the image is given. If 1, that means we will also look at the
image scaleFactor times bigger.
@param WTA_K
@param scoreType
@param patchSize
*/
explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
/** @overload */
void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
/** @overload */
void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
/** @brief Detects keypoints and computes descriptors for them.
@param image Input 8-bit grayscale image.
@param mask Optional input mask that marks the regions where we should detect features.
@param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
For GPU memory:
- keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
- keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
- keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
- keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
- keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
- keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
@param descriptors Computed descriptors. If blurForDescriptor is true, the image will be blurred
before descriptor calculation.
*/
void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
/** @overload */
void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
/** @brief Download keypoints from GPU to CPU memory.
*/
static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
/** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
*/
static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
//! returns the descriptor size in bytes
@@ -309,7 +392,8 @@ public:
fastDetector_.nonmaxSuppression = nonmaxSuppression;
}
/** @brief Releases inner buffer memory.
*/
void release();
//! if true, image will be blurred before descriptors calculation
@@ -335,10 +419,10 @@ private:
int scoreType_;
int patchSize_;
//! The number of desired features per scale
std::vector<size_t> n_features_per_level_;
//! Points to compute BRIEF descriptors from
GpuMat pattern_;
std::vector<GpuMat> imagePyr_;
@@ -356,6 +440,8 @@ private:
GpuMat d_keypoints_;
};
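/* A minimal feature extraction sketch for the class above; the feature count is an illustrative
   assumption.
@code
    cv::cuda::GpuMat d_image, d_descriptors;      // CV_8UC1 input image
    cv::cuda::ORB_CUDA orb(500);                  // request up to 500 features
    std::vector<cv::KeyPoint> keypoints;
    orb(d_image, cv::cuda::GpuMat(), keypoints, d_descriptors);
@endcode
*/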
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAFEATURES2D_HPP__ */

@@ -50,65 +50,189 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/imgproc.hpp"
/**
@addtogroup cuda
@{
@defgroup cudafilters Image Filtering
Functions and classes described in this section are used to perform various linear or non-linear
filtering operations on 2D images.
@note
- An example containing all basic morphology operators like erode and dilate can be found at
opencv_source_code/samples/gpu/morphology.cpp
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudafilters
//! @{
/** @brief Common interface for all CUDA filters.
*/
class CV_EXPORTS Filter : public Algorithm
{
public:
/** @brief Applies the specified filter to the image.
@param src Input image.
@param dst Output image.
@param stream Stream for the asynchronous version.
*/
virtual void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Box Filter
/** @brief Creates a normalized 2D box filter.
@param srcType Input image type. Only CV_8UC1 and CV_8UC4 are supported for now.
@param dstType Output image type. Only the same type as src is supported for now.
@param ksize Kernel size.
@param anchor Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel
center.
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
@sa boxFilter
*/
CV_EXPORTS Ptr<Filter> createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1,-1),
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
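/* A minimal sketch of the create/apply pattern shared by all filters in this header; the kernel
   size is an illustrative assumption.
@code
    cv::cuda::GpuMat d_src, d_dst;                // CV_8UC1 input uploaded by the caller
    cv::Ptr<cv::cuda::Filter> box =
        cv::cuda::createBoxFilter(CV_8UC1, CV_8UC1, cv::Size(5, 5));
    box->apply(d_src, d_dst);                     // the filter object can be reused
@endcode
*/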
////////////////////////////////////////////////////////////////////////////////////////////////////
// Linear Filter
/** @brief Creates a non-separable linear 2D filter.
@param srcType Input image type. Supports one- and four-channel CV_8U , CV_16U and CV_32F images.
@param dstType Output image type. Only the same type as src is supported for now.
@param kernel 2D array of filter coefficients.
@param anchor Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel
center.
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
@sa filter2D
*/
CV_EXPORTS Ptr<Filter> createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1),
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
////////////////////////////////////////////////////////////////////////////////////////////////////
// Laplacian Filter
/** @brief Creates a Laplacian operator.
@param srcType Input image type. Supports one- and four-channel CV_8U , CV_16U and CV_32F images.
@param dstType Output image type. Only the same type as src is supported for now.
@param ksize Aperture size used to compute the second-derivative filters (see getDerivKernels). It
must be positive and odd. Only ksize = 1 and ksize = 3 are supported.
@param scale Optional scale factor for the computed Laplacian values. By default, no scaling is
applied (see getDerivKernels ).
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
@sa Laplacian
*/
CV_EXPORTS Ptr<Filter> createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1,
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter
/** @brief Creates a separable linear filter.
@param srcType Source array type.
@param dstType Destination array type.
@param rowKernel Horizontal filter coefficients. Supports kernels with size \<= 32 .
@param columnKernel Vertical filter coefficients. Supports kernels with size \<= 32 .
@param anchor Anchor position within the kernel. Negative values mean that anchor is positioned at
the aperture center.
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
borderInterpolate.
@param columnBorderMode Pixel extrapolation method in the horizontal direction.
@sa sepFilter2D
*/
CV_EXPORTS Ptr<Filter> createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel,
Point anchor = Point(-1,-1), int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Deriv Filter
/** @brief Creates a generalized Deriv operator.
@param srcType Source image type.
@param dstType Destination array type.
@param dx Derivative order with respect to x.
@param dy Derivative order with respect to y.
@param ksize Aperture size. See getDerivKernels for details.
@param normalize Flag indicating whether to normalize (scale down) the filter coefficients or not.
See getDerivKernels for details.
@param scale Optional scale factor for the computed derivative values. By default, no scaling is
applied. For details, see getDerivKernels .
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
borderInterpolate.
@param columnBorderMode Pixel extrapolation method in the horizontal direction.
*/
CV_EXPORTS Ptr<Filter> createDerivFilter(int srcType, int dstType, int dx, int dy,
int ksize, bool normalize = false, double scale = 1,
int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1);
/** @brief Creates a Sobel operator.
@param srcType Source image type.
@param dstType Destination array type.
@param dx Derivative order with respect to x.
@param dy Derivative order with respect to y.
@param ksize Size of the extended Sobel kernel. Possible values are 1, 3, 5 or 7.
@param scale Optional scale factor for the computed derivative values. By default, no scaling is
applied. For details, see getDerivKernels .
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
borderInterpolate.
@param columnBorderMode Pixel extrapolation method in the horizontal direction.
@sa Sobel
*/
CV_EXPORTS Ptr<Filter> createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3,
double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1);
/** @brief Creates a vertical or horizontal Scharr operator.
@param srcType Source image type.
@param dstType Destination array type.
@param dx Order of the derivative x.
@param dy Order of the derivative y.
@param scale Optional scale factor for the computed derivative values. By default, no scaling is
applied. See getDerivKernels for details.
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
borderInterpolate.
@param columnBorderMode Pixel extrapolation method in the horizontal direction.
@sa Scharr
*/
CV_EXPORTS Ptr<Filter> createScharrFilter(int srcType, int dstType, int dx, int dy,
double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Gaussian Filter
/** @brief Creates a Gaussian filter.
@param srcType Source image type.
@param dstType Destination array type.
@param ksize Aperture size. See getGaussianKernel for details.
@param sigma1 Gaussian sigma in the horizontal direction. See getGaussianKernel for details.
@param sigma2 Gaussian sigma in the vertical direction. If 0, then
\f$\texttt{sigma2}\leftarrow\texttt{sigma1}\f$ .
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
borderInterpolate.
@param columnBorderMode Pixel extrapolation method in the horizontal direction.
@sa GaussianBlur
*/
CV_EXPORTS Ptr<Filter> createGaussianFilter(int srcType, int dstType, Size ksize,
double sigma1, double sigma2 = 0,
int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1);
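/* A minimal Gaussian smoothing sketch; kernel size and sigma are illustrative assumptions.
@code
    cv::cuda::GpuMat d_src, d_dst;                // CV_8UC1 input
    cv::Ptr<cv::cuda::Filter> gauss =
        cv::cuda::createGaussianFilter(CV_8UC1, CV_8UC1, cv::Size(5, 5), 1.5);
    gauss->apply(d_src, d_dst);
@endcode
*/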
@@ -116,19 +240,49 @@ CV_EXPORTS Ptr<Filter> createGaussianFilter(int srcType, int dstType, Size ksize
////////////////////////////////////////////////////////////////////////////////////////////////////
// Morphology Filter
/** @brief Creates a 2D morphological filter.
@param op Type of morphological operation. The following types are possible:
- **MORPH_ERODE** erode
- **MORPH_DILATE** dilate
- **MORPH_OPEN** opening
- **MORPH_CLOSE** closing
- **MORPH_GRADIENT** morphological gradient
- **MORPH_TOPHAT** "top hat"
- **MORPH_BLACKHAT** "black hat"
@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported.
@param kernel 2D 8-bit structuring element for the morphological operation.
@param anchor Anchor position within the structuring element. Negative values mean that the anchor
is at the center.
@param iterations Number of times erosion and dilation are applied.
@sa morphologyEx
*/
CV_EXPORTS Ptr<Filter> createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1);
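/* A minimal erosion sketch; the structuring element shape and size are illustrative assumptions.
@code
    cv::cuda::GpuMat d_src, d_dst;                // CV_8UC1 input
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
    cv::Ptr<cv::cuda::Filter> erode =
        cv::cuda::createMorphologyFilter(cv::MORPH_ERODE, CV_8UC1, kernel);
    erode->apply(d_src, d_dst);
@endcode
*/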
////////////////////////////////////////////////////////////////////////////////////////////////////
// Image Rank Filter
/** @brief Creates the maximum filter.
@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported.
@param ksize Kernel size.
@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center.
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
*/
CV_EXPORTS Ptr<Filter> createBoxMaxFilter(int srcType, Size ksize,
Point anchor = Point(-1, -1),
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
/** @brief Creates the minimum filter.
@param srcType Input/output image type. Only CV_8UC1 and CV_8UC4 are supported.
@param ksize Kernel size.
@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center.
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
*/
CV_EXPORTS Ptr<Filter> createBoxMinFilter(int srcType, Size ksize,
Point anchor = Point(-1, -1),
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
@@ -136,14 +290,30 @@ CV_EXPORTS Ptr<Filter> createBoxMinFilter(int srcType, Size ksize,
////////////////////////////////////////////////////////////////////////////////////////////////////
// 1D Sum Filter
/** @brief Creates a horizontal 1D box filter.
@param srcType Input image type. Only CV_8UC1 type is supported for now.
@param dstType Output image type. Only CV_32FC1 type is supported for now.
@param ksize Kernel size.
@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center.
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
*/
CV_EXPORTS Ptr<Filter> createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
/** @brief Creates a vertical 1D box filter.
@param srcType Input image type. Only CV_8UC1 type is supported for now.
@param dstType Output image type. Only CV_32FC1 type is supported for now.
@param ksize Kernel size.
@param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center.
@param borderMode Pixel extrapolation method. For details, see borderInterpolate .
@param borderVal Default border value.
*/
CV_EXPORTS Ptr<Filter> createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAFILTERS_HPP__ */

@@ -50,16 +50,48 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/imgproc.hpp"
/**
@addtogroup cuda
@{
@defgroup cudaimgproc Image Processing
@{
@defgroup cudaimgproc_color Color space processing
@defgroup cudaimgproc_hist Histogram Calculation
@defgroup cudaimgproc_hough Hough Transform
@defgroup cudaimgproc_feature Feature Detection
@}
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudaimgproc
//! @{
/////////////////////////// Color Processing ///////////////////////////
//! @addtogroup cudaimgproc_color
//! @{
/** @brief Converts an image from one color space to another.
@param src Source image with CV_8U , CV_16U , or CV_32F depth and 1, 3, or 4 channels.
@param dst Destination image.
@param code Color space conversion code. For details, see cvtColor .
@param dcn Number of channels in the destination image. If the parameter is 0, the number of the
channels is derived automatically from src and the code .
@param stream Stream for the asynchronous version.
3-channel color spaces (like HSV, XYZ, and so on) can be stored in a 4-channel image for better
performance.
@sa cvtColor
*/
CV_EXPORTS void cvtColor(InputArray src, OutputArray dst, int code, int dcn = 0, Stream& stream = Stream::Null());
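/* A minimal color conversion sketch; the conversion code is an illustrative assumption.
@code
    cv::cuda::GpuMat d_bgr, d_gray;               // d_bgr uploaded by the caller
    cv::cuda::cvtColor(d_bgr, d_gray, cv::COLOR_BGR2GRAY);
@endcode
*/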
enum
{
//! Bayer Demosaicing (Malvar, He, and Cutler)
COLOR_BayerBG2BGR_MHT = 256,
COLOR_BayerGB2BGR_MHT = 257,
COLOR_BayerRG2BGR_MHT = 258,
@@ -75,105 +107,228 @@ enum
COLOR_BayerRG2GRAY_MHT = 262,
COLOR_BayerGR2GRAY_MHT = 263
};
/** @brief Converts an image from Bayer pattern to RGB or grayscale.
@param src Source image (8-bit or 16-bit single channel).
@param dst Destination image.
@param code Color space conversion code (see the description below).
@param dcn Number of channels in the destination image. If the parameter is 0, the number of the
channels is derived automatically from src and the code .
@param stream Stream for the asynchronous version.
The function can do the following transformations:
- Demosaicing using bilinear interpolation
> - COLOR_BayerBG2GRAY , COLOR_BayerGB2GRAY , COLOR_BayerRG2GRAY , COLOR_BayerGR2GRAY
> - COLOR_BayerBG2BGR , COLOR_BayerGB2BGR , COLOR_BayerRG2BGR , COLOR_BayerGR2BGR
- Demosaicing using Malvar-He-Cutler algorithm (@cite MHT2011)
> - COLOR_BayerBG2GRAY_MHT , COLOR_BayerGB2GRAY_MHT , COLOR_BayerRG2GRAY_MHT ,
> COLOR_BayerGR2GRAY_MHT
> - COLOR_BayerBG2BGR_MHT , COLOR_BayerGB2BGR_MHT , COLOR_BayerRG2BGR_MHT ,
> COLOR_BayerGR2BGR_MHT
@sa cvtColor
*/
CV_EXPORTS void demosaicing(InputArray src, OutputArray dst, int code, int dcn = -1, Stream& stream = Stream::Null());
/** @brief Exchanges the color channels of an image in-place.
@param image Source image. Supports only CV_8UC4 type.
@param dstOrder Integer array describing how channel values are permutated. The n-th entry of the
array contains the number of the channel that is stored in the n-th channel of the output image.
E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR channel order.
@param stream Stream for the asynchronous version.
The method supports arbitrary permutations of the original channels, including replication.
*/
CV_EXPORTS void swapChannels(InputOutputArray image, const int dstOrder[4], Stream& stream = Stream::Null());
/** @brief Routines for correcting image color gamma.
@param src Source image (3- or 4-channel 8 bit).
@param dst Destination image.
@param forward true for forward gamma correction or false for inverse gamma correction.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void gammaCorrection(InputArray src, OutputArray dst, bool forward = true, Stream& stream = Stream::Null());
enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
/** @brief Composites two images using alpha opacity values contained in each image.
@param img1 First image. Supports CV_8UC4 , CV_16UC4 , CV_32SC4 and CV_32FC4 types.
@param img2 Second image. Must have the same size and the same type as img1 .
@param dst Destination image.
@param alpha_op Flag specifying the alpha-blending operation:
- **ALPHA_OVER**
- **ALPHA_IN**
- **ALPHA_OUT**
- **ALPHA_ATOP**
- **ALPHA_XOR**
- **ALPHA_PLUS**
- **ALPHA_OVER_PREMUL**
- **ALPHA_IN_PREMUL**
- **ALPHA_OUT_PREMUL**
- **ALPHA_ATOP_PREMUL**
- **ALPHA_XOR_PREMUL**
- **ALPHA_PLUS_PREMUL**
- **ALPHA_PREMUL**
@param stream Stream for the asynchronous version.
@note
- An example demonstrating the use of alphaComp can be found at
opencv_source_code/samples/gpu/alpha_comp.cpp
*/
CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int alpha_op, Stream& stream = Stream::Null());
//! @} cudaimgproc_color
////////////////////////////// Histogram ///////////////////////////////
//! @addtogroup cudaimgproc_hist
//! @{
/** @brief Calculates histogram for one channel 8-bit image.
@param src Source image with CV_8UC1 type.
@param hist Destination histogram with one row, 256 columns, and the CV_32SC1 type.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null());
/** @brief Equalizes the histogram of a grayscale image.
@param src Source image with CV_8UC1 type.
@param dst Destination image.
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
@param stream Stream for the asynchronous version.
@sa equalizeHist
*/
CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null());
/** @overload */
static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::equalizeHist(src, dst, buf, stream);
}
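/* A minimal sketch combining calcHist and the buffer-less equalizeHist overload above.
@code
    cv::cuda::GpuMat d_src, d_dst, d_hist;        // d_src is CV_8UC1
    cv::cuda::calcHist(d_src, d_hist);            // 1 x 256 CV_32SC1 histogram
    cv::cuda::equalizeHist(d_src, d_dst);
@endcode
*/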
/** @brief Base class for Contrast Limited Adaptive Histogram Equalization.
*/
class CV_EXPORTS CLAHE : public cv::CLAHE
{
public:
using cv::CLAHE::apply;
/** @brief Equalizes the histogram of a grayscale image using Contrast Limited Adaptive Histogram Equalization.
@param src Source image with CV_8UC1 type.
@param dst Destination image.
@param stream Stream for the asynchronous version.
*/
virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
};
/** @brief Creates implementation for cuda::CLAHE .
@param clipLimit Threshold for contrast limiting.
@param tileGridSize Size of grid for histogram equalization. Input image will be divided into
equally sized rectangular tiles. tileGridSize defines the number of tiles in row and column.
*/
CV_EXPORTS Ptr<cuda::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
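/* A minimal CLAHE sketch; the clip limit and tile grid are illustrative assumptions.
@code
    cv::cuda::GpuMat d_src, d_dst;                // CV_8UC1 input
    cv::Ptr<cv::cuda::CLAHE> clahe = cv::cuda::createCLAHE(4.0, cv::Size(8, 8));
    clahe->apply(d_src, d_dst);
@endcode
*/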
/** @brief Computes levels with even distribution.
@param levels Destination array. levels has 1 row, nLevels columns, and the CV_32SC1 type.
@param nLevels Number of computed levels. nLevels must be at least 2.
@param lowerLevel Lower boundary value of the lowest level.
@param upperLevel Upper boundary value of the greatest level.
*/
CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel);
/** @brief Calculates a histogram with evenly distributed bins.
@param src Source image. CV_8U, CV_16U, or CV_16S depth and 1 or 4 channels are supported. For
a four-channel image, all channels are processed separately.
@param hist Destination histogram with one row, histSize columns, and the CV_32S type.
@param histSize Size of the histogram.
@param lowerLevel Lower boundary of lowest-level bin.
@param upperLevel Upper boundary of highest-level bin.
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
/** @overload */
static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
}
/** @overload */
CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
/** @overload */
static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
}
/** @brief Calculates a histogram with bins determined by the levels array.
@param src Source image. CV_8U , CV_16U , or CV_16S depth and 1 or 4 channels are supported.
For a four-channel image, all channels are processed separately.
@param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type.
@param levels Levels array with one row. It must have the CV_32SC1 type if the source has an
integer depth, or the CV_32FC1 type otherwise.
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null());
/** @overload */
static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::histRange(src, hist, levels, buf, stream);
}
/** @overload */
CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null());
/** @overload */
static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null())
{
GpuMat buf;
cuda::histRange(src, hist, levels, buf, stream);
}
//! @} cudaimgproc_hist
//////////////////////////////// Canny ////////////////////////////////
/** @brief Base class for Canny Edge Detector.
*/
class CV_EXPORTS CannyEdgeDetector : public Algorithm
{
public:
/** @brief Finds edges in an image using the @cite Canny86 algorithm.
@param image Single-channel 8-bit input image.
@param edges Output edge map. It has the same size and type as image .
*/
virtual void detect(InputArray image, OutputArray edges) = 0;
/** @overload
@param dx First derivative of image in the vertical direction. Supports only CV_32S type.
@param dy First derivative of image in the horizontal direction. Supports only CV_32S type.
@param edges Output edge map. It has the same size and type as image .
*/
virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0;
virtual void setLowThreshold(double low_thresh) = 0;
@@ -189,6 +344,16 @@ public:
virtual bool getL2Gradient() const = 0;
};
/** @brief Creates implementation for cuda::CannyEdgeDetector .
@param low_thresh First threshold for the hysteresis procedure.
@param high_thresh Second threshold for the hysteresis procedure.
@param apperture_size Aperture size for the Sobel operator.
@param L2gradient Flag indicating whether a more accurate \f$L_2\f$ norm
\f$=\sqrt{(dI/dx)^2 + (dI/dy)^2}\f$ should be used to compute the image gradient magnitude (
L2gradient=true ), or a faster default \f$L_1\f$ norm \f$=|dI/dx|+|dI/dy|\f$ is enough ( L2gradient=false
).
*/
CV_EXPORTS Ptr<CannyEdgeDetector> createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
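/* A minimal edge detection sketch; the hysteresis thresholds are illustrative assumptions.
@code
    cv::cuda::GpuMat d_image, d_edges;            // CV_8UC1 input
    cv::Ptr<cv::cuda::CannyEdgeDetector> canny =
        cv::cuda::createCannyEdgeDetector(50.0, 100.0);
    canny->detect(d_image, d_edges);
@endcode
*/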
/////////////////////////// Hough Transform ////////////////////////////
@@ -196,10 +361,32 @@ CV_EXPORTS Ptr<CannyEdgeDetector> createCannyEdgeDetector(double low_thresh, dou
//////////////////////////////////////
// HoughLines
//! @addtogroup cudaimgproc_hough
//! @{
/** @brief Base class for lines detector algorithm.
*/
class CV_EXPORTS HoughLinesDetector : public Algorithm
{
public:
/** @brief Finds lines in a binary image using the classical Hough transform.
@param src 8-bit, single-channel binary source image.
@param lines Output vector of lines. Each line is represented by a two-element vector
\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
the image). \f$\theta\f$ is the line rotation angle in radians (
\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
@sa HoughLines
*/
virtual void detect(InputArray src, OutputArray lines) = 0;
/** @brief Downloads results from cuda::HoughLinesDetector::detect to host memory.
@param d_lines Result of cuda::HoughLinesDetector::detect .
@param h_lines Output host array.
@param h_votes Optional output array for lines' votes.
*/
virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0;
virtual void setRho(float rho) = 0;
@@ -218,16 +405,35 @@ public:
virtual int getMaxLines() const = 0;
};
/** @brief Creates implementation for cuda::HoughLinesDetector .
@param rho Distance resolution of the accumulator in pixels.
@param theta Angle resolution of the accumulator in radians.
@param threshold Accumulator threshold parameter. Only those lines are returned that get enough
votes ( \f$>\texttt{threshold}\f$ ).
@param doSort If true, the output lines are sorted by their votes.
@param maxLines Maximum number of output lines.
*/
CV_EXPORTS Ptr<HoughLinesDetector> createHoughLinesDetector(float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
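/* A minimal line detection sketch; the accumulator resolution and threshold are illustrative
   assumptions. The input is typically an edge map, e.g. from cuda::CannyEdgeDetector.
@code
    cv::cuda::GpuMat d_edges, d_lines;            // 8-bit single-channel binary input
    cv::Ptr<cv::cuda::HoughLinesDetector> hough =
        cv::cuda::createHoughLinesDetector(1.0f, (float)(CV_PI / 180.0), 100);
    hough->detect(d_edges, d_lines);
    std::vector<cv::Vec2f> lines;
    hough->downloadResults(d_lines, lines);       // copy (rho, theta) pairs to the host
@endcode
*/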
//////////////////////////////////////
// HoughLinesP
/** @brief Base class for line segments detector algorithm.
*/
class CV_EXPORTS HoughSegmentDetector : public Algorithm
{
public:
/** @brief Finds line segments in a binary image using the probabilistic Hough transform.
@param src 8-bit, single-channel binary source image.
@param lines Output vector of lines. Each line is represented by a 4-element vector
\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected
line segment.
@sa HoughLinesP
*/
virtual void detect(InputArray src, OutputArray lines) = 0;
virtual void setRho(float rho) = 0;
@@ -246,14 +452,32 @@ public:
virtual int getMaxLines() const = 0;
};
/** @brief Creates implementation for cuda::HoughSegmentDetector .
@param rho Distance resolution of the accumulator in pixels.
@param theta Angle resolution of the accumulator in radians.
@param minLineLength Minimum line length. Line segments shorter than that are rejected.
@param maxLineGap Maximum allowed gap between points on the same line to link them.
@param maxLines Maximum number of output lines.
*/
CV_EXPORTS Ptr<HoughSegmentDetector> createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
//////////////////////////////////////
// HoughCircles
/** @brief Base class for circles detector algorithm.
*/
class CV_EXPORTS HoughCirclesDetector : public Algorithm
{
public:
/** @brief Finds circles in a grayscale image using the Hough transform.
@param src 8-bit, single-channel grayscale input image.
@param circles Output vector of found circles. Each vector is encoded as a 3-element
floating-point vector \f$(x, y, radius)\f$ .
@sa HoughCircles
*/
virtual void detect(InputArray src, OutputArray circles) = 0;
virtual void setDp(float dp) = 0;
@@ -278,85 +502,257 @@ public:
virtual int getMaxCircles() const = 0;
};
/** @brief Creates implementation for cuda::HoughCirclesDetector .
@param dp Inverse ratio of the accumulator resolution to the image resolution. For example, if
dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has
half the width and height of the input image.
@param minDist Minimum distance between the centers of the detected circles. If the parameter is
too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is
too large, some circles may be missed.
@param cannyThreshold The higher of the two thresholds passed to the Canny edge detector (the
lower one is half of it).
@param votesThreshold The accumulator threshold for the circle centers at the detection stage. The
smaller it is, the more false circles may be detected.
@param minRadius Minimum circle radius.
@param maxRadius Maximum circle radius.
@param maxCircles Maximum number of output circles.
*/
CV_EXPORTS Ptr<HoughCirclesDetector> createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
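/* A minimal usage sketch (illustrative; the parameter values are assumptions):
@code{.cpp}
    #include <opencv2/cudaimgproc.hpp>

    void detectCirclesGpu(const cv::Mat& gray) // CV_8UC1 grayscale image
    {
        cv::Ptr<cv::cuda::HoughCirclesDetector> hough =
            cv::cuda::createHoughCirclesDetector(
                1.0f,   // dp
                20.0f,  // minDist
                100,    // cannyThreshold
                50,     // votesThreshold
                10,     // minRadius
                100);   // maxRadius
        cv::cuda::GpuMat d_gray(gray), d_circles;
        hough->detect(d_gray, d_circles); // 1xN row of (x, y, radius) floats
    }
@endcode
*/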
//////////////////////////////////////
// GeneralizedHough
//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
//! Detects position only, without translation and rotation
/** @brief Creates implementation for generalized Hough transform from @cite Ballard1981 .
*/
CV_EXPORTS Ptr<GeneralizedHoughBallard> createGeneralizedHoughBallard();
//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
//! Detects position, translation and rotation
/** @brief Creates implementation for generalized Hough transform from @cite Guil1999 .
*/
CV_EXPORTS Ptr<GeneralizedHoughGuil> createGeneralizedHoughGuil();
//! @} cudaimgproc_hough
////////////////////////// Corners Detection ///////////////////////////
//! @addtogroup cudaimgproc_feature
//! @{
/** @brief Base class for Cornerness Criteria computation.
*/
class CV_EXPORTS CornernessCriteria : public Algorithm
{
public:
/** @brief Computes the cornerness criteria at each image pixel.
@param src Source image.
@param dst Destination image containing cornerness values. It will have the same size as src and
CV_32FC1 type.
@param stream Stream for the asynchronous version.
*/
virtual void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0;
};
//! computes Harris cornerness criteria at each image pixel
/** @brief Creates implementation for Harris cornerness criteria.
@param srcType Input source type. Only CV_8UC1 and CV_32FC1 are supported for now.
@param blockSize Neighborhood size.
@param ksize Aperture parameter for the Sobel operator.
@param k Harris detector free parameter.
@param borderType Pixel extrapolation method. Only BORDER_REFLECT101 and BORDER_REPLICATE are
supported for now.
@sa cornerHarris
*/
CV_EXPORTS Ptr<CornernessCriteria> createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
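/* A minimal usage sketch (illustrative; the blockSize/ksize/k values are assumptions):
@code{.cpp}
    #include <opencv2/cudaimgproc.hpp>

    void harrisResponseGpu(const cv::Mat& gray) // CV_8UC1 or CV_32FC1
    {
        cv::Ptr<cv::cuda::CornernessCriteria> harris =
            cv::cuda::createHarrisCorner(gray.type(), 5, 3, 0.04);
        cv::cuda::GpuMat d_gray(gray), d_response;
        harris->compute(d_gray, d_response); // CV_32FC1 cornerness map, same size as src
    }
@endcode
*/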
//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
/** @brief Creates implementation for the minimum eigen value of a 2x2 derivative covariation matrix (the
cornerness criteria).
@param srcType Input source type. Only CV_8UC1 and CV_32FC1 are supported for now.
@param blockSize Neighborhood size.
@param ksize Aperture parameter for the Sobel operator.
@param borderType Pixel extrapolation method. Only BORDER_REFLECT101 and BORDER_REPLICATE are
supported for now.
@sa cornerMinEigenVal
*/
CV_EXPORTS Ptr<CornernessCriteria> createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType = BORDER_REFLECT101);
////////////////////////// Corners Detection ///////////////////////////
/** @brief Base class for Corners Detector.
*/
class CV_EXPORTS CornersDetector : public Algorithm
{
public:
//! returns a 1-row matrix with CV_32FC2 type
/** @brief Determines strong corners on an image.
@param image Input 8-bit or floating-point 32-bit, single-channel image.
@param corners Output vector of detected corners (1-row matrix with CV_32FC2 type with corners
positions).
@param mask Optional region of interest. If the mask is not empty (it must have the type
CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
*/
virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0;
};
/** @brief Creates implementation for cuda::CornersDetector .
@param srcType Input source type. Only CV_8UC1 and CV_32FC1 are supported for now.
@param maxCorners Maximum number of corners to return. If more corners are found than
maxCorners, the strongest maxCorners of them are returned.
@param qualityLevel Parameter characterizing the minimal accepted quality of image corners. The
parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue
(see cornerMinEigenVal ) or the Harris function response (see cornerHarris ). The corners with the
quality measure less than the product are rejected. For example, if the best corner has the
quality measure = 1500, and the qualityLevel=0.01 , then all the corners with the quality measure
less than 15 are rejected.
@param minDistance Minimum possible Euclidean distance between the returned corners.
@param blockSize Size of an average block for computing a derivative covariation matrix over each
pixel neighborhood. See cornerEigenValsAndVecs .
@param useHarrisDetector Parameter indicating whether to use a Harris detector (see cornerHarris)
or cornerMinEigenVal.
@param harrisK Free parameter of the Harris detector.
*/
CV_EXPORTS Ptr<CornersDetector> createGoodFeaturesToTrackDetector(int srcType, int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
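/* A minimal usage sketch (illustrative; the corner count and quality values are
   assumptions):
@code{.cpp}
    #include <opencv2/cudaimgproc.hpp>

    cv::cuda::GpuMat detectCornersGpu(const cv::Mat& gray) // CV_8UC1
    {
        cv::Ptr<cv::cuda::CornersDetector> detector =
            cv::cuda::createGoodFeaturesToTrackDetector(gray.type(), 1000, 0.01, 10.0);
        cv::cuda::GpuMat d_gray(gray), d_corners; // corners: 1xN, CV_32FC2
        detector->detect(d_gray, d_corners);
        return d_corners; // can be fed to PyrLKOpticalFlow::sparse directly
    }
@endcode
*/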
//! @} cudaimgproc_feature
///////////////////////////// Mean Shift //////////////////////////////
//! Does mean shift filtering on GPU.
/** @brief Performs mean-shift filtering for each point of the source image.
@param src Source image. Only CV_8UC4 images are supported for now.
@param dst Destination image containing the color of mapped points. It has the same size and type
as src .
@param sp Spatial window radius.
@param sr Color window radius.
@param criteria Termination criteria. See TermCriteria.
@param stream Stream for the asynchronous version.
It maps each point of the source image into another point. As a result, you have a new color and new
position of each point.
*/
CV_EXPORTS void meanShiftFiltering(InputArray src, OutputArray dst, int sp, int sr,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
Stream& stream = Stream::Null());
//! Does mean shift procedure on GPU.
/** @brief Performs a mean-shift procedure and stores information about processed points (their colors and
positions) in two images.
@param src Source image. Only CV_8UC4 images are supported for now.
@param dstr Destination image containing the color of mapped points. The size and type is the same
as src .
@param dstsp Destination image containing the position of mapped points. The size is the same as
src size. The type is CV_16SC2 .
@param sp Spatial window radius.
@param sr Color window radius.
@param criteria Termination criteria. See TermCriteria.
@param stream Stream for the asynchronous version.
@sa cuda::meanShiftFiltering
*/
CV_EXPORTS void meanShiftProc(InputArray src, OutputArray dstr, OutputArray dstsp, int sp, int sr,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
Stream& stream = Stream::Null());
//! Does mean shift segmentation with elimination of small regions.
/** @brief Performs a mean-shift segmentation of the source image and eliminates small segments.
@param src Source image. Only CV_8UC4 images are supported for now.
@param dst Segmented image with the same size and type as src (host memory).
@param sp Spatial window radius.
@param sr Color window radius.
@param minsize Minimum segment size. Smaller segments are merged.
@param criteria Termination criteria. See TermCriteria.
*/
CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
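/* A minimal usage sketch (illustrative; the radii and minsize are assumptions). Note
   the CV_8UC4 input requirement and that the segmented result lands in host memory:
@code{.cpp}
    #include <opencv2/imgproc.hpp>
    #include <opencv2/cudaimgproc.hpp>

    cv::Mat segmentGpu(const cv::Mat& bgr) // CV_8UC3 host image
    {
        cv::Mat bgra, segmented;
        cv::cvtColor(bgr, bgra, cv::COLOR_BGR2BGRA); // meet the CV_8UC4 requirement
        cv::cuda::GpuMat d_bgra(bgra);
        cv::cuda::meanShiftSegmentation(d_bgra, segmented,
                                        10,   // sp: spatial window radius
                                        10,   // sr: color window radius
                                        20);  // minsize
        return segmented;
    }
@endcode
*/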
/////////////////////////// Match Template ////////////////////////////
//! computes the proximity map for the raster template and the image where the template is searched for
/** @brief Base class for Template Matching.
*/
class CV_EXPORTS TemplateMatching : public Algorithm
{
public:
/** @brief Computes a proximity map for a raster template and an image where the template is searched for.
@param image Source image.
@param templ Template image with the size and type the same as image .
@param result Map containing comparison results ( CV_32FC1 ). If image is *W x H* and templ is *w
x h*, then result must be *W-w+1 x H-h+1*.
@param stream Stream for the asynchronous version.
*/
virtual void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null()) = 0;
};
/** @brief Creates implementation for cuda::TemplateMatching .
@param srcType Input source type. CV_32F and CV_8U depth images (1..4 channels) are supported
for now.
@param method Specifies the way to compare the template with the image.
@param user_block_size You can use the field user_block_size to set a specific block size. If
you leave it at its default value Size(0,0), the block size is estimated automatically (optimized
for speed). By varying user_block_size you can reduce memory requirements at the cost of
speed.
The following methods are supported for the CV_8U depth images for now:
- CV_TM_SQDIFF
- CV_TM_SQDIFF_NORMED
- CV_TM_CCORR
- CV_TM_CCORR_NORMED
- CV_TM_CCOEFF
- CV_TM_CCOEFF_NORMED
The following methods are supported for the CV_32F images for now:
- CV_TM_SQDIFF
- CV_TM_CCORR
@sa matchTemplate
*/
CV_EXPORTS Ptr<TemplateMatching> createTemplateMatching(int srcType, int method, Size user_block_size = Size());
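/* A minimal usage sketch (illustrative): match, then locate the best score with
   cv::cuda::minMaxLoc from the cudaarithm module:
@code{.cpp}
    #include <opencv2/imgproc.hpp>     // TM_CCORR_NORMED
    #include <opencv2/cudaimgproc.hpp>
    #include <opencv2/cudaarithm.hpp>  // cv::cuda::minMaxLoc

    cv::Point matchGpu(const cv::Mat& img, const cv::Mat& templ) // both CV_8UC1
    {
        cv::Ptr<cv::cuda::TemplateMatching> tm =
            cv::cuda::createTemplateMatching(img.type(), cv::TM_CCORR_NORMED);
        cv::cuda::GpuMat d_img(img), d_templ(templ), d_result;
        tm->match(d_img, d_templ, d_result); // CV_32FC1 proximity map
        double maxVal; cv::Point maxLoc;
        cv::cuda::minMaxLoc(d_result, 0, &maxVal, 0, &maxLoc);
        return maxLoc; // top-left corner of the best match
    }
@endcode
*/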
////////////////////////// Bilateral Filter ///////////////////////////
//! Performs bilateral filtering of the passed image
/** @brief Performs bilateral filtering of the passed image.
@param src Source image. Supports only (channels != 2 && depth() != CV_8S && depth() != CV_32S
&& depth() != CV_64F).
@param dst Destination image.
@param kernel_size Kernel window size.
@param sigma_color Filter sigma in the color space.
@param sigma_spatial Filter sigma in the coordinate space.
@param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 ,
BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
@param stream Stream for the asynchronous version.
@sa bilateralFilter
*/
CV_EXPORTS void bilateralFilter(InputArray src, OutputArray dst, int kernel_size, float sigma_color, float sigma_spatial,
int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
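/* A minimal usage sketch (illustrative; the kernel size and sigmas are assumptions):
@code{.cpp}
    #include <opencv2/cudaimgproc.hpp>

    void bilateralGpu(const cv::Mat& src) // e.g. CV_8UC1 or CV_8UC3
    {
        cv::cuda::GpuMat d_src(src), d_dst;
        cv::cuda::bilateralFilter(d_src, d_dst,
                                  9,      // kernel_size
                                  50.0f,  // sigma_color
                                  7.0f);  // sigma_spatial
    }
@endcode
*/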
///////////////////////////// Blending ////////////////////////////////
//! performs linear blending of two images
//! to avoid accuracy errors, the sum of weights shouldn't be very close to zero
/** @brief Performs linear blending of two images.
@param img1 First image. Supports only CV_8U and CV_32F depth.
@param img2 Second image. Must have the same size and the same type as img1 .
@param weights1 Weights for the first image. Must have the same size as img1 . Supports only
CV_32F type.
@param weights2 Weights for the second image. Must have the same size as img2 . Supports only
CV_32F type.
@param result Destination image.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void blendLinear(InputArray img1, InputArray img2, InputArray weights1, InputArray weights2,
OutputArray result, Stream& stream = Stream::Null());
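/* A minimal usage sketch (illustrative): a per-pixel 30/70 blend using constant
   CV_32FC1 weight maps:
@code{.cpp}
    #include <opencv2/cudaimgproc.hpp>

    void blendGpu(const cv::Mat& img1, const cv::Mat& img2) // same size and type
    {
        cv::Mat w1(img1.size(), CV_32FC1, cv::Scalar(0.3));
        cv::Mat w2(img1.size(), CV_32FC1, cv::Scalar(0.7));
        cv::cuda::GpuMat d1(img1), d2(img2), dw1(w1), dw2(w2), d_result;
        cv::cuda::blendLinear(d1, d2, dw1, dw2, d_result);
    }
@endcode
*/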
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAIMGPROC_HPP__ */

@ -49,4 +49,11 @@
#include "opencv2/cudalegacy/NCVHaarObjectDetection.hpp"
#include "opencv2/cudalegacy/NCVBroxOpticalFlow.hpp"
/**
@addtogroup cuda
@{
@defgroup cudalegacy Legacy support
@}
*/
#endif /* __OPENCV_CUDALEGACY_HPP__ */

@ -60,6 +60,8 @@
//
//==============================================================================
//! @addtogroup cudalegacy
//! @{
/**
* Compile-time assert namespace
@ -203,6 +205,7 @@ struct NcvPoint2D32u
__host__ __device__ NcvPoint2D32u(Ncv32u x_, Ncv32u y_) : x(x_), y(y_) {}
};
//! @cond IGNORED
NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
@ -221,6 +224,7 @@ NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
//! @endcond
//==============================================================================
//
@ -1023,6 +1027,6 @@ CV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Nc
NCVMatrixAlloc<type> name(alloc, width, height); \
ncvAssertReturn(name.isMemAllocated(), err);
//! @}
#endif // _ncv_hpp_

@ -62,6 +62,9 @@
#include "opencv2/cudalegacy/NCV.hpp"
//! @addtogroup cudalegacy
//! @{
/// \brief Model and solver parameters
struct NCVBroxOpticalFlowDescriptor
{
@ -89,6 +92,7 @@ struct NCVBroxOpticalFlowDescriptor
/// \param [in] frame1 frame to track
/// \param [out] u flow horizontal component (along \b x axis)
/// \param [out] v flow vertical component (along \b y axis)
/// \param stream CUDA stream to execute the computation on
/// \return computation status
/////////////////////////////////////////////////////////////////////////////////////////
@ -101,4 +105,6 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
NCVMatrix<Ncv32f> &v,
cudaStream_t stream);
//! @}
#endif

@ -61,6 +61,8 @@
#include "opencv2/cudalegacy/NCV.hpp"
//! @addtogroup cudalegacy
//! @{
//==============================================================================
//
@ -456,6 +458,6 @@ CV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const cv::String &filename,
NCVVector<HaarClassifierNode128> &h_HaarNodes,
NCVVector<HaarFeature64> &h_HaarFeatures);
//! @}
#endif // _ncvhaarobjectdetection_hpp_

@ -48,6 +48,8 @@
#include "opencv2/cudalegacy/NCV.hpp"
#include "opencv2/core/cuda/common.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace pyramid
@ -106,4 +108,6 @@ private:
#endif //_WIN32
//! @endcond
#endif //_ncvpyramid_hpp_

@ -45,19 +45,14 @@
#include "opencv2/cudalegacy/NCV.hpp"
/**
* \file NPP_staging.hpp
* NPP Staging Library
*/
//! @addtogroup cudalegacy
//! @{
/** \defgroup core_npp NPPST Core
* Basic functions for CUDA streams management.
* @{
*/
/**
* Gets an active CUDA stream used by NPPST
* NOT THREAD SAFE
@ -168,6 +163,7 @@ NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState);
* \param nSrcStep [IN] Source image line step
* \param pDst [OUT] Destination image pointer (CUDA device memory)
* \param dstSize [OUT] Destination image size
* \param nDstStep [IN] Destination image line step
* \param oROI [IN] Region of interest in the source image
* \param borderType [IN] Type of border
* \param pKernel [IN] Pointer to row kernel values (CUDA device memory)
@ -201,6 +197,7 @@ NCVStatus nppiStFilterRowBorder_32f_C1R(const Ncv32f *pSrc,
* \param nSrcStep [IN] Source image line step
* \param pDst [OUT] Destination image pointer (CUDA device memory)
* \param dstSize [OUT] Destination image size
* \param nDstStep [IN] Destination image line step
* \param oROI [IN] Region of interest in the source image
* \param borderType [IN] Type of border
* \param pKernel [IN] Pointer to column kernel values (CUDA device memory)
@ -228,7 +225,7 @@ NCVStatus nppiStFilterColumnBorder_32f_C1R(const Ncv32f *pSrc,
/** Size of buffer required for vector image warping.
*
* \param srcSize [IN] Source image size
* \param nStep [IN] Source image line step
* \param nSrcStep [IN] Source image line step
* \param hpSize [OUT] Where to store computed size (host memory)
*
* \return NCV status code
@ -285,6 +282,7 @@ NCVStatus nppiStVectorWarp_PSF1x1_32f_C1(const Ncv32f *pSrc,
* \param pU [IN] Pointer to horizontal displacement field (CUDA device memory)
* \param pV [IN] Pointer to vertical displacement field (CUDA device memory)
* \param nVFStep [IN] Displacement field line step
* \param pBuffer [IN] Pointer to the temporary buffer (CUDA device memory)
* \param timeScale [IN] Value by which displacement field will be scaled for warping
* \param pDst [OUT] Destination image pointer (CUDA device memory)
*
@ -903,5 +901,6 @@ NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
/*@}*/
//! @}
#endif // _npp_staging_hpp_

@ -56,6 +56,8 @@
#include "opencv2/cudalegacy.hpp"
//! @cond IGNORED
namespace cv { namespace cuda
{
class NppStStreamHandler
@ -89,4 +91,6 @@ namespace cv { namespace cuda
#define ncvSafeCall(expr) cv::cuda::checkNcvError(expr, __FILE__, __LINE__, CV_Func)
//! @endcond
#endif // __OPENCV_CORE_CUDALEGACY_PRIVATE_HPP__

@ -49,8 +49,21 @@
#include "opencv2/core/cuda.hpp"
/**
@addtogroup cuda
@{
@defgroup cudaoptflow Optical Flow
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudaoptflow
//! @{
/** @brief Class computing the optical flow for two images using the Brox et al. optical flow
algorithm (@cite Brox2004).
*/
class CV_EXPORTS BroxOpticalFlow
{
public:
@ -88,16 +101,58 @@ public:
GpuMat buf;
};
/** @brief Class used for calculating an optical flow.
The class can calculate an optical flow for a sparse feature set or dense optical flow using the
iterative Lucas-Kanade method with pyramids.
@sa calcOpticalFlowPyrLK
@note
- An example of the Lucas Kanade optical flow algorithm can be found at
opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
*/
class CV_EXPORTS PyrLKOpticalFlow
{
public:
PyrLKOpticalFlow();
/** @brief Calculate an optical flow for a sparse feature set.
@param prevImg First 8-bit input image (supports both grayscale and color images).
@param nextImg Second input image of the same size and the same type as prevImg .
@param prevPts Vector of 2D points for which the flow needs to be found. It must be a one-row
matrix with CV_32FC2 type.
@param nextPts Output vector of 2D points (with single-precision floating-point coordinates)
containing the calculated new positions of input features in the second image. When useInitialFlow
is true, the vector must have the same size as in the input.
@param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the
flow for the corresponding features has been found. Otherwise, it is set to 0.
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
@sa calcOpticalFlowPyrLK
*/
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, GpuMat* err = 0);
/** @brief Calculate dense optical flow.
@param prevImg First 8-bit grayscale input image.
@param nextImg Second input image of the same size and the same type as prevImg .
@param u Horizontal component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param v Vertical component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
*/
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
/** @brief Releases inner buffers memory.
*/
void releaseMemory();
Size winSize;
@ -115,6 +170,8 @@ private:
GpuMat vPyr_[2];
};
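/* A minimal usage sketch (illustrative): track corners from a cuda::CornersDetector
   between two frames; `d_prevPts` must be a one-row CV_32FC2 matrix:
@code{.cpp}
    #include <opencv2/cudaoptflow.hpp>

    void trackGpu(const cv::cuda::GpuMat& d_prev, const cv::cuda::GpuMat& d_next,
                  const cv::cuda::GpuMat& d_prevPts) // 1xN CV_32FC2
    {
        cv::cuda::PyrLKOpticalFlow pyrlk;
        pyrlk.winSize = cv::Size(21, 21);
        cv::cuda::GpuMat d_nextPts, d_status;
        pyrlk.sparse(d_prev, d_next, d_prevPts, d_nextPts, d_status);
        // d_status is CV_8UC1: 1 where the flow was found, 0 otherwise
    }
@endcode
*/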
/** @brief Class computing a dense optical flow using Gunnar Farneback's algorithm.
*/
class CV_EXPORTS FarnebackOpticalFlow
{
public:
@ -139,8 +196,20 @@ public:
double polySigma;
int flags;
/** @brief Computes a dense optical flow using Gunnar Farneback's algorithm.
@param frame0 First 8-bit gray-scale input image
@param frame1 Second 8-bit gray-scale input image
@param flowx Flow horizontal component
@param flowy Flow vertical component
@param s Stream
@sa calcOpticalFlowFarneback
*/
void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
/** @brief Releases unused auxiliary memory buffers.
*/
void releaseMemory()
{
frames_[0].release();
@ -295,20 +364,22 @@ private:
GpuMat extended_I1;
};
//! Interpolate frames (images) using provided optical flow (displacement field).
//! frame0 - frame 0 (32-bit floating point images, single channel)
//! frame1 - frame 1 (the same type and size)
//! fu - forward horizontal displacement
//! fv - forward vertical displacement
//! bu - backward horizontal displacement
//! bv - backward vertical displacement
//! pos - new frame position
//! newFrame - new frame
//! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 GpuMat;
//! occlusion masks 0, occlusion masks 1,
//! interpolated forward flow 0, interpolated forward flow 1,
//! interpolated backward flow 0, interpolated backward flow 1
//!
/** @brief Interpolates frames (images) using provided optical flow (displacement field).
@param frame0 First frame (32-bit floating point images, single channel).
@param frame1 Second frame. Must have the same type and size as frame0 .
@param fu Forward horizontal displacement.
@param fv Forward vertical displacement.
@param bu Backward horizontal displacement.
@param bv Backward vertical displacement.
@param pos New frame position.
@param newFrame Output image.
@param buf Temporary buffer, will have width x 6\*height size, CV_32FC1 type and contain 6
GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward
horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow,
interpolated backward vertical flow.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
const GpuMat& fu, const GpuMat& fv,
const GpuMat& bu, const GpuMat& bv,
@ -317,6 +388,8 @@ CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAOPTFLOW_HPP__ */

@ -50,11 +50,25 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/calib3d.hpp"
/**
@addtogroup cuda
@{
@defgroup cudastereo Stereo Correspondence
@}
*/
namespace cv { namespace cuda {
//! @addtogroup cudastereo
//! @{
/////////////////////////////////////////
// StereoBM
/** @brief Class computing stereo correspondence (disparity map) using the block matching algorithm.
@sa StereoBM
*/
class CV_EXPORTS StereoBM : public cv::StereoBM
{
public:
@ -63,20 +77,70 @@ public:
virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0;
};
/** @brief Creates StereoBM object.
@param numDisparities the disparity search range. For each pixel, the algorithm will find the
best disparity from 0 (default minimum disparity) to numDisparities. The search range can then be
shifted by changing the minimum disparity.
@param blockSize the linear size of the blocks compared by the algorithm. The size should be odd
(as the block is centered at the current pixel). A larger block size implies a smoother, though
less accurate, disparity map. A smaller block size gives a more detailed disparity map, but there
is a higher chance for the algorithm to find a wrong correspondence.
*/
CV_EXPORTS Ptr<cuda::StereoBM> createStereoBM(int numDisparities = 64, int blockSize = 19);
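/* A minimal usage sketch (illustrative; a rectified CV_8UC1 pair is assumed):
@code{.cpp}
    #include <opencv2/cudastereo.hpp>

    void disparityGpu(const cv::Mat& left, const cv::Mat& right) // rectified pair
    {
        cv::Ptr<cv::cuda::StereoBM> bm = cv::cuda::createStereoBM(64, 19);
        cv::cuda::GpuMat d_left(left), d_right(right), d_disp;
        bm->compute(d_left, d_right, d_disp);
    }
@endcode
*/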
/////////////////////////////////////////
// StereoBeliefPropagation
//! "Efficient Belief Propagation for Early Vision" P.Felzenszwalb
/** @brief Class computing stereo correspondence using the belief propagation algorithm.
The class implements the algorithm described in @cite Felzenszwalb2006 . It can compute its own
data cost (using a truncated linear model) or use a user-provided data cost.
@note
StereoBeliefPropagation requires a lot of memory for message storage:
\f[width\_step \cdot height \cdot ndisp \cdot 4 \cdot (1 + 0.25)\f]
and for data cost storage:
\f[width\_step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 + \dotsm + \frac{1}{4^{levels}})\f]
width_step is the number of bytes in a line including padding.
StereoBeliefPropagation uses a truncated linear model for the data cost and discontinuity terms:
\f[DataCost = data\_weight \cdot \min ( \lvert Img_{Left}(x,y) - Img_{Right}(x-d,y) \rvert , max\_data\_term)\f]
\f[DiscTerm = \min (disc\_single\_jump \cdot \lvert f_1-f_2 \rvert , max\_disc\_term)\f]
For more details, see @cite Felzenszwalb2006 .
By default, StereoBeliefPropagation uses floating-point arithmetic and the CV_32FC1 type for
messages. But it can also use fixed-point arithmetic and the CV_16SC1 message type for better
performance. To avoid an overflow in this case, the parameters must satisfy the following
requirement:
\f[10 \cdot 2^{levels-1} \cdot max\_data\_term < SHRT\_MAX\f]
@sa StereoMatcher
*/
class CV_EXPORTS StereoBeliefPropagation : public cv::StereoMatcher
{
public:
using cv::StereoMatcher::compute;
/** @overload */
virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0;
//! version for user specified data term
/** @brief Enables the stereo correspondence operator that finds the disparity for the specified data cost.
@param data User-specified data cost, a matrix of msg_type type and
Size(\<image columns\>\*ndisp, \<image rows\>) size.
@param disparity Output disparity map. If disparity is empty, the output type is CV_16SC1 .
Otherwise, the type is retained.
@param stream Stream for the asynchronous version.
*/
virtual void compute(InputArray data, OutputArray disparity, Stream& stream = Stream::Null()) = 0;
//! number of BP iterations on each level
@ -107,18 +171,48 @@ public:
virtual int getMsgType() const = 0;
virtual void setMsgType(int msg_type) = 0;
/** @brief Uses a heuristic method to compute the recommended parameters (ndisp, iters and levels)
for the specified image size (width and height).
*/
static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
};
/** @brief Creates StereoBeliefPropagation object.
@param ndisp Number of disparities.
@param iters Number of BP iterations on each level.
@param levels Number of levels.
@param msg_type Type for messages. CV_16SC1 and CV_32FC1 types are supported.
*/
CV_EXPORTS Ptr<cuda::StereoBeliefPropagation>
createStereoBeliefPropagation(int ndisp = 64, int iters = 5, int levels = 5, int msg_type = CV_32F);
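/* A minimal usage sketch (illustrative): let the heuristic pick ndisp/iters/levels
   for the frame size, then create the matcher with fixed-point messages:
@code{.cpp}
    #include <opencv2/cudastereo.hpp>

    cv::Ptr<cv::cuda::StereoBeliefPropagation> makeBp(int width, int height)
    {
        int ndisp, iters, levels;
        cv::cuda::StereoBeliefPropagation::estimateRecommendedParams(
            width, height, ndisp, iters, levels);
        return cv::cuda::createStereoBeliefPropagation(ndisp, iters, levels, CV_16SC1);
    }
@endcode
*/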
/////////////////////////////////////////
// StereoConstantSpaceBP
//! "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
//! Qingxiong Yang, Liang Wang, Narendra Ahuja
//! http://vision.ai.uiuc.edu/~qyang6/
/** @brief Class computing stereo correspondence using the constant space belief propagation algorithm.
The class implements the algorithm described in @cite Yang2010 . StereoConstantSpaceBP supports both local
minimum and global minimum data cost initialization algorithms. For more details, see the paper
mentioned above. By default, a local algorithm is used. To enable a global algorithm, set
use_local_init_data_cost to false .
StereoConstantSpaceBP uses a truncated linear model for the data cost and discontinuity terms:
\f[DataCost = data\_weight \cdot \min ( \lvert I_2-I_1 \rvert , max\_data\_term)\f]
\f[DiscTerm = \min (disc\_single\_jump \cdot \lvert f_1-f_2 \rvert , max\_disc\_term)\f]
For more details, see @cite Yang2010 .
By default, StereoConstantSpaceBP uses floating-point arithmetic and the CV_32FC1 type for
messages. But it can also use fixed-point arithmetic and the CV_16SC1 message type for better
performance. To avoid an overflow in this case, the parameters must satisfy the following
requirement:
\f[10 \cdot 2^{levels-1} \cdot max\_data\_term < SHRT\_MAX\f]
*/
class CV_EXPORTS StereoConstantSpaceBP : public cuda::StereoBeliefPropagation
{
public:
@ -129,23 +223,40 @@ public:
virtual bool getUseLocalInitDataCost() const = 0;
virtual void setUseLocalInitDataCost(bool use_local_init_data_cost) = 0;
/** @brief Uses a heuristic method to compute parameters (ndisp, iters, levels and nr_plane) for the
specified image size (width and height).
*/
static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
};
/** @brief Creates StereoConstantSpaceBP object.
@param ndisp Number of disparities.
@param iters Number of BP iterations on each level.
@param levels Number of levels.
@param nr_plane Number of disparity levels on the first level.
@param msg_type Type for messages. CV_16SC1 and CV_32FC1 types are supported.
*/
CV_EXPORTS Ptr<cuda::StereoConstantSpaceBP>
createStereoConstantSpaceBP(int ndisp = 128, int iters = 8, int levels = 4, int nr_plane = 4, int msg_type = CV_32F);
/////////////////////////////////////////
// DisparityBilateralFilter
//! Disparity map refinement using joint bilateral filtering given a single color image.
//! Qingxiong Yang, Liang Wang, Narendra Ahuja
//! http://vision.ai.uiuc.edu/~qyang6/
/** @brief Class refining a disparity map using joint bilateral filtering.
The class implements the algorithm described in @cite Yang2010 .
*/
class CV_EXPORTS DisparityBilateralFilter : public cv::Algorithm
{
public:
//! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image.
//! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
/** @brief Refines a disparity map using joint bilateral filtering.
@param disparity Input disparity map. CV_8UC1 and CV_16SC1 types are supported.
@param image Input image. CV_8UC1 and CV_8UC3 types are supported.
@param dst Destination disparity map. It has the same size and type as disparity .
@param stream Stream for the asynchronous version.
*/
virtual void apply(InputArray disparity, InputArray image, OutputArray dst, Stream& stream = Stream::Null()) = 0;
virtual int getNumDisparities() const = 0;
@ -170,24 +281,48 @@ public:
virtual void setSigmaRange(double sigma_range) = 0;
};
/** @brief Creates DisparityBilateralFilter object.
@param ndisp Number of disparities.
@param radius Filter radius.
@param iters Number of iterations.
*/
CV_EXPORTS Ptr<cuda::DisparityBilateralFilter>
createDisparityBilateralFilter(int ndisp = 64, int radius = 3, int iters = 1);
/////////////////////////////////////////
// Utility
//! Reprojects disparity image to 3D space.
//! Supports CV_8U and CV_16S types of input disparity.
//! The output is a 3- or 4-channel floating-point matrix.
//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
/** @brief Reprojects a disparity image to 3D space.
@param disp Input disparity image. CV_8U and CV_16S types are supported.
@param xyzw Output 3- or 4-channel floating-point image of the same size as disp . Each element of
xyzw(x,y) contains 3D coordinates (x,y,z) or (x,y,z,1) of the point (x,y) , computed from the
disparity map.
@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained via stereoRectify .
@param dst_cn The number of channels for output image. Can be 3 or 4.
@param stream Stream for the asynchronous version.
@sa reprojectImageTo3D
*/
CV_EXPORTS void reprojectImageTo3D(InputArray disp, OutputArray xyzw, InputArray Q, int dst_cn = 4, Stream& stream = Stream::Null());
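/* A minimal usage sketch (illustrative; `Q` is assumed to come from a prior
   stereoRectify call):
@code{.cpp}
    #include <opencv2/cudastereo.hpp>

    void toPointCloudGpu(const cv::Mat& disp, const cv::Mat& Q) // disp: CV_8U or CV_16S
    {
        cv::cuda::GpuMat d_disp(disp), d_xyzw;
        cv::cuda::reprojectImageTo3D(d_disp, d_xyzw, Q, 4); // 4-channel float output
    }
@endcode
*/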
//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
//! Supported types of input disparity: CV_8U, CV_16S.
//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
/** @brief Colors a disparity image.
@param src_disp Source disparity image. CV_8UC1 and CV_16SC1 types are supported.
@param dst_disp Output disparity image. It has the same size as src_disp . The type is CV_8UC4
in BGRA format (alpha = 255).
@param ndisp Number of disparities.
@param stream Stream for the asynchronous version.
This function draws a colored disparity map by first converting disparity values from the
[0..ndisp) interval to HSV color space (where different disparity values correspond to different
hues) and then converting the pixels to RGB for visualization.
*/
CV_EXPORTS void drawColorDisp(InputArray src_disp, OutputArray dst_disp, int ndisp, Stream& stream = Stream::Null());
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDASTEREO_HPP__ */

@ -50,54 +50,178 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/imgproc.hpp"
/**
@addtogroup cuda
@{
@defgroup cudawarping Image Warping
@}
*/
namespace cv { namespace cuda {
//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
//! supports only CV_32FC1 map type
//! @addtogroup cudawarping
//! @{
/** @brief Applies a generic geometrical transformation to an image.
@param src Source image.
@param dst Destination image with the same size as xmap and the same type as src .
@param xmap X values. Only CV_32FC1 type is supported.
@param ymap Y values. Only CV_32FC1 type is supported.
@param interpolation Interpolation method (see resize ). INTER_NEAREST , INTER_LINEAR and
INTER_CUBIC are supported for now.
@param borderMode Pixel extrapolation method (see borderInterpolate ). BORDER_REFLECT101 ,
BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
@param borderValue Value used in case of a constant border. By default, it is 0.
@param stream Stream for the asynchronous version.
The function transforms the source image using the specified map:
\f[\texttt{dst} (x,y) = \texttt{src} (xmap(x,y), ymap(x,y))\f]
Values of pixels with non-integer coordinates are computed using bilinear interpolation.
@sa remap
*/
CV_EXPORTS void remap(InputArray src, OutputArray dst, InputArray xmap, InputArray ymap,
int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
Stream& stream = Stream::Null());
//! resizes the image
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
/** @brief Resizes an image.
@param src Source image.
@param dst Destination image with the same type as src . The size is dsize (when it is non-zero)
or the size is computed from src.size() , fx , and fy .
@param dsize Destination image size. If it is zero, it is computed as:
\f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f]
Either dsize or both fx and fy must be non-zero.
@param fx Scale factor along the horizontal axis. If it is zero, it is computed as:
\f[\texttt{(double)dsize.width/src.cols}\f]
@param fy Scale factor along the vertical axis. If it is zero, it is computed as:
\f[\texttt{(double)dsize.height/src.rows}\f]
@param interpolation Interpolation method. INTER_NEAREST , INTER_LINEAR and INTER_CUBIC are
supported for now.
@param stream Stream for the asynchronous version.
@sa resize
*/
CV_EXPORTS void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
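/* A minimal usage sketch (illustrative): halve an image with scale factors, letting
   dsize be computed:
@code{.cpp}
    #include <opencv2/cudawarping.hpp>

    void halveGpu(const cv::Mat& src)
    {
        cv::cuda::GpuMat d_src(src), d_half;
        cv::cuda::resize(d_src, d_half, cv::Size(), 0.5, 0.5, cv::INTER_LINEAR);
    }
@endcode
*/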
//! warps the image using affine transformation
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
/** @brief Applies an affine transformation to an image.
@param src Source image. CV_8U , CV_16U , CV_32S , or CV_32F depth and 1, 3, or 4 channels are
supported.
@param dst Destination image with the same type as src . The size is dsize .
@param M *2x3* transformation matrix.
@param dsize Size of the destination image.
@param flags Combination of interpolation methods (see resize) and the optional flag
WARP_INVERSE_MAP specifying that M is an inverse transformation ( dst=\>src ). Only
INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC interpolation methods are supported.
@param borderMode Pixel extrapolation method (see borderInterpolate ).
@param borderValue Value used in case of a constant border. By default, it is 0.
@param stream Stream for the asynchronous version.
@sa warpAffine
*/
CV_EXPORTS void warpAffine(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR,
int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
/** @brief Builds transformation maps for affine transformation.
@param M *2x3* transformation matrix.
@param inverse Flag specifying that M is an inverse transformation ( dst=\>src ).
@param dsize Size of the destination image.
@param xmap X values with CV_32FC1 type.
@param ymap Y values with CV_32FC1 type.
@param stream Stream for the asynchronous version.
@sa cuda::warpAffine , cuda::remap
*/
CV_EXPORTS void buildWarpAffineMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null());
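/* A minimal usage sketch (illustrative): precompute the maps once, then reuse them
   with cuda::remap for every frame instead of calling cuda::warpAffine repeatedly:
@code{.cpp}
    #include <opencv2/imgproc.hpp>     // getRotationMatrix2D
    #include <opencv2/cudawarping.hpp>

    void warpByMapsGpu(const cv::Mat& src)
    {
        cv::Mat M = cv::getRotationMatrix2D(
            cv::Point2f(src.cols / 2.f, src.rows / 2.f), 30.0, 1.0); // 2x3 matrix
        cv::cuda::GpuMat xmap, ymap;
        cv::cuda::buildWarpAffineMaps(M, false, src.size(), xmap, ymap);
        cv::cuda::GpuMat d_src(src), d_dst;
        cv::cuda::remap(d_src, d_dst, xmap, ymap, cv::INTER_LINEAR);
    }
@endcode
*/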
//! warps the image using perspective transformation
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
/** @brief Applies a perspective transformation to an image.
@param src Source image. CV_8U , CV_16U , CV_32S , or CV_32F depth and 1, 3, or 4 channels are
supported.
@param dst Destination image with the same type as src . The size is dsize .
@param M *3x3* transformation matrix.
@param dsize Size of the destination image.
@param flags Combination of interpolation methods (see resize ) and the optional flag
WARP_INVERSE_MAP specifying that M is the inverse transformation ( dst =\> src ). Only
INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC interpolation methods are supported.
@param borderMode Pixel extrapolation method (see borderInterpolate ).
@param borderValue Value used in case of a constant border. By default, it is 0.
@param stream Stream for the asynchronous version.
@sa warpPerspective
*/
CV_EXPORTS void warpPerspective(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR,
int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
/** @brief Builds transformation maps for perspective transformation.
@param M *3x3* transformation matrix.
@param inverse Flag specifying that M is an inverse transformation ( dst=\>src ).
@param dsize Size of the destination image.
@param xmap X values with CV_32FC1 type.
@param ymap Y values with CV_32FC1 type.
@param stream Stream for the asynchronous version.
@sa cuda::warpPerspective , cuda::remap
*/
CV_EXPORTS void buildWarpPerspectiveMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null());
//! builds plane warping maps
/** @brief Builds plane warping maps.
*/
CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, InputArray T, float scale,
OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
//! builds cylindrical warping maps
/** @brief Builds cylindrical warping maps.
*/
CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
//! builds spherical warping maps
/** @brief Builds spherical warping maps.
*/
CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
//! rotates an image around the origin (0,0) and then shifts it
//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
/** @brief Rotates an image around the origin (0,0) and then shifts it.
@param src Source image. Supports 1, 3 or 4 channels images with CV_8U , CV_16U or CV_32F
depth.
@param dst Destination image with the same type as src . The size is dsize .
@param dsize Size of the destination image.
@param angle Angle of rotation in degrees.
@param xShift Shift along the horizontal axis.
@param yShift Shift along the vertical axis.
@param interpolation Interpolation method. Only INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC
are supported.
@param stream Stream for the asynchronous version.
@sa cuda::warpAffine
*/
CV_EXPORTS void rotate(InputArray src, OutputArray dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
//! smoothes the source image and downsamples it
/** @brief Smoothes an image and downsamples it.
@param src Source image.
@param dst Destination image. Will have Size((src.cols+1)/2, (src.rows+1)/2) size and the same
type as src .
@param stream Stream for the asynchronous version.
@sa pyrDown
*/
CV_EXPORTS void pyrDown(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
//! upsamples the source image and then smoothes it
/** @brief Upsamples an image and then smoothes it.
@param src Source image.
@param dst Destination image. Will have Size(src.cols\*2, src.rows\*2) size and the same type as
src .
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS void pyrUp(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
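/* A minimal usage sketch (illustrative): one pyramid step down and back up:
@code{.cpp}
    #include <opencv2/cudawarping.hpp>

    void pyramidStepGpu(const cv::Mat& src)
    {
        cv::cuda::GpuMat d_src(src), d_down, d_up;
        cv::cuda::pyrDown(d_src, d_down); // Size((cols+1)/2, (rows+1)/2)
        cv::cuda::pyrUp(d_down, d_up);    // roughly back to the original size
    }
@endcode
*/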
class CV_EXPORTS ImagePyramid : public Algorithm
@ -108,6 +232,8 @@ public:
CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
//! @}
}} // namespace cv { namespace cuda {
#endif /* __OPENCV_CUDAWARPING_HPP__ */

@ -109,4 +109,11 @@
#include "cudev/expr/unary_op.hpp"
#include "cudev/expr/warping.hpp"
/**
@addtogroup cuda
@{
@defgroup cudev Device layer
@}
*/
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
struct Block
{
__device__ __forceinline__ static uint blockId()
@ -122,6 +125,9 @@ __device__ __forceinline__ static void blockTransfrom(InIt1 beg1, InIt1 end1, In
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
*o = op(*t1, *t2);
}
//! @}
}}
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class T> struct DynamicSharedMem
{
__device__ __forceinline__ operator T*()
@ -81,6 +84,8 @@ template <> struct DynamicSharedMem<double>
}
};
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// blockReduce
template <int N, typename T, class Op>
@ -123,6 +126,8 @@ __device__ __forceinline__ void blockReduceKeyVal(const tuple<KP0, KP1, KP2, KP3
>(skeys, key, svals, val, tid, cmp);
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <int THREADS_NUM, typename T>
__device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
{
@ -96,6 +99,8 @@ __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint t
return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data;
}
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// NormL1
template <typename T> struct NormL1
@ -179,6 +182,8 @@ struct NormHamming
}
};
//! @}
}}
#endif

@ -52,6 +52,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
using namespace cv::cuda;
// CV_CUDEV_ARCH
@ -84,6 +87,8 @@ __host__ __device__ __forceinline__ int divUp(int total, int grain)
#define CV_PI_F ((float)CV_PI)
#define CV_LOG2_F ((float)CV_LOG2)
//! @}
}}
#endif

@ -55,6 +55,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
#define CV_CUDEV_EXPR_BINARY_FUNC(name) \
template <class SrcPtr1, class SrcPtr2> \
__host__ Expr<BinaryTransformPtrSz<typename PtrTraits<SrcPtr1>::ptr_type, typename PtrTraits<SrcPtr2>::ptr_type, name ## _func<typename LargerType<typename PtrTraits<SrcPtr1>::value_type, typename PtrTraits<SrcPtr2>::value_type>::type> > > \
@ -70,6 +73,8 @@ CV_CUDEV_EXPR_BINARY_FUNC(absdiff)
#undef CV_CUDEV_EXPR_BINARY_FUNC
//! @}
}}
#endif

@ -58,6 +58,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// Binary Operations
#define CV_CUDEV_EXPR_BINOP_INST(op, functor) \
@ -230,6 +233,8 @@ CV_CUDEV_EXPR_BINOP_INST(>>, bit_rshift)
#undef CV_CUDEV_EXPR_BINOP_INST
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
#define CV_CUDEV_EXPR_CVTCOLOR_INST(name) \
template <class SrcPtr> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, name ## _func<typename VecTraits<typename PtrTraits<SrcPtr>::value_type>::elem_type> > > \
@ -277,6 +280,8 @@ CV_CUDEV_EXPR_CVTCOLOR_INST(Luv4_to_LBGRA)
#undef CV_CUDEV_EXPR_CVTCOLOR_INST
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// derivX
template <class SrcPtr>
@ -116,6 +119,8 @@ laplacian_(const SrcPtr& src)
return makeExpr(laplacianPtr<ksize>(src));
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Body> struct Expr
{
Body body;
@ -87,6 +90,8 @@ template <class Body> struct PtrTraits< Expr<Body> >
}
};
//! @}
}}
#endif

@ -56,6 +56,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// min/max
template <class SrcPtr1, class SrcPtr2>
@ -127,6 +130,8 @@ lut_(const SrcPtr& src, const TablePtr& tbl)
return makeExpr(lutPtr(src, tbl));
}
//! @}
}}
#endif

@ -56,6 +56,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// sum
template <class SrcPtr> struct SumExprBody
@ -254,6 +257,8 @@ integral_(const SrcPtr& src)
return makeExpr(body);
}
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
#define CV_CUDEV_EXPR_UNARY_FUNC(name) \
template <class SrcPtr> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<SrcPtr>::ptr_type, name ## _func<typename PtrTraits<SrcPtr>::value_type> > > \
@ -93,6 +96,8 @@ pow_(const SrcPtr& src, float power)
return makeExpr(transformPtr(src, bind2nd(pow_func<typename PtrTraits<SrcPtr>::value_type>(), power)));
}
//! @}
}}
#endif

@ -57,6 +57,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
#define CV_CUDEV_EXPR_UNOP_INST(op, functor) \
template <typename T> \
__host__ Expr<UnaryTransformPtrSz<typename PtrTraits<GpuMat_<T> >::ptr_type, functor<T> > > \
@ -89,6 +92,8 @@ CV_CUDEV_EXPR_UNOP_INST(~, bit_not)
#undef CV_CUDEV_EXPR_UNOP_INST
//! @}
}}
#endif

@ -57,6 +57,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// resize
template <class SrcPtr>
@ -166,6 +169,8 @@ transpose_(const SrcPtr& src)
return makeExpr(body);
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// Various 3/4-channel to 3/4-channel RGB transformations
#define CV_CUDEV_RGB2RGB_INST(name, scn, dcn, bidx) \
@ -469,6 +472,8 @@ CV_CUDEV_RGB5x52GRAY_INST(BGR565_to_GRAY, 6)
#undef CV_CUDEV_RGB5x52GRAY_INST
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// Function Objects
template <typename _Arg, typename _Result> struct unary_function
@ -873,6 +876,8 @@ template <typename F> struct IsBinaryFunction
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Op, int n> struct UnaryTupleAdapter
{
typedef typename Op::result_type result_type;
@ -93,6 +96,8 @@ __host__ __device__ BinaryTupleAdapter<Op, n0, n1> binaryTupleAdapter(const Op&
return a;
}
//! @}
}}
#endif

@ -57,6 +57,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridCopy_(const SrcPtr& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@ -447,6 +450,8 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz<D0>, Glob
gridCopy_<DefaultCopyPolicy>(src, dst, stream);
}
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <int BIN_COUNT, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridHistogram_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@ -114,6 +117,8 @@ __host__ void gridHistogram(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& st
gridHistogram_<BIN_COUNT, DefaultHistogramPolicy>(src, dst, stream);
}
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class SrcPtr, typename DstType>
__host__ void gridIntegral(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
@ -64,6 +67,8 @@ __host__ void gridIntegral(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& str
integral_detail::integral(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream));
}
//! @}
}}
#endif

@ -55,6 +55,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Brd, class SrcPtr, typename DstType>
__host__ void gridPyrDown_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
@ -83,6 +86,8 @@ __host__ void gridPyrUp(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream
pyramids_detail::pyrUp(shrinkPtr(src), shrinkPtr(dst), rows, cols, dst.rows, dst.cols, StreamAccessor::getStream(stream));
}
//! @}
}}
#endif

@ -57,6 +57,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@ -370,6 +373,8 @@ __host__ void gridCountNonZero(const SrcPtr& src, GpuMat_<ResType>& dst, Stream&
gridCountNonZero_<DefaultGlobReducePolicy>(src, dst, stream);
}
//! @}
}}
#endif

@ -59,6 +59,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <typename T> struct Sum : plus<T>
{
typedef T work_type;
@ -225,6 +228,8 @@ __host__ void gridReduceToColumn(const SrcPtr& src, GpuMat_<ResType>& dst, Strea
gridReduceToColumn_<Reductor, DefaultReduceToVecPolicy>(src, dst, stream);
}
//! @}
}}
#endif

@ -57,6 +57,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Policy, class SrcPtrTuple, typename DstType, class MaskPtr>
__host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@ -579,6 +582,8 @@ __host__ void gridSplit(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[COUNT], Str
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
//! @}
}}
#endif

@ -57,6 +57,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__host__ void gridTransformUnary_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@ -536,6 +539,8 @@ __host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>,
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridTranspose_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
@ -98,6 +101,8 @@ __host__ void gridTranspose(const SrcPtr& src, const GlobPtrSz<DstType>& dst, St
gridTranspose_<DefaultTransposePolicy>(src, dst, stream);
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <typename T> struct ConstantPtr
{
typedef T value_type;
@ -88,6 +91,8 @@ template <typename T> struct PtrTraits< ConstantPtrSz<T> > : PtrTraitsBase< Cons
{
};
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// derivX
template <class SrcPtr> struct DerivXPtr
@ -388,6 +391,8 @@ template <int ksize, class SrcPtr> struct PtrTraits< LaplacianPtrSz<ksize, SrcPt
{
};
//! @}
}}
#endif

@ -52,6 +52,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// BrdConstant
template <class SrcPtr> struct BrdConstant
@ -214,6 +217,8 @@ __host__ BrdBase<BrdWrap, typename PtrTraits<SrcPtr>::ptr_type> brdWrap(const Sr
return b;
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <typename T> struct GlobPtr
{
typedef T value_type;
@ -106,6 +109,8 @@ template <typename T> struct PtrTraits< GlobPtrSz<T> > : PtrTraitsBase<GlobPtrSz
{
};
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <typename T>
class GpuMat_ : public GpuMat
{
@ -154,6 +157,8 @@ template <typename T> struct PtrTraits< GpuMat_<T> > : PtrTraitsBase<GpuMat_<T>,
{
};
//! @}
}}
#include "detail/gpumat.hpp"

@ -55,6 +55,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// Nearest
template <class SrcPtr> struct NearestInterPtr
@ -380,6 +383,8 @@ template <class SrcPtr> struct PtrTraits< CommonAreaInterPtrSz<SrcPtr> > : PtrTr
{
};
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class SrcPtr, class TablePtr> struct LutPtr
{
typedef typename PtrTraits<TablePtr>::value_type value_type;
@ -95,6 +98,8 @@ template <class SrcPtr, class TablePtr> struct PtrTraits< LutPtrSz<SrcPtr, Table
{
};
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
struct WithOutMask
{
typedef bool value_type;
@ -98,6 +101,8 @@ template <class MaskPtr> struct PtrTraits< SingleMaskChannelsSz<MaskPtr> > : Ptr
{
};
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class SrcPtr, class MapPtr> struct RemapPtr1
{
typedef typename PtrTraits<SrcPtr>::value_type value_type;
@ -149,6 +152,8 @@ template <class SrcPtr, class MapXPtr, class MapYPtr> struct PtrTraits< RemapPtr
{
};
//! @}
}}
#endif

@ -54,6 +54,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class SrcPtr> struct ResizePtr
{
typedef typename PtrTraits<SrcPtr>::value_type value_type;
@ -98,6 +101,8 @@ template <class SrcPtr> struct PtrTraits< ResizePtrSz<SrcPtr> > : PtrTraitsBase<
{
};
//! @}
}}
#endif

@ -92,6 +92,9 @@ namespace
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
#if CUDART_VERSION >= 5050
template <typename T> struct TexturePtr
@ -248,6 +251,8 @@ template <typename T> struct PtrTraits< Texture<T> > : PtrTraitsBase<Texture<T>,
#endif
//! @}
}}
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class Ptr2DSz, class Ptr2D> struct PtrTraitsBase
{
typedef Ptr2DSz ptr_sz_type;
@ -96,6 +99,8 @@ __host__ int getCols(const Ptr2DSz& ptr)
return PtrTraits<Ptr2DSz>::getCols(ptr);
}
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// UnaryTransformPtr
template <class SrcPtr, class Op> struct UnaryTransformPtr
@ -146,6 +149,8 @@ template <class Src1Ptr, class Src2Ptr, class Op> struct PtrTraits< BinaryTransf
{
};
//! @}
}}
#endif

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// affine
struct AffineMapPtr
@ -147,6 +150,8 @@ warpPerspectivePtr(const SrcPtr& src, Size dstSize, const GpuMat_<float>& warpMa
return remapPtr(src, perspectiveMap(dstSize, warpMat));
}
//! @}
}}
#endif

@ -52,6 +52,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class PtrTuple> struct ZipPtr;
template <class Ptr0, class Ptr1> struct ZipPtr< tuple<Ptr0, Ptr1> > : tuple<Ptr0, Ptr1>
@ -168,6 +171,8 @@ template <class PtrTuple> struct PtrTraits< ZipPtrSz<PtrTuple> > : PtrTraitsBase
{
};
//! @}
}}
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// atomicAdd
__device__ __forceinline__ int atomicAdd(int* address, int val)
@ -192,6 +195,8 @@ __device__ static double atomicMax(double* address, double val)
#endif
}
//! @}
}}
#endif

@ -52,6 +52,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <class T> struct numeric_limits;
template <> struct numeric_limits<bool>
@ -119,6 +122,8 @@ template <> struct numeric_limits<double>
static const bool is_signed = true;
};
//! @}
}}
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <typename T> __device__ __forceinline__ T saturate_cast(uchar v) { return T(v); }
template <typename T> __device__ __forceinline__ T saturate_cast(schar v) { return T(v); }
template <typename T> __device__ __forceinline__ T saturate_cast(ushort v) { return T(v); }
@ -267,6 +270,8 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
#endif
}
//! @}
}}
#endif

@ -128,6 +128,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// 2
__device__ __forceinline__ uint vadd2(uint a, uint b)
@ -908,6 +911,8 @@ __device__ __forceinline__ uint vmin4(uint a, uint b)
return r;
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
using tuple_detail::tuple;
using tuple_detail::tuple_size;
using tuple_detail::get;
@ -75,6 +78,8 @@ template <class Tuple, template <typename T> class CvtOp> struct ConvertTuple
typedef typename tuple_detail::ConvertTuple<Tuple, tuple_size<Tuple>::value, CvtOp>::type type;
};
//! @}
}}
#endif

@ -52,6 +52,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// NullType
struct NullType {};
@ -164,6 +167,8 @@ template <typename A, typename B> struct LargerType
>::type type;
};
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// saturate_cast
namespace vec_math_detail
@ -931,6 +934,8 @@ CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
//! @}
}}
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// MakeVec
template<typename T, int CN> struct MakeVec;
@ -177,6 +180,8 @@ template<> struct VecTraits<char4>
__host__ __device__ __forceinline__ static char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
//! @}
}}
// DataType

@ -53,6 +53,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
// warpReduce
template <typename T, class Op>
@ -201,6 +204,8 @@ smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t
return make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
}
//! @}
}}
#endif

@ -52,6 +52,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
template <typename T>
__device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
{
@ -94,6 +97,8 @@ __device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint ti
return warpScanInclusive(data, smem, tid) - data;
}
//! @}
}}
#endif

@ -51,6 +51,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
#if CV_CUDEV_ARCH >= 300
// shfl
@ -419,6 +422,8 @@ CV_CUDEV_SHFL_XOR_VEC_INST(double)
#endif // CV_CUDEV_ARCH >= 300
//! @}
}}
#endif

@ -50,6 +50,9 @@
namespace cv { namespace cudev {
//! @addtogroup cudev
//! @{
enum
{
LOG_WARP_SIZE = 5,
@ -117,6 +120,8 @@ __device__ __forceinline__ void warpYota(OutIt beg, OutIt end, T value)
*t = value;
}
//! @}
}}
#endif

@ -46,18 +46,54 @@
#include "opencv2/core.hpp"
#include "opencv2/flann/miniflann.hpp"
/**
@defgroup features2d 2D Features Framework
@{
@defgroup features2d_main Feature Detection and Description
@defgroup features2d_match Descriptor Matchers
Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that enables you to
easily switch between different algorithms solving the same problem. This section is devoted to
matching descriptors that are represented as vectors in a multidimensional space. All objects that
implement vector descriptor matchers inherit the DescriptorMatcher interface.
@note
- An example explaining keypoint matching can be found at
opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
- An example on descriptor matching evaluation can be found at
opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
- An example on one to many image matching can be found at
opencv_source_code/samples/cpp/matching_to_many_images.cpp
@defgroup features2d_draw Drawing Functions of Keypoints and Matches
@defgroup features2d_category Object Categorization
This section describes approaches based on local 2D features and used to categorize objects.
@note
- A complete Bag-Of-Words sample can be found at
opencv_source_code/samples/cpp/bagofwords_classification.cpp
- (Python) An example using the features2D framework to perform object categorization can be
found at opencv_source_code/samples/python2/find_obj.py
@}
*/
namespace cv
{
//! @addtogroup features2d
//! @{
// //! writes vector of keypoints to the file storage
// CV_EXPORTS void write(FileStorage& fs, const String& name, const std::vector<KeyPoint>& keypoints);
// //! reads vector of keypoints from the specified file storage node
// CV_EXPORTS void read(const FileNode& node, CV_OUT std::vector<KeyPoint>& keypoints);
/** @brief A class that filters a vector of keypoints.

Because it is currently difficult to provide a convenient interface for all usage scenarios of the
keypoints filter class, it has only the few static methods that are needed right now.
*/
class CV_EXPORTS KeyPointsFilter
{
@ -91,44 +127,66 @@ public:
/************************************ Base Classes ************************************/
/** @brief Abstract base class for 2D image feature detectors and descriptor extractors
*/
class CV_EXPORTS_W Feature2D : public virtual Algorithm
{
public:
virtual ~Feature2D();
/** @brief Detects keypoints in an image (first variant) or image set (second variant).
@param image Image.
@param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set
of keypoints detected in images[i] .
@param mask Mask specifying where to look for keypoints (optional). It must be an 8-bit integer
matrix with non-zero values in the region of interest.
*/
CV_WRAP virtual void detect( InputArray image,
CV_OUT std::vector<KeyPoint>& keypoints,
InputArray mask=noArray() );
/** @overload
@param images Image set.
@param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set
of keypoints detected in images[i] .
@param masks Masks for each input image specifying where to look for keypoints (optional).
masks[i] is a mask for images[i].
*/
virtual void detect( InputArrayOfArrays images,
std::vector<std::vector<KeyPoint> >& keypoints,
InputArrayOfArrays masks=noArray() );
/** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set
(second variant).
@param image Image.
@param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be
computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint
with several dominant orientations (for each orientation).
@param descriptors Computed descriptors. In the second variant of the method descriptors[i] are
descriptors computed for a keypoints[i]. Row j in descriptors (or descriptors[i]) is the
descriptor for the j-th keypoint.
*/
CV_WRAP virtual void compute( InputArray image,
CV_OUT CV_IN_OUT std::vector<KeyPoint>& keypoints,
OutputArray descriptors );
/** @overload
@param images Image set.
@param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be
computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint
with several dominant orientations (for each orientation).
@param descriptors Computed descriptors. In the second variant of the method descriptors[i] are
descriptors computed for a keypoints[i]. Row j in descriptors (or descriptors[i]) is the
descriptor for the j-th keypoint.
*/
virtual void compute( InputArrayOfArrays images,
std::vector<std::vector<KeyPoint> >& keypoints,
OutputArrayOfArrays descriptors );
/** Detects keypoints and computes the descriptors */
CV_WRAP virtual void detectAndCompute( InputArray image, InputArray mask,
CV_OUT std::vector<KeyPoint>& keypoints,
OutputArray descriptors,
@ -138,33 +196,96 @@ public:
CV_WRAP virtual int descriptorType() const;
CV_WRAP virtual int defaultNorm() const;
//! Returns true if the detector object is empty
CV_WRAP virtual bool empty() const;
};
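A minimal usage sketch of this interface (the image path and the choice of ORB here are
illustrative assumptions, not mandated by this header):
@code
#include "opencv2/features2d.hpp"
#include "opencv2/imgcodecs.hpp"
using namespace cv;

Mat img = imread("box.png", IMREAD_GRAYSCALE); // any 8-bit grayscale image
Ptr<Feature2D> detector = ORB::create();       // any Feature2D implementation fits here
std::vector<KeyPoint> keypoints;
Mat descriptors;
// detect keypoints and compute their descriptors in one pass; pass noArray() when no mask is needed
detector->detectAndCompute(img, noArray(), keypoints, descriptors);
@endcode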
/** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch
between different algorithms solving the same problem. All objects that implement keypoint detectors
inherit the FeatureDetector interface. */
typedef Feature2D FeatureDetector;
/** Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that enables you
to easily switch between different algorithms solving the same problem. This section is devoted to
computing descriptors represented as vectors in a multidimensional space. All objects that implement
the vector descriptor extractors inherit the DescriptorExtractor interface.
*/
typedef Feature2D DescriptorExtractor;
//! @addtogroup features2d_main
//! @{
/** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11 .
*/
class CV_EXPORTS_W BRISK : public Feature2D
{
public:
/** @brief The BRISK constructor
@param thresh FAST/AGAST detection threshold score.
@param octaves detection octaves. Use 0 to do single scale.
@param patternScale apply this scale to the pattern used for sampling the neighbourhood of a
keypoint.
*/
CV_WRAP static Ptr<BRISK> create(int thresh=30, int octaves=3, float patternScale=1.0f);
/** @brief The BRISK constructor for a custom pattern
@param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for
keypoint scale 1).
@param numberList defines the number of sampling points on the sampling circle. Must be the same
size as radiusList.
@param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint
scale 1).
@param dMin threshold for the long pairings used for orientation determination (in pixels for
keypoint scale 1).
@param indexChange index remapping of the bits. */
CV_WRAP static Ptr<BRISK> create(const std::vector<float> &radiusList, const std::vector<int> &numberList,
float dMax=5.85f, float dMin=8.2f, const std::vector<int>& indexChange=std::vector<int>());
};
/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
described in @cite RRKB11 . The algorithm uses FAST in pyramids to detect stable keypoints, selects
the strongest features using FAST or Harris response, finds their orientation using first-order
moments and computes the descriptors using BRIEF (where the coordinates of random point pairs (or
k-tuples) are rotated according to the measured orientation).
*/
class CV_EXPORTS_W ORB : public Feature2D
{
public:
enum { kBytes = 32, HARRIS_SCORE=0, FAST_SCORE=1 };
/** @brief The ORB constructor
@param nfeatures The maximum number of features to retain.
@param scaleFactor Pyramid decimation ratio, greater than 1. scaleFactor==2 means the classical
pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor
will degrade feature matching scores dramatically. On the other hand, a scale factor too close to 1
means that covering a certain scale range requires more pyramid levels, so the speed
will suffer.
@param nlevels The number of pyramid levels. The smallest level will have linear size equal to
input_image_linear_size/pow(scaleFactor, nlevels).
@param edgeThreshold This is size of the border where the features are not detected. It should
roughly match the patchSize parameter.
@param firstLevel It should be 0 in the current implementation.
@param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The
default value 2 means the BRIEF where we take a random point pair and compare their brightnesses,
so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3
random points (of course, those point coordinates are random, but they are generated from the
pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel
rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such
output will occupy 2 bits, and therefore it will need a special variant of Hamming distance,
denoted as NORM_HAMMING2 (2 bits per bin). When WTA_K=4, we take 4 random points to compute each
bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3).
@param scoreType The default HARRIS_SCORE means that Harris algorithm is used to rank features
(the score is written to KeyPoint::score and is used to retain best nfeatures features);
FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints,
but it is a little faster to compute.
@param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller
pyramid layers the perceived image area covered by a feature will be larger.
@param fastThreshold Threshold for the internal FAST keypoint detector.
*/
CV_WRAP static Ptr<ORB> create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31,
int firstLevel=0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20);
@ -196,15 +317,16 @@ public:
CV_WRAP virtual int getFastThreshold() const = 0;
};
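To illustrate the WTA_K/norm coupling described above, a hedged sketch (parameter values are
examples only):
@code
// WTA_K==2 (default) produces binary descriptors that are compared with NORM_HAMMING
Ptr<ORB> orb2 = ORB::create(500, 1.2f, 8, 31, 0, 2);
BFMatcher matcher2(NORM_HAMMING);

// WTA_K==3 packs 2 bits per descriptor bin, so NORM_HAMMING2 must be used instead
Ptr<ORB> orb3 = ORB::create(500, 1.2f, 8, 31, 0, 3);
BFMatcher matcher3(NORM_HAMMING2);
@endcode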
/** @brief Maximally stable extremal region extractor.

The class encapsulates all the parameters of the MSER extraction algorithm (see
<http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions>). Also see
<http://code.opencv.org/projects/opencv/wiki/MSER> for useful comments and parameters description.
It returns the regions, each of which is encoded as a contour.

@note
- (Python) A complete example showing the use of the MSER detector can be found at
opencv_source_code/samples/python2/mser.py
*/
class CV_EXPORTS_W MSER : public Feature2D
{
public:
@ -231,13 +353,38 @@ public:
CV_WRAP virtual bool getPass2Only() const = 0;
};
/** @overload */
CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
int threshold, bool nonmaxSuppression=true );
/** @brief Detects corners using the FAST algorithm
@param image grayscale image where keypoints (corners) are detected.
@param keypoints keypoints detected on the image.
@param threshold threshold on difference between intensity of the central pixel and pixels of a
circle around this pixel.
@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners
(keypoints).
@param type one of the three neighborhoods as defined in the paper:
FastFeatureDetector::TYPE_9_16, FastFeatureDetector::TYPE_7_12,
FastFeatureDetector::TYPE_5_8
Detects corners using the FAST algorithm by @cite Rosten06 .
@note In the Python API, the types are given as cv2.FAST_FEATURE_DETECTOR_TYPE_5_8,
cv2.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv2.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner
detection, use the cv2.FAST.detect() method.
*/
CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
int threshold, bool nonmaxSuppression, int type );
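A short sketch of this variant (gray is assumed to be any 8-bit grayscale Mat; the threshold
value is illustrative):
@code
std::vector<KeyPoint> corners;
// threshold of 40 with non-maximum suppression and the 9_16 neighborhood
FAST(gray, corners, 40, true, FastFeatureDetector::TYPE_9_16);
@endcode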
//! @} features2d_main
//! @addtogroup features2d_main
//! @{
/** @brief Wrapping class for feature detection using the FAST method.
*/
class CV_EXPORTS_W FastFeatureDetector : public Feature2D
{
public:
@ -261,7 +408,8 @@ public:
CV_WRAP virtual int getType() const = 0;
};
/** @brief Wrapping class for feature detection using the goodFeaturesToTrack function.
*/
class CV_EXPORTS_W GFTTDetector : public Feature2D
{
public:
@ -286,7 +434,37 @@ public:
CV_WRAP virtual double getK() const = 0;
};
/** @brief Class for extracting blobs from an image.
The class implements a simple algorithm for extracting blobs from an image:
1. Convert the source image to binary images by applying thresholding with several thresholds from
minThreshold (inclusive) to maxThreshold (exclusive) with distance thresholdStep between
neighboring thresholds.
2. Extract connected components from every binary image by findContours and calculate their
centers.
3. Group centers from several binary images by their coordinates. Close centers form one group that
corresponds to one blob, which is controlled by the minDistBetweenBlobs parameter.
4. From the groups, estimate final centers of blobs and their radii and return them as locations
and sizes of keypoints.
This class applies several filters to the returned blobs. You should set filterBy\* to true/false
to turn the corresponding filter on or off. Available filters:
- **By color**. This filter compares the intensity of a binary image at the center of a blob to
blobColor. If they differ, the blob is filtered out. Use blobColor = 0 to extract dark blobs
and blobColor = 255 to extract light blobs.
- **By area**. Extracted blobs have an area between minArea (inclusive) and maxArea (exclusive).
- **By circularity**. Extracted blobs have circularity
(\f$\frac{4*\pi*Area}{perimeter * perimeter}\f$) between minCircularity (inclusive) and
maxCircularity (exclusive).
- **By ratio of the minimum inertia to maximum inertia**. Extracted blobs have this ratio
between minInertiaRatio (inclusive) and maxInertiaRatio (exclusive).
- **By convexity**. Extracted blobs have convexity (area / area of blob convex hull) between
minConvexity (inclusive) and maxConvexity (exclusive).
Default values of parameters are tuned to extract dark circular blobs.
*/
class CV_EXPORTS_W SimpleBlobDetector : public Feature2D
{
public:
@ -322,9 +500,16 @@ public:
create(const SimpleBlobDetector::Params &parameters = SimpleBlobDetector::Params());
};
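A hedged sketch of the parameter-driven workflow described above (all values and the input image
gray are illustrative assumptions):
@code
SimpleBlobDetector::Params params;
params.filterByArea = true;   // keep blobs with an area between 100 and 5000 pixels
params.minArea = 100;
params.maxArea = 5000;
Ptr<SimpleBlobDetector> blobDetector = SimpleBlobDetector::create(params);
std::vector<KeyPoint> blobs;  // blob centers and sizes are returned as keypoints
blobDetector->detect(gray, blobs);
@endcode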
//! @} features2d_main
//! @addtogroup features2d_main
//! @{
/** @brief Class implementing the KAZE keypoint detector and descriptor extractor, described in @cite ABD12 .

@note AKAZE descriptor can only be used with KAZE or AKAZE keypoints.
*/
class CV_EXPORTS_W KAZE : public Feature2D
{
@ -337,6 +522,16 @@ public:
DIFF_CHARBONNIER = 3
};
/** @brief The KAZE constructor
@param extended Set to enable extraction of extended (128-byte) descriptor.
@param upright Set to enable use of upright descriptors (non rotation-invariant).
@param threshold Detector response threshold to accept a point
@param nOctaves Maximum octave evolution of the image
@param nOctaveLayers Default number of sublevels per scale level
@param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or
DIFF_CHARBONNIER
*/
CV_WRAP static Ptr<KAZE> create(bool extended=false, bool upright=false,
float threshold = 0.001f,
int nOctaves = 4, int nOctaveLayers = 4,
@ -361,9 +556,13 @@ public:
CV_WRAP virtual int getDiffusivity() const = 0;
};
/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13 .

@note AKAZE descriptors can only be used with KAZE or AKAZE keypoints. For performance reasons,
prefer *operator()* over separate calls to *extract* and *detect*.
*/
class CV_EXPORTS_W AKAZE : public Feature2D
{
public:
@ -376,6 +575,18 @@ public:
DESCRIPTOR_MLDB = 5
};
/** @brief The AKAZE constructor
@param descriptor_type Type of the extracted descriptor: DESCRIPTOR_KAZE,
DESCRIPTOR_KAZE_UPRIGHT, DESCRIPTOR_MLDB or DESCRIPTOR_MLDB_UPRIGHT.
@param descriptor_size Size of the descriptor in bits. 0 -\> Full size
@param descriptor_channels Number of channels in the descriptor (1, 2, 3)
@param threshold Detector response threshold to accept a point
@param nOctaves Maximum octave evolution of the image
@param nOctaveLayers Default number of sublevels per scale level
@param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or
DIFF_CHARBONNIER
*/
CV_WRAP static Ptr<AKAZE> create(int descriptor_type=AKAZE::DESCRIPTOR_MLDB,
int descriptor_size = 0, int descriptor_channels = 3,
float threshold = 0.001f, int nOctaves = 4,
@ -403,6 +614,8 @@ public:
CV_WRAP virtual int getDiffusivity() const = 0;
};
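A minimal sketch of AKAZE with a Hamming-norm matcher (gray is an assumed 8-bit input image):
@code
Ptr<AKAZE> akaze = AKAZE::create(); // defaults: DESCRIPTOR_MLDB, full descriptor size
std::vector<KeyPoint> kpts;
Mat desc;
akaze->detectAndCompute(gray, noArray(), kpts, desc);
// MLDB descriptors are binary, so they should be matched with NORM_HAMMING
BFMatcher matcher(NORM_HAMMING);
@endcode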
//! @} features2d_main
/****************************************************************************************\
* Distance *
\****************************************************************************************/
@ -501,76 +714,153 @@ template<int cellsize> struct HammingMultilevel
/****************************************************************************************\
* DescriptorMatcher *
\****************************************************************************************/
//! @addtogroup features2d_match
//! @{
/** @brief Abstract base class for matching keypoint descriptors.
It has two groups of match methods: for matching descriptors of an image with another image or with
an image set.
*/
class CV_EXPORTS_W DescriptorMatcher : public Algorithm
{
public:
virtual ~DescriptorMatcher();
/** @brief Adds descriptors to train a CPU (trainDescCollection) or GPU (utrainDescCollection)
descriptor collection.
If the collection is not empty, the new descriptors are added to existing train descriptors.
@param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same
train image.
*/
CV_WRAP virtual void add( InputArrayOfArrays descriptors );
/** @brief Returns a constant link to the train descriptor collection trainDescCollection .
*/
CV_WRAP const std::vector<Mat>& getTrainDescriptors() const;
/** @brief Clears the train descriptor collections.
*/
CV_WRAP virtual void clear();
/** @brief Returns true if there are no train descriptors in both collections.
*/
CV_WRAP virtual bool empty() const;
/** @brief Returns true if the descriptor matcher supports masking permissible matches.
*/
CV_WRAP virtual bool isMaskSupported() const = 0;
/** @brief Trains a descriptor matcher
Trains a descriptor matcher (for example, the flann index). In all methods to match, the method
train() is run every time before matching. Some descriptor matchers (for example, BruteForceMatcher)
have an empty implementation of this method. Other matchers really train their inner structures (for
example, FlannBasedMatcher trains flann::Index ).
*/
CV_WRAP virtual void train();
/** @brief Finds the best match for each descriptor from a query set.
@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Matches. If a query descriptor is masked out in mask , no match is added for this
descriptor. So, matches size may be smaller than the query descriptors count.
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
In the first variant of this method, the train descriptors are passed as an input argument. In the
second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is
used. Optional mask (or masks) can be passed to specify which query and training descriptors can be
matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
mask.at\<uchar\>(i,j) is non-zero.
*/
CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors,
CV_OUT std::vector<DMatch>& matches, InputArray mask=noArray() ) const;
/** @brief Finds the k best matches for each descriptor from a query set.
@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
@param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
@param k Count of best matches found per each query descriptor or less if a query descriptor has
less than k possible matches in total.
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.
These extended variants of DescriptorMatcher::match methods find several best matches for each query
descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match
for the details about query and train descriptors.
*/
CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors,
CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
InputArray mask=noArray(), bool compactResult=false ) const;
/** @brief For each query descriptor, finds the training descriptors not farther than the specified distance.
@param queryDescriptors Query set of descriptors.
@param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
collection stored in the class object.
@param matches Found matches.
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.
@param maxDistance Threshold for the distance between matched descriptors. Distance means here
metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
in pixels)!
@param mask Mask specifying permissible matches between an input query and train matrices of
descriptors.
For each query descriptor, the methods find such training descriptors that the distance between the
query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
returned in the distance increasing order.
*/
void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
std::vector<std::vector<DMatch> >& matches, float maxDistance,
InputArray mask=noArray(), bool compactResult=false ) const;
/** @overload
@param queryDescriptors Query set of descriptors.
@param matches Matches. If a query descriptor is masked out in mask , no match is added for this
descriptor. So, matches size may be smaller than the query descriptors count.
@param masks Set of masks. Each masks[i] specifies permissible matches between the input query
descriptors and stored train descriptors from the i-th image trainDescCollection[i].
*/
CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector<DMatch>& matches,
InputArrayOfArrays masks=noArray() );
/** @overload
@param queryDescriptors Query set of descriptors.
@param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
@param k Count of best matches found per each query descriptor or less if a query descriptor has
less than k possible matches in total.
@param masks Set of masks. Each masks[i] specifies permissible matches between the input query
descriptors and stored train descriptors from the i-th image trainDescCollection[i].
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.
*/
CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
InputArrayOfArrays masks=noArray(), bool compactResult=false );
/** @overload
@param queryDescriptors Query set of descriptors.
@param matches Found matches.
@param maxDistance Threshold for the distance between matched descriptors. Distance means here
metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
in pixels)!
@param masks Set of masks. Each masks[i] specifies permissible matches between the input query
descriptors and stored train descriptors from the i-th image trainDescCollection[i].
@param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
the matches vector does not contain matches for fully masked-out query descriptors.
*/
void radiusMatch( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
InputArrayOfArrays masks=noArray(), bool compactResult=false );
@ -579,14 +869,28 @@ public:
// Writes matcher object to a file storage
virtual void write( FileStorage& ) const;
/** @brief Clones the matcher.
@param emptyTrainData If emptyTrainData is false, the method creates a deep copy of the object,
that is, copies both parameters and train data. If emptyTrainData is true, the method creates an
object copy with the current parameters but with empty train data.
*/
virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const = 0;
/** @brief Creates a descriptor matcher of a given type with the default parameters (using default
constructor).
@param descriptorMatcherType Descriptor matcher type. Now the following matcher types are
supported:
- `BruteForce` (it uses L2 )
- `BruteForce-L1`
- `BruteForce-Hamming`
- `BruteForce-Hamming(2)`
- `FlannBased`
*/
CV_WRAP static Ptr<DescriptorMatcher> create( const String& descriptorMatcherType );
protected:
/**
* Class to work with descriptors from several images as with one merged matrix.
* It is used e.g. in FlannBasedMatcher.
*/
@ -613,9 +917,9 @@ protected:
std::vector<int> startIdxs;
};
//! In fact the matching is implemented only by the following two methods. These methods assume
//! that the class object has been trained already. Public match methods call these methods
//! after calling train().
virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0;
virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
@ -627,23 +931,33 @@ protected:
static Mat clone_op( Mat m ) { return m.clone(); }
void checkMasks( InputArrayOfArrays masks, int queryDescriptorsCount ) const;
//! Collection of descriptors from train images.
std::vector<Mat> trainDescCollection;
std::vector<UMat> utrainDescCollection;
};
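A brief sketch of creating and using a matcher by name (queryDesc and trainDesc are assumed
binary descriptor matrices, e.g. from ORB):
@code
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
std::vector<DMatch> matches;
matcher->match(queryDesc, trainDesc, matches); // one best match per query descriptor
@endcode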
/** @brief Brute-force descriptor matcher.
For each descriptor in the first set, this matcher finds the closest descriptor in the second set
by trying each one. This descriptor matcher supports masking permissible matches of descriptor
sets.
*/
class CV_EXPORTS_W BFMatcher : public DescriptorMatcher
{
public:
/** @brief Brute-force matcher constructor.
@param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are
preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and
BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see the ORB::create constructor
description).
@param crossCheck If it is false, this is the default BFMatcher behaviour: it finds the k
nearest neighbors for each query descriptor. If crossCheck==true, then the knnMatch() method with
k=1 will only return pairs (i,j) such that for the i-th query descriptor the j-th descriptor in the
matcher's collection is the nearest and vice versa, i.e. the BFMatcher will only return consistent
pairs. This technique usually produces the best results with a minimal number of outliers when there
are enough matches. It is an alternative to the ratio test used by D. Lowe in the SIFT paper.
*/
CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false );
virtual ~BFMatcher() {}
@ -661,8 +975,12 @@ protected:
};
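A hedged sketch contrasting the two crossCheck modes described above (desc1 and desc2 are assumed
binary descriptor matrices; the 0.75 ratio is the conventional Lowe value, not part of this header):
@code
// crossCheck==true: knnMatch with k=1 returns only mutually-nearest pairs
BFMatcher crossMatcher(NORM_HAMMING, true);
std::vector<std::vector<DMatch> > pairs;
crossMatcher.knnMatch(desc1, desc2, pairs, 1);

// crossCheck==false (default): apply the ratio test instead
BFMatcher matcher(NORM_HAMMING);
std::vector<std::vector<DMatch> > knn;
matcher.knnMatch(desc1, desc2, knn, 2);
std::vector<DMatch> good;
for (size_t i = 0; i < knn.size(); i++)
    if (knn[i].size() == 2 && knn[i][0].distance < 0.75f * knn[i][1].distance)
        good.push_back(knn[i][0]);
@endcode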
/** @brief Flann-based descriptor matcher.
This matcher trains flann::Index_ on a train descriptor collection and calls its nearest search
methods to find the best matches. So, this matcher may be faster when matching a large train
collection than the brute force matcher. FlannBasedMatcher does not support masking permissible
matches of descriptor sets because flann::Index does not support this.
*/
class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher
{
@ -700,42 +1018,85 @@ protected:
int addedDescCount;
};
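A minimal sketch of the train-then-match flow (trainDesc and queryDesc are assumed CV_32F
descriptor matrices, e.g. from KAZE; FLANN indices require floating-point descriptors):
@code
FlannBasedMatcher flann;
flann.add(std::vector<Mat>(1, trainDesc)); // add one train image's descriptors
flann.train();                             // builds the flann::Index
std::vector<DMatch> matches;
flann.match(queryDesc, matches);           // matches against the trained collection
@endcode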
//! @} features2d_match
/****************************************************************************************\
* Drawing functions *
\****************************************************************************************/
//! @addtogroup features2d_draw
//! @{
struct CV_EXPORTS DrawMatchesFlags
{
enum{ DEFAULT = 0, //!< Output image matrix will be created (Mat::create),
//!< i.e. existing memory of output image may be reused.
//!< Two source images, matches and single keypoints will be drawn.
//!< For each keypoint only the center point will be drawn (without
//!< the circle around keypoint with keypoint size and orientation).
DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create).
//!< Matches will be drawn on existing content of output image.
NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn.
DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and
//!< orientation will be drawn.
};
};
/** @brief Draws keypoints.
@param image Source image.
@param keypoints Keypoints from the source image.
@param outImage Output image. Its content depends on the flags value defining what is drawn in the
output image. See possible flags bit values below.
@param color Color of keypoints.
@param flags Flags setting drawing features. Possible flags bit values are defined by
DrawMatchesFlags. See details above in drawMatches .
@note
In the Python API, the flags are available as cv2.DRAW_MATCHES_FLAGS_DEFAULT,
cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv2.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG,
cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS
*/
CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector<KeyPoint>& keypoints, InputOutputArray outImage,
const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT );
/** @brief Draws the found matches of keypoints from two images.
@param img1 First source image.
@param keypoints1 Keypoints from the first source image.
@param img2 Second source image.
@param keypoints2 Keypoints from the second source image.
@param matches1to2 Matches from the first image to the second one, which means that keypoints1[i]
has a corresponding point in keypoints2[matches[i]] .
@param outImg Output image. Its content depends on the flags value defining what is drawn in the
output image. See possible flags bit values below.
@param matchColor Color of matches (lines and connected keypoints). If matchColor==Scalar::all(-1)
, the color is generated randomly.
@param singlePointColor Color of single keypoints (circles), which means that keypoints do not
have the matches. If singlePointColor==Scalar::all(-1) , the color is generated randomly.
@param matchesMask Mask determining which matches are drawn. If the mask is empty, all matches are
drawn.
@param flags Flags setting drawing features. Possible flags bit values are defined by
DrawMatchesFlags.
This function draws matches of keypoints from two images in the output image. Match is a line
connecting two keypoints (circles). See cv::DrawMatchesFlags.
*/
CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector<KeyPoint>& keypoints1,
InputArray img2, const std::vector<KeyPoint>& keypoints2,
const std::vector<DMatch>& matches1to2, InputOutputArray outImg,
const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
const std::vector<char>& matchesMask=std::vector<char>(), int flags=DrawMatchesFlags::DEFAULT );
/** @overload */
CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector<KeyPoint>& keypoints1,
InputArray img2, const std::vector<KeyPoint>& keypoints2,
const std::vector<std::vector<DMatch> >& matches1to2, InputOutputArray outImg,
const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
const std::vector<std::vector<char> >& matchesMask=std::vector<std::vector<char> >(), int flags=DrawMatchesFlags::DEFAULT );
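An end-to-end sketch feeding drawMatches (img1 and img2 are assumed pre-loaded 8-bit images):
@code
Ptr<ORB> orb = ORB::create();
std::vector<KeyPoint> kpts1, kpts2;
Mat desc1, desc2;
orb->detectAndCompute(img1, noArray(), kpts1, desc1);
orb->detectAndCompute(img2, noArray(), kpts2, desc2);
BFMatcher matcher(NORM_HAMMING);
std::vector<DMatch> matches;
matcher.match(desc1, desc2, matches);
Mat vis;
drawMatches(img1, kpts1, img2, kpts2, matches, vis); // match lines plus single keypoints
@endcode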
//! @} features2d_draw
/****************************************************************************************\
* Functions to evaluate the feature detectors and [generic] descriptor extractors *
\****************************************************************************************/
@ -755,8 +1116,14 @@ CV_EXPORTS int getNearestPoint( const std::vector<Point2f>& recallPrecisionCurve
/****************************************************************************************\
* Bag of visual words *
\****************************************************************************************/
//! @addtogroup features2d_category
//! @{
/** @brief Abstract base class for training the *bag of visual words* vocabulary from a set of descriptors.
For details, see, for example, *Visual Categorization with Bags of Keypoints* by Gabriella Csurka,
Christopher R. Dance, Lixin Fan, Jutta Willamowski, Cedric Bray, 2004.
*/
class CV_EXPORTS_W BOWTrainer
{
@ -764,20 +1131,37 @@ public:
BOWTrainer();
virtual ~BOWTrainer();
/** @brief Adds descriptors to a training set.
@param descriptors Descriptors to add to a training set. Each row of the descriptors matrix is a
descriptor.
The training set is clustered using the cluster method to construct the vocabulary.
*/
CV_WRAP void add( const Mat& descriptors );
/** @brief Returns a training set of descriptors.
*/
CV_WRAP const std::vector<Mat>& getDescriptors() const;
/** @brief Returns the count of all descriptors stored in the training set.
*/
CV_WRAP int descriptorsCount() const;
CV_WRAP virtual void clear();
/** @overload */
CV_WRAP virtual Mat cluster() const = 0;
/** @brief Clusters train descriptors.
@param descriptors Descriptors to cluster. Each row of the descriptors matrix is a descriptor.
Descriptors are not added to the inner train descriptor set.
The vocabulary consists of cluster centers. So, this method returns the vocabulary. In the first
variant of the method, train descriptors stored in the object are clustered. In the second variant,
input descriptors are clustered.
*/
CV_WRAP virtual Mat cluster( const Mat& descriptors ) const = 0;
protected:
@ -785,12 +1169,15 @@ protected:
int size;
};
/** @brief kmeans-based class to train visual vocabulary using the *bag of visual words* approach.
*/
class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer
{
public:
/** @brief The constructor.
@see cv::kmeans
*/
CV_WRAP BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(),
int attempts=3, int flags=KMEANS_PP_CENTERS );
virtual ~BOWKMeansTrainer();
@ -807,21 +1194,62 @@ protected:
int flags;
};
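A hedged vocabulary-training sketch (trainingDescriptors is an assumed std::vector<Mat> of
CV_32F descriptors, one matrix per training image; the vocabulary size is illustrative):
@code
BOWKMeansTrainer bowTrainer(1000); // vocabulary of 1000 visual words
for (size_t i = 0; i < trainingDescriptors.size(); i++)
    bowTrainer.add(trainingDescriptors[i]);
Mat vocabulary = bowTrainer.cluster(); // cluster centers computed via cv::kmeans
@endcode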
/** @brief Class to compute an image descriptor using the *bag of visual words*.
Such a computation consists of the following steps:
1. Compute descriptors for a given image and its keypoints set.
2. Find the nearest visual words from the vocabulary for each keypoint descriptor.
3. Compute the bag-of-words image descriptor as a normalized histogram of vocabulary words
encountered in the image. The i-th bin of the histogram is the frequency of the i-th word of the
vocabulary in the given image.
*/
class CV_EXPORTS_W BOWImgDescriptorExtractor
{
public:
/** @brief The constructor.
@param dextractor Descriptor extractor that is used to compute descriptors for an input image and
its keypoints.
@param dmatcher Descriptor matcher that is used to find the nearest word of the trained vocabulary
for each keypoint descriptor of the image.
*/
CV_WRAP BOWImgDescriptorExtractor( const Ptr<DescriptorExtractor>& dextractor,
const Ptr<DescriptorMatcher>& dmatcher );
/** @overload */
BOWImgDescriptorExtractor( const Ptr<DescriptorMatcher>& dmatcher );
virtual ~BOWImgDescriptorExtractor();
/** @brief Sets a visual vocabulary.
@param vocabulary Vocabulary (can be trained using the inheritor of BOWTrainer ). Each row of the
vocabulary is a visual word (cluster center).
*/
CV_WRAP void setVocabulary( const Mat& vocabulary );
/** @brief Returns the set vocabulary.
*/
CV_WRAP const Mat& getVocabulary() const;
/** @brief Computes an image descriptor using the set visual vocabulary.
@param image Image, for which the descriptor is computed.
@param keypoints Keypoints detected in the input image.
@param imgDescriptor Computed output image descriptor.
@param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that
pointIdxsOfClusters[i] are keypoint indices that belong to the i-th cluster (word of vocabulary),
returned if it is non-zero.
@param descriptors Descriptors of the image keypoints that are returned if they are non-zero.
*/
void compute( InputArray image, std::vector<KeyPoint>& keypoints, OutputArray imgDescriptor,
std::vector<std::vector<int> >* pointIdxsOfClusters=0, Mat* descriptors=0 );
/** @overload
@param keypointDescriptors Computed descriptors to match with vocabulary.
@param imgDescriptor Computed output image descriptor.
@param pointIdxsOfClusters Indices of keypoints that belong to the cluster. This means that
pointIdxsOfClusters[i] are keypoint indices that belong to the i-th cluster (word of vocabulary),
returned if it is non-zero.
*/
void compute( InputArray keypointDescriptors, OutputArray imgDescriptor,
std::vector<std::vector<int> >* pointIdxsOfClusters=0 );
// compute() is not constant because DescriptorMatcher::match is not constant
@ -829,7 +1257,12 @@ public:
CV_WRAP_AS(compute) void compute2( const Mat& image, std::vector<KeyPoint>& keypoints, CV_OUT Mat& imgDescriptor )
{ compute(image,keypoints,imgDescriptor); }
/** @brief Returns an image descriptor size if the vocabulary is set. Otherwise, it returns 0.
*/
CV_WRAP int descriptorSize() const;
/** @brief Returns an image descriptor type.
*/
CV_WRAP int descriptorType() const;
protected:
@ -838,6 +1271,10 @@ protected:
Ptr<DescriptorMatcher> dmatcher;
};
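A minimal sketch of the three steps listed above (vocabulary is assumed to come from a
BOWTrainer, img from the caller):
@code
Ptr<Feature2D> kaze = KAZE::create(); // float descriptors suit FLANN-based matching
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased");
BOWImgDescriptorExtractor bow(kaze, matcher);
bow.setVocabulary(vocabulary);        // e.g. the result of BOWKMeansTrainer::cluster()
std::vector<KeyPoint> kpts;
kaze->detect(img, kpts);
Mat bowDescriptor;                    // 1 x vocabularySize normalized histogram
bow.compute(img, kpts, bowDescriptor);
@endcode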
//! @} features2d_category
//! @} features2d
} /* namespace cv */
#endif

@ -47,6 +47,15 @@
#include "opencv2/flann/miniflann.hpp"
#include "opencv2/flann/flann_base.hpp"
/**
@defgroup flann Clustering and Search in Multi-Dimensional Spaces
This section documents OpenCV's interface to the FLANN library. FLANN (Fast Library for Approximate
Nearest Neighbors) is a library that contains a collection of algorithms optimized for fast nearest
neighbor search in large datasets and for high dimensional features. More information about FLANN
can be found in @cite Muja2009 .
*/
namespace cvflann
{
CV_EXPORTS flann_distance_t flann_distance_type();
@ -59,6 +68,10 @@ namespace cv
namespace flann
{
//! @addtogroup flann
//! @{
template <typename T> struct CvType {};
template <> struct CvType<unsigned char> { static int type() { return CV_8U; } };
template <> struct CvType<char> { static int type() { return CV_8S; } };
@ -88,7 +101,9 @@ using ::cvflann::ChiSquareDistance;
using ::cvflann::KL_Divergence;
/** @brief The FLANN nearest neighbor index class. This class is templated with the type of elements for which
the index is built.
*/
template <typename Distance>
class GenericIndex
{
@ -96,10 +111,108 @@ public:
typedef typename Distance::ElementType ElementType;
typedef typename Distance::ResultType DistanceType;
/** @brief Constructs a nearest neighbor search index for a given dataset.
@param features Matrix containing the features (points) to index. The size of the matrix is
num_features x feature_dimensionality and the data type of the elements in the matrix must
coincide with the type of the index.
@param params Structure containing the index parameters. The type of index that will be
constructed depends on the type of this parameter. See the description.
@param distance
The method constructs a fast search structure from a set of features using the specified algorithm
with specified parameters, as defined by params. params is a reference to one of the following
IndexParams descendants:
- **LinearIndexParams** When passing an object of this type, the index will perform a linear,
brute-force search. :
@code
struct LinearIndexParams : public IndexParams
{
};
@endcode
- **KDTreeIndexParams** When passing an object of this type the index constructed will consist of
a set of randomized kd-trees which will be searched in parallel. :
@code
struct KDTreeIndexParams : public IndexParams
{
KDTreeIndexParams( int trees = 4 );
};
@endcode
- **KMeansIndexParams** When passing an object of this type the index constructed will be a
hierarchical k-means tree. :
@code
struct KMeansIndexParams : public IndexParams
{
KMeansIndexParams(
int branching = 32,
int iterations = 11,
flann_centers_init_t centers_init = CENTERS_RANDOM,
float cb_index = 0.2 );
};
@endcode
- **CompositeIndexParams** When using a parameters object of this type the index created
combines the randomized kd-trees and the hierarchical k-means tree. :
@code
struct CompositeIndexParams : public IndexParams
{
CompositeIndexParams(
int trees = 4,
int branching = 32,
int iterations = 11,
flann_centers_init_t centers_init = CENTERS_RANDOM,
float cb_index = 0.2 );
};
@endcode
- **LshIndexParams** When using a parameters object of this type the index created uses
multi-probe LSH (by Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search
by Qin Lv, William Josephson, Zhe Wang, Moses Charikar, Kai Li., Proceedings of the 33rd
International Conference on Very Large Data Bases (VLDB). Vienna, Austria. September 2007) :
@code
struct LshIndexParams : public IndexParams
{
LshIndexParams(
unsigned int table_number,
unsigned int key_size,
unsigned int multi_probe_level );
};
@endcode
- **AutotunedIndexParams** When passing an object of this type the index created is
automatically tuned to offer the best performance, by choosing the optimal index type
(randomized kd-trees, hierarchical kmeans, linear) and parameters for the dataset provided. :
@code
struct AutotunedIndexParams : public IndexParams
{
AutotunedIndexParams(
float target_precision = 0.9,
float build_weight = 0.01,
float memory_weight = 0,
float sample_fraction = 0.1 );
};
@endcode
- **SavedIndexParams** This object type is used for loading a previously saved index from the
disk. :
@code
struct SavedIndexParams : public IndexParams
{
SavedIndexParams( String filename );
};
@endcode
*/
GenericIndex(const Mat& features, const ::cvflann::IndexParams& params, Distance distance = Distance());
~GenericIndex();
/** @brief Performs a K-nearest neighbor search for a given query point using the index.
@param query The query point
@param indices Vector that will contain the indices of the K-nearest neighbors found. It must have
at least knn size.
@param dists Vector that will contain the distances to the K-nearest neighbors found. It must have
at least knn size.
@param knn Number of nearest neighbors to search for.
@param params SearchParams
*/
void knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices,
std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& params);
void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params);
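A hedged sketch of building an index and querying it (features and queries are assumed CV_32F
matrices with one point per row; the tree and search parameters are illustrative):
@code
cv::flann::GenericIndex< cvflann::L2<float> > index(features,
        cvflann::KDTreeIndexParams(4));        // 4 randomized kd-trees
Mat indices(queries.rows, 5, CV_32S);
Mat dists(queries.rows, 5, CV_32F);
index.knnSearch(queries, indices, dists, 5, cvflann::SearchParams(32));
@endcode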
@ -123,6 +236,7 @@ private:
::cvflann::Index<Distance>* nnIndex;
};
//! @cond IGNORED
#define FLANN_DISTANCE_CHECK \
if ( ::cvflann::flann_distance_type() != cvflann::FLANN_DIST_L2) { \
@ -218,6 +332,8 @@ int GenericIndex<Distance>::radiusSearch(const Mat& query, Mat& indices, Mat& di
return nnIndex->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
}
//! @endcond
/**
* @deprecated Use GenericIndex class instead
*/
@ -283,6 +399,8 @@ template <typename T>
class FLANN_DEPRECATED Index_;
#endif
//! @cond IGNORED
template <typename T>
Index_<T>::Index_(const Mat& dataset, const ::cvflann::IndexParams& params)
{
@ -377,7 +495,25 @@ int Index_<T>::radiusSearch(const Mat& query, Mat& indices, Mat& dists, Distance
if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
}
//! @endcond
/** @brief Clusters features using hierarchical k-means algorithm.
@param features The points to be clustered. The matrix must have elements of type
Distance::ElementType.
@param centers The centers of the clusters obtained. The matrix must have type
Distance::ResultType. The number of rows in this matrix represents the number of clusters desired,
however, because of the way the cut in the hierarchical tree is chosen, the number of clusters
computed will be the highest number of the form (branching-1)\*k+1 that's lower than the number of
clusters desired, where branching is the tree's branching factor (see description of the
KMeansIndexParams).
@param params Parameters used in the construction of the hierarchical k-means tree.
@param d Distance to be used for clustering.
The method clusters the given feature vectors by constructing a hierarchical k-means tree and
choosing a cut in the tree that minimizes the cluster's variance. It returns the number of clusters
found.
*/
template <typename Distance>
int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params,
Distance d = Distance())
@ -396,7 +532,8 @@ int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::K
return ::cvflann::hierarchicalClustering<Distance>(m_features, m_centers, params, d);
}
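A short sketch of the clustering call (features is an assumed CV_32F matrix; 100 requested
clusters and the branching/iteration values are illustrative):
@code
Mat centers(100, features.cols, CV_32F); // row count = desired number of clusters
int found = cv::flann::hierarchicalClustering< cvflann::L2<float> >(
        features, centers, cvflann::KMeansIndexParams(32, 11));
centers = centers.rowRange(0, found);    // only the first 'found' rows are valid
@endcode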
/** @deprecated
*/
template <typename ELEM_TYPE, typename DIST_TYPE>
FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params)
{
@ -417,6 +554,8 @@ FLANN_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, c
}
}
//! @} flann
} } // namespace cv::flann
#endif

@ -47,11 +47,92 @@
#include "opencv2/imgcodecs.hpp"
#include "opencv2/videoio.hpp"
/**
@defgroup highgui High-level GUI
While OpenCV was designed for use in full-scale applications and can be used within functionally
rich UI frameworks (such as Qt\*, WinForms\*, or Cocoa\*) or without any UI at all, sometimes it is
required to try functionality quickly and visualize the results. This is what the HighGUI
module has been designed for.
It provides an easy interface to:
- Create and manipulate windows that can display images and "remember" their content (no need to
handle repaint events from OS).
- Add trackbars to the windows, handle simple mouse events as well as keyboard commands.
@{
@defgroup highgui_opengl OpenGL support
@defgroup highgui_qt Qt New Functions
![image](pics/qtgui.png)
This figure explains the new functionality implemented with the Qt\* GUI. The new GUI provides a
statusbar, a toolbar, and a control panel. The control panel can have trackbars and buttonbars
attached to it.
If you cannot see the control panel, press Ctrl+P or right-click any Qt window and select **Display
properties window**.
- To attach a trackbar, the window name parameter must be NULL.
- To attach a buttonbar, a button must be created. If the last bar attached to the control panel
is a buttonbar, the new button is added to the right of the last button. If the last bar
attached to the control panel is a trackbar, or the control panel is empty, a new buttonbar is
created. Then, a new button is attached to it.
See below the example used to generate the figure:
@code
int main(int argc, char *argv[])
{
int value = 50;
int value2 = 0;
cvNamedWindow("main1",CV_WINDOW_NORMAL);
cvNamedWindow("main2",CV_WINDOW_AUTOSIZE | CV_GUI_NORMAL);
cvCreateTrackbar( "track1", "main1", &value, 255, NULL);//OK tested
char* nameb1 = "button1";
char* nameb2 = "button2";
cvCreateButton(nameb1,callbackButton,nameb1,CV_CHECKBOX,1);
cvCreateButton(nameb2,callbackButton,nameb2,CV_CHECKBOX,0);
cvCreateTrackbar( "track2", NULL, &value2, 255, NULL);
cvCreateButton("button5",callbackButton1,NULL,CV_RADIOBOX,0);
cvCreateButton("button6",callbackButton2,NULL,CV_RADIOBOX,1);
cvSetMouseCallback( "main2",on_mouse,NULL );
IplImage* img1 = cvLoadImage("files/flower.jpg");
IplImage* img2 = cvCreateImage(cvGetSize(img1),8,3);
CvCapture* video = cvCaptureFromFile("files/hockey.avi");
IplImage* img3 = cvCreateImage(cvGetSize(cvQueryFrame(video)),8,3);
while(cvWaitKey(33) != 27)
{
cvAddS(img1,cvScalarAll(value),img2);
cvAddS(cvQueryFrame(video),cvScalarAll(value2),img3);
cvShowImage("main1",img2);
cvShowImage("main2",img3);
}
cvDestroyAllWindows();
cvReleaseImage(&img1);
cvReleaseImage(&img2);
cvReleaseImage(&img3);
cvReleaseCapture(&video);
return 0;
}
@endcode
@defgroup highgui_c C API
@}
*/
///////////////////////// graphical user interface //////////////////////////
namespace cv
{
//! @addtogroup highgui
//! @{
// Flags for namedWindow
enum { WINDOW_NORMAL = 0x00000000, // the user can resize the window (no constraint) / also use to switch a fullscreen window to a normal size
WINDOW_AUTOSIZE = 0x00000001, // the user cannot resize the window, the size is constrained by the image displayed
@ -117,54 +198,334 @@ typedef void (*TrackbarCallback)(int pos, void* userdata);
typedef void (*OpenGlDrawCallback)(void* userdata);
typedef void (*ButtonCallback)(int state, void* userdata);
/** @brief Creates a window.
@param winname Name of the window in the window caption that may be used as a window identifier.
@param flags Flags of the window. The supported flags are:
> - **WINDOW_NORMAL** If this is set, the user can resize the window (no constraint).
> - **WINDOW_AUTOSIZE** If this is set, the window size is automatically adjusted to fit the
> displayed image (see imshow ), and you cannot change the window size manually.
> - **WINDOW_OPENGL** If this is set, the window will be created with OpenGL support.
The function namedWindow creates a window that can be used as a placeholder for images and
trackbars. Created windows are referred to by their names.
If a window with the same name already exists, the function does nothing.
You can call destroyWindow or destroyAllWindows to close the window and free any associated
memory. For a simple program, you do not really have to call these functions because all the
resources and windows of the application are closed automatically by the operating system upon exit.
@note
Qt backend supports additional flags:
- **CV_WINDOW_NORMAL or CV_WINDOW_AUTOSIZE:** CV_WINDOW_NORMAL enables you to resize the
window, whereas CV_WINDOW_AUTOSIZE adjusts automatically the window size to fit the
displayed image (see imshow ), and you cannot change the window size manually.
- **CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO:** CV_WINDOW_FREERATIO adjusts the image
with no respect to its ratio, whereas CV_WINDOW_KEEPRATIO keeps the image ratio.
- **CV_GUI_NORMAL or CV_GUI_EXPANDED:** CV_GUI_NORMAL is the old way to draw the window
without statusbar and toolbar, whereas CV_GUI_EXPANDED is a new enhanced GUI.
By default, flags == CV_WINDOW_AUTOSIZE | CV_WINDOW_KEEPRATIO | CV_GUI_EXPANDED
*/
CV_EXPORTS_W void namedWindow(const String& winname, int flags = WINDOW_AUTOSIZE);
/** @brief Destroys a window.
@param winname Name of the window to be destroyed.
The function destroyWindow destroys the window with the given name.
*/
CV_EXPORTS_W void destroyWindow(const String& winname);
/** @brief Destroys all of the HighGUI windows.
The function destroyAllWindows destroys all of the opened HighGUI windows.
*/
CV_EXPORTS_W void destroyAllWindows();
CV_EXPORTS_W int startWindowThread();
/** @brief Waits for a pressed key.
@param delay Delay in milliseconds. 0 is the special value that means "forever".
The function waitKey waits for a key event infinitely (when \f$\texttt{delay}\leq 0\f$ ) or for delay
milliseconds, when it is positive. Since the OS has a minimum time between switching threads, the
function will not wait exactly delay ms; it will wait at least delay ms, depending on what else is
running on your computer at that time. It returns the code of the pressed key or -1 if no key was
pressed before the specified time had elapsed.
@note
This function is the only method in HighGUI that can fetch and handle events, so it needs to be
called periodically for normal event processing unless HighGUI is used within an environment that
takes care of event processing.
@note
The function only works if there is at least one HighGUI window created and the window is active.
If there are several HighGUI windows, any of them can be active.
*/
CV_EXPORTS_W int waitKey(int delay = 0);
/** @brief Displays an image in the specified window.
@param winname Name of the window.
@param mat Image to be shown.
The function imshow displays an image in the specified window. If the window was created with the
CV_WINDOW_AUTOSIZE flag, the image is shown with its original size. Otherwise, the image is scaled
to fit the window. The function may scale the image, depending on its depth:
- If the image is 8-bit unsigned, it is displayed as is.
- If the image is 16-bit unsigned or 32-bit integer, the pixels are divided by 256. That is, the
value range [0,255\*256] is mapped to [0,255].
- If the image is 32-bit floating-point, the pixel values are multiplied by 255. That is, the
value range [0,1] is mapped to [0,255].
If the window was created with OpenGL support, imshow also supports ogl::Buffer, ogl::Texture2D and
cuda::GpuMat as input.
@note This function should be followed by a call to waitKey, which displays the image for the
specified number of milliseconds; otherwise, the image will not be displayed. For example,
waitKey(0) displays the window until a key is pressed (suitable for showing a single image), while
waitKey(25) displays a frame for 25 ms, after which the display is automatically closed. (If you
put it in a loop to read videos, it will display the video frame-by-frame.)
@note
[Windows Backend Only] Pressing Ctrl+C will copy the image to the clipboard.
*/
CV_EXPORTS_W void imshow(const String& winname, InputArray mat);
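As a quick illustration of the imshow/waitKey pairing described in the note above, a minimal
sketch (the input file name is hypothetical):
@code
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>

int main()
{
    cv::Mat img = cv::imread("lena.jpg"); // hypothetical input file
    if (img.empty())
        return 1; // nothing to show
    cv::namedWindow("preview", cv::WINDOW_AUTOSIZE);
    cv::imshow("preview", img);
    cv::waitKey(0); // block until a key is pressed; without this nothing is drawn
    cv::destroyAllWindows();
    return 0;
}
@endcode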
/** @brief Resizes window to the specified size
@param winname Window name
@param width The new window width
@param height The new window height
@note
- The specified window size is for the image area. Toolbars are not counted.
- Only windows created without the CV_WINDOW_AUTOSIZE flag can be resized.
*/
CV_EXPORTS_W void resizeWindow(const String& winname, int width, int height);
/** @brief Moves window to the specified position
@param winname Window name
@param x The new x-coordinate of the window
@param y The new y-coordinate of the window
*/
CV_EXPORTS_W void moveWindow(const String& winname, int x, int y);
/** @brief Changes parameters of a window dynamically.
@param winname Name of the window.
@param prop_id Window property to edit. The following operation flags are available:
- **CV_WND_PROP_FULLSCREEN** Change if the window is fullscreen ( CV_WINDOW_NORMAL or
CV_WINDOW_FULLSCREEN ).
- **CV_WND_PROP_AUTOSIZE** Change if the window is resizable (CV_WINDOW_NORMAL or
CV_WINDOW_AUTOSIZE ).
- **CV_WND_PROP_ASPECTRATIO** Change if the aspect ratio of the image is preserved (
CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO ).
@param prop_value New value of the window property. The following operation flags are available:
- **CV_WINDOW_NORMAL** Change the window to normal size or make the window resizable.
- **CV_WINDOW_AUTOSIZE** Constrain the size by the displayed image. The window is not
resizable.
- **CV_WINDOW_FULLSCREEN** Change the window to fullscreen.
- **CV_WINDOW_FREERATIO** Make the window resizable without any ratio constraints.
- **CV_WINDOW_KEEPRATIO** Make the window resizable, but preserve the proportions of the
displayed image.
The function setWindowProperty enables changing properties of a window.
*/
CV_EXPORTS_W void setWindowProperty(const String& winname, int prop_id, double prop_value);
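For example, a resizable window can be switched to fullscreen and back. A sketch, using
WND_PROP_FULLSCREEN and WINDOW_FULLSCREEN as the C++ counterparts of the CV_* flags listed above:
@code
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>

int main()
{
    cv::namedWindow("main", cv::WINDOW_NORMAL); // must be resizable
    cv::Mat canvas(480, 640, CV_8UC3, cv::Scalar::all(64));
    cv::imshow("main", canvas);
    cv::setWindowProperty("main", cv::WND_PROP_FULLSCREEN, cv::WINDOW_FULLSCREEN);
    cv::waitKey(2000); // stay fullscreen for ~2 seconds
    cv::setWindowProperty("main", cv::WND_PROP_FULLSCREEN, cv::WINDOW_NORMAL);
    cv::waitKey(0);
    return 0;
}
@endcode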
/** @brief Updates window title.
@param winname Name of the window.
@param title New title.
*/
CV_EXPORTS_W void setWindowTitle(const String& winname, const String& title);
/** @brief Provides parameters of a window.
@param winname Name of the window.
@param prop_id Window property to retrieve. The following operation flags are available:
- **CV_WND_PROP_FULLSCREEN** Change if the window is fullscreen ( CV_WINDOW_NORMAL or
CV_WINDOW_FULLSCREEN ).
- **CV_WND_PROP_AUTOSIZE** Change if the window is resizable (CV_WINDOW_NORMAL or
CV_WINDOW_AUTOSIZE ).
- **CV_WND_PROP_ASPECTRATIO** Change if the aspect ratio of the image is preserved
(CV_WINDOW_FREERATIO or CV_WINDOW_KEEPRATIO ).
See setWindowProperty to know the meaning of the returned values.
The function getWindowProperty returns properties of a window.
*/
CV_EXPORTS_W double getWindowProperty(const String& winname, int prop_id);
//! assigns callback for mouse events
/** @brief Sets mouse handler for the specified window
@param winname Window name
@param onMouse Mouse callback. See OpenCV samples, such as
<https://github.com/Itseez/opencv/tree/master/samples/cpp/ffilldemo.cpp>, on how to specify and
use the callback.
@param userdata The optional parameter passed to the callback.
*/
CV_EXPORTS void setMouseCallback(const String& winname, MouseCallback onMouse, void* userdata = 0);
/** @brief Gets the mouse-wheel motion delta, when handling mouse-wheel events EVENT_MOUSEWHEEL and
EVENT_MOUSEHWHEEL.
@param flags The mouse callback flags parameter.
For regular mice with a scroll-wheel, delta will be a multiple of 120. The value 120 corresponds to
one notch of wheel rotation and is the threshold at which an action should be taken; one such
action should occur for each delta. Some high-precision mice with higher-resolution freely-rotating
wheels may generate smaller values.
For EVENT_MOUSEWHEEL positive and negative values mean forward and backward scrolling,
respectively. For EVENT_MOUSEHWHEEL, where available, positive and negative values mean right and
left scrolling, respectively.
With the C API, the macro CV_GET_WHEEL_DELTA(flags) can be used alternatively.
@note
Mouse-wheel events are currently supported only on Windows.
*/
CV_EXPORTS int getMouseWheelDelta(int flags);
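A sketch of a combined setMouseCallback/getMouseWheelDelta handler (wheel events fire on Windows
only, per the note above; all names are illustrative):
@code
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <cstdio>

static void onMouse(int event, int x, int y, int flags, void* /*userdata*/)
{
    if (event == cv::EVENT_MOUSEWHEEL)
        std::printf("wheel delta %d at (%d, %d)\n", cv::getMouseWheelDelta(flags), x, y);
    else if (event == cv::EVENT_LBUTTONDOWN)
        std::printf("left button down at (%d, %d)\n", x, y);
}

int main()
{
    cv::namedWindow("events");
    cv::imshow("events", cv::Mat::zeros(240, 320, CV_8UC3));
    cv::setMouseCallback("events", onMouse, 0);
    while ((cv::waitKey(30) & 0xFF) != 27) {} // ESC quits
    return 0;
}
@endcode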
/** @brief Creates a trackbar and attaches it to the specified window.
@param trackbarname Name of the created trackbar.
@param winname Name of the window that will be used as a parent of the created trackbar.
@param value Optional pointer to an integer variable whose value reflects the position of the
slider. Upon creation, the slider position is defined by this variable.
@param count Maximal position of the slider. The minimal position is always 0.
@param onChange Pointer to the function to be called every time the slider changes position. This
function should be prototyped as void Foo(int, void\*), where the first parameter is the trackbar
position and the second parameter is the user data (see the next parameter). If the callback is
the NULL pointer, no callbacks are called; only value is updated.
@param userdata User data that is passed as is to the callback. It can be used to handle trackbar
events without using global variables.
The function createTrackbar creates a trackbar (a slider or range control) with the specified name
and range, assigns a variable value to be a position synchronized with the trackbar and specifies
the callback function onChange to be called on the trackbar position change. The created trackbar is
displayed in the specified window winname.
@note
**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar should be attached to the
control panel.
Clicking the label of each trackbar enables editing the trackbar values manually.
@note
- An example of using the trackbar functionality can be found at
opencv_source_code/samples/cpp/connected_components.cpp
*/
CV_EXPORTS int createTrackbar(const String& trackbarname, const String& winname,
int* value, int count,
TrackbarCallback onChange = 0,
void* userdata = 0);
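The userdata parameter is what lets the callback reach application state without globals. A hedged
sketch (the struct, file name and callback are illustrative):
@code
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>

struct AppState { cv::Mat src, dst; }; // illustrative state bundle

static void onBrightness(int pos, void* userdata)
{
    AppState* s = static_cast<AppState*>(userdata);
    s->src.convertTo(s->dst, -1, 1.0, pos); // add `pos` to every pixel
    cv::imshow("bright", s->dst);
}

int main()
{
    AppState s;
    s.src = cv::imread("flower.jpg"); // hypothetical input file
    if (s.src.empty())
        return 1;
    cv::namedWindow("bright");
    int pos = 0;
    cv::createTrackbar("offset", "bright", &pos, 255, onBrightness, &s);
    onBrightness(pos, &s); // render the initial state
    cv::waitKey(0);
    return 0;
}
@endcode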
/** @brief Returns the trackbar position.
@param trackbarname Name of the trackbar.
@param winname Name of the window that is the parent of the trackbar.
The function returns the current position of the specified trackbar.
@note
**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control
panel.
*/
CV_EXPORTS_W int getTrackbarPos(const String& trackbarname, const String& winname);
/** @brief Sets the trackbar position.
@param trackbarname Name of the trackbar.
@param winname Name of the window that is the parent of trackbar.
@param pos New position.
The function sets the position of the specified trackbar in the specified window.
@note
**[Qt Backend Only]** winname can be empty (or NULL) if the trackbar is attached to the control
panel.
*/
CV_EXPORTS_W void setTrackbarPos(const String& trackbarname, const String& winname, int pos);
//! @addtogroup highgui_opengl OpenGL support
//! @{
// OpenGL support
CV_EXPORTS void imshow(const String& winname, const ogl::Texture2D& tex);
/** @brief Sets a callback function to be called to draw on top of displayed image.
@param winname Name of the window.
@param onOpenGlDraw Pointer to the function to be called every frame. This function should be
prototyped as void Foo(void\*) .
@param userdata Pointer passed to the callback function. *(Optional)*
The function setOpenGlDrawCallback can be used to draw 3D data on the window. See the example of
a callback function below:
@code
void on_opengl(void* param)
{
glLoadIdentity();
glTranslated(0.0, 0.0, -1.0);
glRotatef( 55, 1, 0, 0 );
glRotatef( 45, 0, 1, 0 );
glRotatef( 0, 0, 0, 1 );
static const int coords[6][4][3] = {
{ { +1, -1, -1 }, { -1, -1, -1 }, { -1, +1, -1 }, { +1, +1, -1 } },
{ { +1, +1, -1 }, { -1, +1, -1 }, { -1, +1, +1 }, { +1, +1, +1 } },
{ { +1, -1, +1 }, { +1, -1, -1 }, { +1, +1, -1 }, { +1, +1, +1 } },
{ { -1, -1, -1 }, { -1, -1, +1 }, { -1, +1, +1 }, { -1, +1, -1 } },
{ { +1, -1, +1 }, { -1, -1, +1 }, { -1, -1, -1 }, { +1, -1, -1 } },
{ { -1, -1, +1 }, { +1, -1, +1 }, { +1, +1, +1 }, { -1, +1, +1 } }
};
for (int i = 0; i < 6; ++i) {
glColor3ub( i*20, 100+i*10, i*42 );
glBegin(GL_QUADS);
for (int j = 0; j < 4; ++j) {
glVertex3d(0.2 * coords[i][j][0], 0.2 * coords[i][j][1], 0.2 * coords[i][j][2]);
}
glEnd();
}
}
@endcode
*/
CV_EXPORTS void setOpenGlDrawCallback(const String& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0);
/** @brief Sets the specified window as current OpenGL context.
@param winname Window name
*/
CV_EXPORTS void setOpenGlContext(const String& winname);
/** @brief Forces the window to redraw its context and calls the draw callback ( setOpenGlDrawCallback ).
@param winname Window name
*/
CV_EXPORTS void updateWindow(const String& winname);
//! @} highgui_opengl
//! @addtogroup highgui_qt
//! @{
// Only for Qt
struct QtFont
@@ -182,27 +543,138 @@ struct QtFont
int line_type; // Qt: PointSize
};
/** @brief Creates the font to draw a text on an image.
@param nameFont Name of the font. The name should match the name of a system font (such as
*Times*). If the font is not found, a default one is used.
@param pointSize Size of the font. If not specified, equal zero or negative, the point size of the
font is set to a system-dependent default value. Generally, this is 12 points.
@param color Color of the font in BGRA where A = 255 is fully transparent. Use the macro CV_RGB
for simplicity.
@param weight Font weight. The following operation flags are available:
- **CV_FONT_LIGHT** Weight of 25
- **CV_FONT_NORMAL** Weight of 50
- **CV_FONT_DEMIBOLD** Weight of 63
- **CV_FONT_BOLD** Weight of 75
- **CV_FONT_BLACK** Weight of 87
You can also specify a positive integer for better control.
@param style Font style. The following operation flags are available:
- **CV_STYLE_NORMAL** Normal font
- **CV_STYLE_ITALIC** Italic font
- **CV_STYLE_OBLIQUE** Oblique font
@param spacing Spacing between characters. It can be negative or positive.
The function fontQt creates a CvFont object. This CvFont is not compatible with putText .
A basic usage of this function is the following:
@code
CvFont font = fontQt("Times");
addText( img1, "Hello World !", Point(50,50), font);
@endcode
*/
CV_EXPORTS QtFont fontQt(const String& nameFont, int pointSize = -1,
Scalar color = Scalar::all(0), int weight = QT_FONT_NORMAL,
int style = QT_STYLE_NORMAL, int spacing = 0);
/** @brief Draws a text on the image.
@param img 8-bit 3-channel image where the text should be drawn.
@param text Text to write on an image.
@param org Point(x,y) where the text should start on an image.
@param font Font to use to draw a text.
The function addText draws *text* on the image *img* using the font *font* (see the fontQt
example).
*/
CV_EXPORTS void addText( const Mat& img, const String& text, Point org, const QtFont& font);
/** @brief Displays a text on a window image as an overlay for a specified duration.
@param winname Name of the window.
@param text Overlay text to write on a window image.
@param delayms The period (in milliseconds), during which the overlay text is displayed. If this
function is called before the previous overlay text timed out, the timer is restarted and the text
is updated. If this value is zero, the text never disappears.
The function displayOverlay displays useful information/tips on top of the window for a certain
amount of time *delayms*. The function does not modify the image displayed in the window; that is,
after the specified delay the original content of the window is restored.
*/
CV_EXPORTS void displayOverlay(const String& winname, const String& text, int delayms = 0);
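A small sketch (the Qt backend is assumed, since displayOverlay is a Qt-only function):
@code
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>

int main()
{
    cv::namedWindow("view", cv::WINDOW_NORMAL); // requires the Qt backend
    cv::imshow("view", cv::Mat::zeros(240, 320, CV_8UC3));
    cv::displayOverlay("view", "Press any key to quit", 2000); // disappears after 2 s
    cv::waitKey(0);
    return 0;
}
@endcode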
/** @brief Displays a text on the window statusbar during the specified period of time.
@param winname Name of the window.
@param text Text to write on the window statusbar.
@param delayms Duration (in milliseconds) to display the text. If this function is called before
the previous text timed out, the timer is restarted and the text is updated. If this value is
zero, the text never disappears.
The function displayStatusBar displays useful information/tips on the window statusbar for a
certain amount of time *delayms* (the window must be created with the CV_GUI_EXPANDED flag).
*/
CV_EXPORTS void displayStatusBar(const String& winname, const String& text, int delayms = 0);
/** @brief Saves parameters of the specified window.
@param windowName Name of the window.
The function saveWindowParameters saves the size, location, flags, trackbar values, zoom and
panning location of the window windowName.
*/
CV_EXPORTS void saveWindowParameters(const String& windowName);
/** @brief Loads parameters of the specified window.
@param windowName Name of the window.
The function loadWindowParameters loads the size, location, flags, trackbar values, zoom and
panning location of the window windowName.
*/
CV_EXPORTS void loadWindowParameters(const String& windowName);
CV_EXPORTS int startLoop(int (*pt2Func)(int argc, char *argv[]), int argc, char* argv[]);
CV_EXPORTS void stopLoop();
/** @brief Attaches a button to the control panel.
@param bar_name Name of the button.
@param on_change Pointer to the function to be called every time the button changes its state.
This function should be prototyped as void Foo(int state, void\*). *state* is the current state
of the button: -1 for a push button, 0 or 1 for a checkbox/radiobox button.
@param userdata Pointer passed to the callback function.
@param type Optional type of the button.
- **CV_PUSH_BUTTON** Push button
- **CV_CHECKBOX** Checkbox button
- **CV_RADIOBOX** Radiobox button. Radioboxes on the same buttonbar (same line) are
exclusive, that is, only one can be selected at a time.
@param initial_button_state Default state of the button. Used for checkboxes and radioboxes; its
value can be 0 or 1. *(Optional)*
The function createButton attaches a button to the control panel. Each button is added to a
buttonbar to the right of the last button. A new buttonbar is created if nothing was attached to the
control panel before, or if the last element attached to the control panel was a trackbar.
See below various examples of the createButton function call:
@code
createButton(NULL,callbackButton); // creates a push button "button 0" that will call callbackButton
createButton("button2",callbackButton,NULL,CV_CHECKBOX,0);
createButton("button3",callbackButton,&value);
createButton("button5",callbackButton1,NULL,CV_RADIOBOX);
createButton("button6",callbackButton2,NULL,CV_PUSH_BUTTON,1);
@endcode
*/
CV_EXPORTS int createButton( const String& bar_name, ButtonCallback on_change,
void* userdata = 0, int type = QT_PUSH_BUTTON,
bool initial_button_state = false);
//! @} highgui_qt
//! @} highgui
} // cv
#endif

@@ -51,6 +51,10 @@
extern "C" {
#endif /* __cplusplus */
/** @addtogroup highgui_c
@{
*/
/****************************************************************************************\
* Basic GUI functions *
\****************************************************************************************/
@@ -237,6 +241,8 @@ CVAPI(void) cvSetPostprocessFuncWin32_(const void* callback);
#endif
/** @} highgui_c */
#ifdef __cplusplus
}
#endif

@@ -45,10 +45,21 @@
#include "opencv2/core.hpp"
/**
@defgroup imgcodecs Image file reading and writing
@{
@defgroup imgcodecs_c C API
@defgroup imgcodecs_ios iOS glue
@}
*/
//////////////////////////////// image codec ////////////////////////////////
namespace cv
{
//! @addtogroup imgcodecs
//! @{
enum { IMREAD_UNCHANGED = -1, // 8bit, color or not
IMREAD_GRAYSCALE = 0, // 8bit, gray
IMREAD_COLOR = 1, // ?, color
@@ -77,19 +88,166 @@ enum { IMWRITE_PNG_STRATEGY_DEFAULT = 0,
IMWRITE_PNG_STRATEGY_FIXED = 4
};
/** @brief Loads an image from a file.
@param filename Name of file to be loaded.
@param flags Flags specifying the color type of a loaded image:
- CV_LOAD_IMAGE_ANYDEPTH - If set, return a 16-bit/32-bit image when the input has the
corresponding depth; otherwise convert it to 8-bit.
- CV_LOAD_IMAGE_COLOR - If set, always convert the image to a color one.
- CV_LOAD_IMAGE_GRAYSCALE - If set, always convert the image to a grayscale one.
- **\>0** Return a 3-channel color image.
@note In the current implementation the alpha channel, if any, is stripped from the output
image. Use a negative value if you need the alpha channel.
- **=0** Return a grayscale image.
- **\<0** Return the loaded image as is (with alpha channel).
The function imread loads an image from the specified file and returns it. If the image cannot be
read (because of missing file, improper permissions, unsupported or invalid format), the function
returns an empty matrix ( Mat::data==NULL ). Currently, the following file formats are supported:
- Windows bitmaps - \*.bmp, \*.dib (always supported)
- JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Notes* section)
- JPEG 2000 files - \*.jp2 (see the *Notes* section)
- Portable Network Graphics - \*.png (see the *Notes* section)
- WebP - \*.webp (see the *Notes* section)
- Portable image format - \*.pbm, \*.pgm, \*.ppm (always supported)
- Sun rasters - \*.sr, \*.ras (always supported)
- TIFF files - \*.tiff, \*.tif (see the *Notes* section)
@note
- The function determines the type of an image by the content, not by the file extension.
- On Microsoft Windows\* OS and MacOSX\*, the codecs shipped with OpenCV (libjpeg, libpng,
libtiff, and libjasper) are used by default. So, OpenCV can always read JPEGs, PNGs,
and TIFFs. On MacOSX, there is also an option to use native MacOSX image readers. But beware
that currently these native image loaders give images with different pixel values because of
the color management embedded into MacOSX.
- On Linux\*, BSD flavors and other Unix-like open-source operating systems, OpenCV looks for
codecs supplied with an OS image. Install the relevant packages (do not forget the development
files, for example, "libjpeg-dev", in Debian\* and Ubuntu\*) to get the codec support or turn
on the OPENCV_BUILD_3RDPARTY_LIBS flag in CMake.
@note In the case of color images, the decoded images will have the channels stored in B G R order.
*/
CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR );
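A minimal sketch of the empty-matrix check described above (the file name is hypothetical):
@code
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <cstdio>

int main()
{
    cv::Mat gray = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
    if (gray.empty()) // missing file, bad permissions or unsupported format
    {
        std::fprintf(stderr, "Could not read input.png\n");
        return 1;
    }
    std::printf("loaded %dx%d image with %d channel(s)\n",
                gray.cols, gray.rows, gray.channels());
    return 0;
}
@endcode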
/** @brief Saves an image to a specified file.
@param filename Name of the file.
@param img Image to be saved.
@param params Format-specific save parameters encoded as pairs
paramId_1, paramValue_1, paramId_2, paramValue_2, ... . The following parameters are currently
supported:
- For JPEG, it can be a quality ( CV_IMWRITE_JPEG_QUALITY ) from 0 to 100 (the higher the
better). Default value is 95.
- For WEBP, it can be a quality ( CV_IMWRITE_WEBP_QUALITY ) from 1 to 100 (the higher the
better). By default (without any parameter) and for quality above 100, lossless
compression is used.
- For PNG, it can be the compression level ( CV_IMWRITE_PNG_COMPRESSION ) from 0 to 9. A
higher value means a smaller size and longer compression time. Default value is 3.
- For PPM, PGM, or PBM, it can be a binary format flag ( CV_IMWRITE_PXM_BINARY ), 0 or 1.
Default value is 1.
The function imwrite saves the image to the specified file. The image format is chosen based on the
filename extension (see imread for the list of extensions). Only 8-bit (or 16-bit unsigned (CV_16U)
in case of PNG, JPEG 2000, and TIFF) single-channel or 3-channel (with 'BGR' channel order) images
can be saved using this function. If the format, depth or channel order is different, use
Mat::convertTo and cvtColor to convert it before saving. Or, use the universal FileStorage I/O
functions to save the image to XML or YAML format.
It is possible to store PNG images with an alpha channel using this function. To do this, create
an 8-bit (or 16-bit) 4-channel BGRA image, where the alpha channel goes last. Fully transparent
pixels should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535. The
sample below shows how to create such a BGRA image and store it to a PNG file. It also
demonstrates how to set custom compression parameters:
@code
#include <vector>
#include <stdio.h>
#include <limits.h>
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
void createAlphaMat(Mat &mat)
{
for (int i = 0; i < mat.rows; ++i) {
for (int j = 0; j < mat.cols; ++j) {
Vec4b& rgba = mat.at<Vec4b>(i, j);
rgba[0] = UCHAR_MAX;
rgba[1] = saturate_cast<uchar>((float (mat.cols - j)) / ((float)mat.cols) * UCHAR_MAX);
rgba[2] = saturate_cast<uchar>((float (mat.rows - i)) / ((float)mat.rows) * UCHAR_MAX);
rgba[3] = saturate_cast<uchar>(0.5 * (rgba[1] + rgba[2]));
}
}
}
int main(int argc, char **argv)
{
// Create mat with alpha channel
Mat mat(480, 640, CV_8UC4);
createAlphaMat(mat);
vector<int> compression_params;
compression_params.push_back(CV_IMWRITE_PNG_COMPRESSION);
compression_params.push_back(9);
try {
imwrite("alpha.png", mat, compression_params);
}
catch (const cv::Exception& ex) {
fprintf(stderr, "Exception converting image to PNG format: %s\n", ex.what());
return 1;
}
fprintf(stdout, "Saved PNG file with alpha data.\n");
return 0;
}
@endcode
*/
CV_EXPORTS_W bool imwrite( const String& filename, InputArray img,
const std::vector<int>& params = std::vector<int>());
/** @overload */
CV_EXPORTS_W Mat imdecode( InputArray buf, int flags );
/** @brief Reads an image from a buffer in memory.
@param buf Input array or vector of bytes.
@param flags The same flags as in imread .
@param dst The optional output placeholder for the decoded matrix. It can help avoid memory
reallocation when the function is called repeatedly for images of the same size.
The function reads an image from the specified buffer in memory. If the buffer is too short or
contains invalid data, an empty matrix/image is returned.
See imread for the list of supported formats and flags description.
@note In the case of color images, the decoded images will have the channels stored in B G R order.
*/
CV_EXPORTS Mat imdecode( InputArray buf, int flags, Mat* dst);
/** @brief Encodes an image into a memory buffer.
@param ext File extension that defines the output format.
@param img Image to be written.
@param buf Output buffer resized to fit the compressed image.
@param params Format-specific parameters. See imwrite .
The function compresses the image and stores it in the memory buffer that is resized to fit the
result. See imwrite for the list of supported formats and flags description.
@note cvEncodeImage returns a single-row matrix of type CV_8UC1 that contains the encoded image
as an array of bytes.
*/
CV_EXPORTS_W bool imencode( const String& ext, InputArray img,
CV_OUT std::vector<uchar>& buf,
const std::vector<int>& params = std::vector<int>());
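imencode and imdecode together give an in-memory round trip, e.g. for sending images over a
network. A sketch with a synthetic image:
@code
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
    cv::Mat img(64, 64, CV_8UC3, cv::Scalar(0, 128, 255)); // synthetic image
    std::vector<uchar> buf;
    if (!cv::imencode(".png", img, buf)) // compress to an in-memory PNG
        return 1;
    cv::Mat restored = cv::imdecode(buf, cv::IMREAD_COLOR); // and back
    return restored.empty() ? 1 : 0;
}
@endcode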
//! @} imgcodecs
} // cv
#endif //__OPENCV_IMGCODECS_HPP__

@@ -48,6 +48,10 @@
extern "C" {
#endif /* __cplusplus */
/** @addtogroup imgcodecs_c
@{
*/
enum
{
/* 8bit, color or not */
@@ -124,6 +128,7 @@ CVAPI(int) cvHaveImageWriter(const char* filename);
#define cvvSaveImage cvSaveImage
#define cvvConvertImage cvConvertImage
/** @} imgcodecs_c */
#ifdef __cplusplus
}

@@ -47,6 +47,11 @@
#import <ImageIO/ImageIO.h>
#include "opencv2/core/core.hpp"
//! @addtogroup imgcodecs_ios
//! @{
UIImage* MatToUIImage(const cv::Mat& image);
void UIImageToMat(const UIImage* image,
cv::Mat& m, bool alphaExist = false);
//! @}

@@ -969,7 +969,7 @@ An example using the LineSegmentDetector
/** @brief Line segment detector class
following the algorithm described at @cite Rafael12 .
*/
class CV_EXPORTS_W LineSegmentDetector : public Algorithm
{
@@ -1361,7 +1361,7 @@ call
is equivalent to
\f[\texttt{Sobel(src, dst, ddepth, dx, dy, CV\_SCHARR, scale, delta, borderType)} .\f]
@param src input image.
@param dst output image of the same size and the same number of channels as src.
@@ -1418,7 +1418,7 @@ CV_EXPORTS_W void Laplacian( InputArray src, OutputArray dst, int ddepth,
An example on using the canny edge detector
*/
/** @brief Finds edges in an image using the Canny algorithm @cite Canny86 .
The function finds edges in the input image image and marks them in the output map edges using the
Canny algorithm. The smallest value between threshold1 and threshold2 is used for edge linking. The
@@ -2940,7 +2940,7 @@ An example using the watershed algorithm
/** @brief Performs a marker-based image segmentation using the watershed algorithm.
The function implements one of the variants of watershed, non-parametric marker-based segmentation
algorithm, described in @cite Meyer92 .
Before passing the image to the function, you have to roughly outline the desired regions in the
image markers with positive (\>0) indices. So, every region is represented as one or more connected
@@ -3050,7 +3050,7 @@ The functions distanceTransform calculate the approximate or precise distance fr
image pixel to the nearest zero pixel. For zero image pixels, the distance will obviously be zero.
When maskSize == DIST_MASK_PRECISE and distanceType == DIST_L2 , the function runs the
algorithm described in @cite Felzenszwalb04 . This algorithm is parallelized with the TBB library.
In other cases, the algorithm @cite Borgefors86 is used. This means that for a pixel the function
finds the shortest path to the nearest zero pixel consisting of basic shifts: horizontal, vertical,
@@ -3371,7 +3371,7 @@ CV_EXPORTS_W int connectedComponentsWithStats(InputArray image, OutputArray labe
/** @brief Finds contours in a binary image.
The function retrieves contours from the binary image using the algorithm @cite Suzuki85 . The contours
are a useful tool for shape analysis and object detection and recognition. See squares.c in the
OpenCV sample directory.


@@ -46,15 +46,78 @@
#include "opencv2/core.hpp"
/**
@defgroup objdetect Object Detection
Haar Feature-based Cascade Classifier for Object Detection
----------------------------------------------------------
The object detector described below has been initially proposed by Paul Viola @cite Viola01 and
improved by Rainer Lienhart @cite Lienhart02 .
First, a classifier (namely a *cascade of boosted classifiers working with haar-like features*) is
trained with a few hundred sample views of a particular object (e.g., a face or a car), called
positive examples, that are scaled to the same size (say, 20x20), and negative examples -
arbitrary images of the same size.
After a classifier is trained, it can be applied to a region of interest (of the same size as used
during the training) in an input image. The classifier outputs a "1" if the region is likely to show
the object (i.e., face/car), and "0" otherwise. To search for the object in the whole image one can
move the search window across the image and check every location using the classifier. The
classifier is designed so that it can be easily "resized" in order to be able to find the objects of
interest at different sizes, which is more efficient than resizing the image itself. So, to find an
object of an unknown size in the image the scan procedure should be done several times at different
scales.
The word "cascade" in the classifier name means that the resultant classifier consists of several
simpler classifiers (*stages*) that are applied subsequently to a region of interest until at some
stage the candidate is rejected or all the stages are passed. The word "boosted" means that the
classifiers at every stage of the cascade are complex themselves and they are built out of basic
classifiers using one of four different boosting techniques (weighted voting). Currently Discrete
Adaboost, Real Adaboost, Gentle Adaboost and Logitboost are supported. The basic classifiers are
decision-tree classifiers with at least 2 leaves. Haar-like features are the input to the basic
classifiers, and are calculated as described below. The current algorithm uses the following
Haar-like features:
![image](pics/haarfeatures.png)
The feature used in a particular classifier is specified by its shape (1a, 2b etc.), position within
the region of interest and the scale (this scale is not the same as the scale used at the detection
stage, though these two scales are multiplied). For example, in the case of the third line feature
(2c) the response is calculated as the difference between the sum of image pixels under the
rectangle covering the whole feature (including the two white stripes and the black stripe in the
middle) and the sum of the image pixels under the black stripe multiplied by 3 in order to
compensate for the differences in the size of areas. The sums of pixel values over rectangular
regions are calculated rapidly using integral images (see below and the integral description).
To see the object detector at work, have a look at the facedetect demo:
<https://github.com/Itseez/opencv/tree/master/samples/cpp/dbt_face_detection.cpp>
The following reference is for the detection part only. There is a separate application called
opencv_traincascade that can train a cascade of boosted classifiers from a set of samples.
@note In the new C++ interface it is also possible to use LBP (local binary pattern) features in
addition to Haar-like features.

[Viola01] Paul Viola and Michael J. Jones. Rapid Object Detection using a Boosted Cascade of
Simple Features. IEEE CVPR, 2001. The paper is available online at
<http://research.microsoft.com/en-us/um/people/viola/Pubs/Detect/violaJones_CVPR2001.pdf>
@{
@defgroup objdetect_c C API
@}
*/
typedef struct CvHaarClassifierCascade CvHaarClassifierCascade;
namespace cv
{
//! @addtogroup objdetect
//! @{
///////////////////////////// Object Detection ////////////////////////////
//! class for grouping object candidates, detected by Cascade Classifier, HOG etc.
//! instance of the class is to be passed to cv::partition (see cxoperations.hpp)
class CV_EXPORTS SimilarRects
{
public:
@@ -70,13 +133,32 @@ public:
double eps;
};
/** @brief Groups the object candidate rectangles.
@param rectList Input/output vector of rectangles. Output vector includes retained and grouped
rectangles. (The Python list is not modified in place.)
@param groupThreshold Minimum possible number of rectangles minus 1. The threshold is used in a
group of rectangles to retain it.
@param eps Relative difference between sides of the rectangles to merge them into a group.
The function is a wrapper for the generic function partition . It clusters all the input rectangles
using the rectangle equivalence criteria that combines rectangles with similar sizes and similar
locations. The similarity is defined by eps. When eps=0 , no clustering is done at all. If
\f$\texttt{eps}\rightarrow +\infty\f$ , all the rectangles are put in one cluster. Then, the small
clusters containing less than or equal to groupThreshold rectangles are rejected. In each remaining
cluster, the average rectangle is computed and put into the output rectangle list.
*/
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps = 0.2);
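A sketch of the clustering behaviour: three near-coincident candidates survive as one averaged
rectangle, while the isolated candidate is rejected (groupThreshold = 1):
@code
#include <opencv2/objdetect.hpp>
#include <vector>
#include <cstdio>

int main()
{
    std::vector<cv::Rect> rects;
    rects.push_back(cv::Rect(100, 100, 50, 50));
    rects.push_back(cv::Rect(102,  98, 51, 49));
    rects.push_back(cv::Rect( 98, 101, 49, 51));
    rects.push_back(cv::Rect(300, 300, 40, 40)); // isolated candidate
    cv::groupRectangles(rects, 1, 0.2);          // clusters of <= 1 are dropped
    std::printf("%d group(s) kept\n", (int)rects.size()); // expect 1
    return 0;
}
@endcode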
/** @overload */
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights,
int groupThreshold, double eps = 0.2);
/** @overload */
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold,
double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
/** @overload */
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, int groupThreshold, double eps = 0.2);
/** @overload */
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights,
std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
@@ -133,15 +215,54 @@ public:
virtual Ptr<MaskGenerator> getMaskGenerator() = 0;
};
/** @brief Cascade classifier class for object detection.
*/
class CV_EXPORTS_W CascadeClassifier
{
public:
CV_WRAP CascadeClassifier();
/** @brief Loads a classifier from a file.
@param filename Name of the file from which the classifier is loaded.
*/
CV_WRAP CascadeClassifier(const String& filename);
~CascadeClassifier();
/** @brief Checks whether the classifier has been loaded.
*/
CV_WRAP bool empty() const;
/** @brief Loads a classifier from a file.
@param filename Name of the file from which the classifier is loaded. The file may contain an old
HAAR classifier trained by the haartraining application or a new cascade classifier trained by the
traincascade application.
*/
CV_WRAP bool load( const String& filename );
/** @brief Reads a classifier from a FileStorage node.
@note The file may contain a new cascade classifier (trained by the traincascade application) only.
*/
CV_WRAP bool read( const FileNode& node );
/** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list
of rectangles.
@param image Matrix of the type CV_8U containing an image where objects are detected.
@param objects Vector of rectangles where each rectangle contains the detected object, the
rectangles may be partially outside the original image.
@param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
@param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
to retain it.
@param flags Parameter with the same meaning for an old cascade as in the function
cvHaarDetectObjects. It is not used for a new cascade.
@param minSize Minimum possible object size. Objects smaller than that are ignored.
@param maxSize Maximum possible object size. Objects larger than that are ignored.
The function is parallelized with the TBB library.
@note
- (Python) A face detection example using cascade classifiers can be found at
opencv_source_code/samples/python2/facedetect.py
*/
CV_WRAP void detectMultiScale( InputArray image,
CV_OUT std::vector<Rect>& objects,
double scaleFactor = 1.1,
@@ -149,6 +270,21 @@ public:
Size minSize = Size(),
Size maxSize = Size() );
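A hedged usage sketch (the cascade file ships with OpenCV in data/haarcascades; the image path is
hypothetical):
@code
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/objdetect.hpp>
#include <vector>

int main()
{
    cv::CascadeClassifier face_cascade;
    if (!face_cascade.load("haarcascade_frontalface_alt.xml"))
        return 1;
    cv::Mat img = cv::imread("people.jpg"), gray;
    if (img.empty())
        return 1;
    cv::cvtColor(img, gray, cv::COLOR_BGR2GRAY);
    cv::equalizeHist(gray, gray); // improves detection robustness
    std::vector<cv::Rect> faces;
    face_cascade.detectMultiScale(gray, faces, 1.1, 3, 0, cv::Size(30, 30));
    return faces.empty() ? 1 : 0;
}
@endcode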
/** @overload
@param image Matrix of the type CV_8U containing an image where objects are detected.
@param objects Vector of rectangles where each rectangle contains the detected object, the
rectangles may be partially outside the original image.
@param numDetections Vector of detection numbers for the corresponding objects. An object's number
of detections is the number of neighboring positively classified rectangles that were joined
together to form the object.
@param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
@param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
to retain it.
@param flags Parameter with the same meaning for an old cascade as in the function
cvHaarDetectObjects. It is not used for a new cascade.
@param minSize Minimum possible object size. Objects smaller than that are ignored.
@param maxSize Maximum possible object size. Objects larger than that are ignored.
*/
CV_WRAP_AS(detectMultiScale2) void detectMultiScale( InputArray image,
CV_OUT std::vector<Rect>& objects,
CV_OUT std::vector<int>& numDetections,
@@ -157,6 +293,9 @@ public:
Size minSize=Size(),
Size maxSize=Size() );
/** @overload
If `outputRejectLevels` is `true`, it also returns `rejectLevels` and `levelWeights`.
*/
CV_WRAP_AS(detectMultiScale3) void detectMultiScale( InputArray image,
CV_OUT std::vector<Rect>& objects,
CV_OUT std::vector<int>& rejectLevels,
@@ -184,14 +323,14 @@ CV_EXPORTS Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGene
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
//! struct for detection region of interest (ROI)
struct DetectionROI
{
//! scale(size) of the bounding box
double scale;
//! set of requested locations to be evaluated
std::vector<cv::Point> locations;
//! vector that will contain confidence values for each location
std::vector<double> confidences;
};
@@ -250,24 +389,24 @@ public:
Size winStride = Size(), Size padding = Size(),
const std::vector<Point>& locations = std::vector<Point>()) const;
//! with found weights output
CV_WRAP virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
CV_OUT std::vector<double>& weights,
double hitThreshold = 0, Size winStride = Size(),
Size padding = Size(),
const std::vector<Point>& searchLocations = std::vector<Point>()) const;
//! without found weights output
virtual void detect(const Mat& img, CV_OUT std::vector<Point>& foundLocations,
double hitThreshold = 0, Size winStride = Size(),
Size padding = Size(),
const std::vector<Point>& searchLocations=std::vector<Point>()) const;
//! with result weights output
CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
CV_OUT std::vector<double>& foundWeights, double hitThreshold = 0,
Size winStride = Size(), Size padding = Size(), double scale = 1.05,
double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const;
//! without found weights output
virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
double hitThreshold = 0, Size winStride = Size(),
Size padding = Size(), double scale = 1.05,
@@ -295,24 +434,26 @@ public:
CV_PROP int nlevels;
//! evaluate specified ROI and return confidence value for each location
virtual void detectROI(const cv::Mat& img, const std::vector<cv::Point> &locations,
CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
double hitThreshold = 0, cv::Size winStride = Size(),
cv::Size padding = Size()) const;
//! evaluate specified ROI and return confidence value for each location in multiple scales
virtual void detectMultiScaleROI(const cv::Mat& img,
CV_OUT std::vector<cv::Rect>& foundLocations,
std::vector<DetectionROI>& locations,
double hitThreshold = 0,
int groupThreshold = 0) const;
//! read/parse Dalal's alt model file
void readALTModel(String modelfile);
void groupRectangles(std::vector<cv::Rect>& rectList, std::vector<double>& weights, int groupThreshold, double eps) const;
};
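A sketch of the HOG people detector with the bundled default coefficients (the image path is
hypothetical):
@code
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/objdetect.hpp>
#include <vector>

int main()
{
    cv::HOGDescriptor hog;
    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
    cv::Mat img = cv::imread("street.jpg");
    if (img.empty())
        return 1;
    std::vector<cv::Rect> people;
    std::vector<double> weights; // confidence per detection
    hog.detectMultiScale(img, people, weights,
                         0,                 // hitThreshold
                         cv::Size(8, 8),    // winStride
                         cv::Size(),        // padding
                         1.05);             // scale step
    return 0;
}
@endcode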
//! @} objdetect
}
#include "opencv2/objdetect/detection_based_tracker.hpp"

@@ -51,6 +51,10 @@
namespace cv
{
//! @addtogroup objdetect
//! @{
class CV_EXPORTS DetectionBasedTracker
{
public:
@@ -211,6 +215,9 @@ class CV_EXPORTS DetectionBasedTracker
cv::Rect calcTrackedObjectPositionToShow(int i, ObjectStatus& status) const;
void detectInRegion(const cv::Mat& img, const cv::Rect& r, std::vector<cv::Rect>& detectedObjectsInRegions);
};
//! @} objdetect
} //end of cv namespace
#endif

