diff --git a/CMakeLists.txt b/CMakeLists.txt
index ba872cd9cd..07a742597c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -271,7 +271,6 @@ OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     ON
 OCV_OPTION(WITH_DIRECTX        "Include DirectX support"                     ON   IF (WIN32 AND NOT WINRT) )
 OCV_OPTION(WITH_INTELPERC      "Include Intel Perceptual Computing support"  OFF  IF (WIN32 AND NOT WINRT) )
 OCV_OPTION(WITH_LIBREALSENSE   "Include Intel librealsense support"          OFF  IF (NOT WITH_INTELPERC) )
-OCV_OPTION(WITH_MATLAB         "Include Matlab support"                      ON   IF (NOT ANDROID AND NOT IOS AND NOT WINRT))
 OCV_OPTION(WITH_VA             "Include VA support"                          OFF  IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_VA_INTEL       "Include Intel VA-API/OpenCL support"         OFF  IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_MFX            "Include Intel Media SDK support"             OFF   IF ((UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)) )
@@ -694,11 +693,6 @@ if(WITH_DIRECTX)
   include(cmake/OpenCVDetectDirectX.cmake)
 endif()
 
-# --- Matlab/Octave ---
-if(WITH_MATLAB)
-  include(cmake/OpenCVFindMatlab.cmake)
-endif()
-
 if(WITH_VTK)
   include(cmake/OpenCVDetectVTK.cmake)
 endif()
@@ -1518,15 +1512,7 @@ if(BUILD_JAVA OR BUILD_opencv_java)
   status("    Java tests:"    BUILD_TESTS AND opencv_test_java_BINARY_DIR                                 THEN YES ELSE NO)
 endif()
 
-# ========================= matlab =========================
-if(WITH_MATLAB OR MATLAB_FOUND)
-  status("")
-  status("  Matlab:" MATLAB_FOUND THEN "YES" ELSE "NO")
-  if(MATLAB_FOUND)
-    status("    mex:"         MATLAB_MEX_SCRIPT  THEN  "${MATLAB_MEX_SCRIPT}"   ELSE NO)
-    status("    Compiler/generator:" MEX_WORKS   THEN  "Working"                ELSE "Not working (bindings will not be generated)")
-  endif()
-endif()
+ocv_cmake_hook(STATUS_DUMP_EXTRA)
 
 # ========================== auxiliary ==========================
 status("")
diff --git a/cmake/OpenCVFindMatlab.cmake b/cmake/OpenCVFindMatlab.cmake
deleted file mode 100644
index ffe8857fec..0000000000
--- a/cmake/OpenCVFindMatlab.cmake
+++ /dev/null
@@ -1,199 +0,0 @@
-# ----- Find Matlab/Octave -----
-#
-# OpenCVFindMatlab.cmake attempts to locate the install path of Matlab in order
-# to extract the mex headers, libraries and shell scripts. If found
-# successfully, the following variables will be defined
-#
-#   MATLAB_FOUND:       true/false
-#   MATLAB_ROOT_DIR:    Root of Matlab installation
-#   MATLAB_BIN:         The main Matlab "executable" (shell script)
-#   MATLAB_MEX_SCRIPT:  The mex script used to compile mex files
-#   MATLAB_INCLUDE_DIRS:Path to "mex.h"
-#   MATLAB_LIBRARY_DIRS:Path to mex and matrix libraries
-#   MATLAB_LIBRARIES:   The Matlab libs, usually mx, mex, mat
-#   MATLAB_MEXEXT:      The mex library extension. It will be one of:
-#                         mexwin32, mexwin64,  mexglx, mexa64, mexmac,
-#                         mexmaci,  mexmaci64, mexsol, mexs64
-#   MATLAB_ARCH:        The installation architecture. It is **usually**
-#                       the MEXEXT with the preceding "mex" removed,
-#                       though it's different for linux distros.
-#
-# There doesn't appear to be an elegant way to detect all versions of Matlab
-# across different platforms. If you know the matlab path and want to avoid
-# the search, you can define the path to the Matlab root when invoking cmake:
-#
-#   cmake -DMATLAB_ROOT_DIR='/PATH/TO/ROOT_DIR' ..
-
-
-
-# ----- set_library_presuffix -----
-#
-# Matlab tends to use some non-standard prefixes and suffixes on its libraries.
-# For example, libmx.dll on Windows (Windows does not add prefixes) and
-# mkl.dylib on OS X (OS X uses "lib" prefixes).
-# On some versions of Windows the .dll suffix also appears to not be checked.
-#
-# This function modifies the library prefixes and suffixes used by
-# find_library when finding Matlab libraries. It does not affect scopes
-# outside of this file.
-function(set_libarch_prefix_suffix)
-  if (UNIX AND NOT APPLE)
-    set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE)
-    set(CMAKE_FIND_LIBRARY_SUFFIXES ".so" ".a" PARENT_SCOPE)
-  elseif (APPLE)
-    set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE)
-    set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".a" PARENT_SCOPE)
-  elseif (WIN32)
-    set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE)
-    set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll" PARENT_SCOPE)
-  endif()
-endfunction()
-
-
-
-# ----- locate_matlab_root -----
-#
-# Attempt to find the path to the Matlab installation. If successful, sets
-# the absolute path in the variable MATLAB_ROOT_DIR
-function(locate_matlab_root)
-
-  # --- UNIX/APPLE ---
-  if (UNIX)
-    # possible root locations, in order of likelihood
-    set(SEARCH_DIRS_ /Applications /usr/local /opt/local /usr /opt)
-    foreach (DIR_ ${SEARCH_DIRS_})
-      file(GLOB MATLAB_ROOT_DIR_ ${DIR_}/MATLAB/R* ${DIR_}/MATLAB_R*)
-      if (MATLAB_ROOT_DIR_)
-        # sort in order from highest to lowest
-        # normally it's in the format MATLAB_R[20XX][A/B]
-        # TODO: numerical rather than lexicographic sort. However,
-        # CMake does not support floating-point MATH(EXPR ...) at this time.
-        list(SORT MATLAB_ROOT_DIR_)
-        list(REVERSE MATLAB_ROOT_DIR_)
-        list(GET MATLAB_ROOT_DIR_ 0 MATLAB_ROOT_DIR_)
-        set(MATLAB_ROOT_DIR ${MATLAB_ROOT_DIR_} PARENT_SCOPE)
-        return()
-      endif()
-    endforeach()
-
-  # --- WINDOWS ---
-  elseif (WIN32)
-    # 1. search the path environment variable
-    find_program(MATLAB_ROOT_DIR_ matlab PATHS ENV PATH)
-    if (MATLAB_ROOT_DIR_)
-      # get the root directory from the full path
-      # /path/to/matlab/rootdir/bin/matlab.exe
-      get_filename_component(MATLAB_ROOT_DIR_ ${MATLAB_ROOT_DIR_} PATH)
-      get_filename_component(MATLAB_ROOT_DIR_ ${MATLAB_ROOT_DIR_} PATH)
-      set(MATLAB_ROOT_DIR ${MATLAB_ROOT_DIR_} PARENT_SCOPE)
-      return()
-    endif()
-
-    # 2. search the registry
-    # determine the available Matlab versions
-    set(REG_EXTENSION_ "SOFTWARE\\Mathworks\\MATLAB")
-    set(REG_ROOTS_ "HKEY_LOCAL_MACHINE" "HKEY_CURRENT_USER")
-    foreach(REG_ROOT_ ${REG_ROOTS_})
-      execute_process(COMMAND reg query "${REG_ROOT_}\\${REG_EXTENSION_}" OUTPUT_VARIABLE QUERY_RESPONSE_ ERROR_VARIABLE UNUSED_)
-      if (QUERY_RESPONSE_)
-        string(REGEX MATCHALL "[0-9]\\.[0-9]" VERSION_STRINGS_ ${QUERY_RESPONSE_})
-        list(APPEND VERSIONS_ ${VERSION_STRINGS_})
-      endif()
-    endforeach()
-
-    # select the highest version
-    list(APPEND VERSIONS_ "0.0")
-    list(SORT VERSIONS_)
-    list(REVERSE VERSIONS_)
-    list(GET VERSIONS_ 0 VERSION_)
-
-    # request the MATLABROOT from the registry
-    foreach(REG_ROOT_ ${REG_ROOTS_})
-      get_filename_component(QUERY_RESPONSE_ [${REG_ROOT_}\\${REG_EXTENSION_}\\${VERSION_};MATLABROOT] ABSOLUTE)
-      if (NOT ${QUERY_RESPONSE_} MATCHES "registry$")
-        set(MATLAB_ROOT_DIR ${QUERY_RESPONSE_} PARENT_SCOPE)
-        return()
-      endif()
-    endforeach()
-  endif()
-endfunction()
-
-
-
-# ----- locate_matlab_components -----
-#
-# Given a directory MATLAB_ROOT_DIR, attempt to find the Matlab components
-# (include directory and libraries) under the root. If everything is found,
-# sets the variable MATLAB_FOUND to TRUE
-function(locate_matlab_components MATLAB_ROOT_DIR)
-  # get the mex extension
-  find_file(MATLAB_MEXEXT_SCRIPT_ NAMES mexext mexext.bat PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH)
-  execute_process(COMMAND ${MATLAB_MEXEXT_SCRIPT_}
-                  OUTPUT_VARIABLE MATLAB_MEXEXT_
-                  OUTPUT_STRIP_TRAILING_WHITESPACE)
-  if (NOT MATLAB_MEXEXT_)
-    return()
-  endif()
-
-  # map the mexext to an architecture extension
-  set(ARCHITECTURES_ "maci64" "maci" "glnxa64" "glnx64" "sol64" "sola64" "win32" "win64" )
-  foreach(ARCHITECTURE_ ${ARCHITECTURES_})
-    if(EXISTS ${MATLAB_ROOT_DIR}/bin/${ARCHITECTURE_})
-      set(MATLAB_ARCH_ ${ARCHITECTURE_})
-      break()
-    endif()
-  endforeach()
-
-  # get the path to the libraries
-  set(MATLAB_LIBRARY_DIRS_ ${MATLAB_ROOT_DIR}/bin/${MATLAB_ARCH_})
-
-  # get the libraries
-  set_libarch_prefix_suffix()
-  find_library(MATLAB_LIB_MX_  mx  PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH)
-  find_library(MATLAB_LIB_MEX_ mex PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH)
-  find_library(MATLAB_LIB_MAT_ mat PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH)
-  set(MATLAB_LIBRARIES_ ${MATLAB_LIB_MX_} ${MATLAB_LIB_MEX_} ${MATLAB_LIB_MAT_})
-
-  # get the include path
-  find_path(MATLAB_INCLUDE_DIRS_ mex.h ${MATLAB_ROOT_DIR}/extern/include)
-
-  # get the mex shell script
-  find_program(MATLAB_MEX_SCRIPT_ NAMES mex mex.bat PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH)
-
-  # get the Matlab executable
-  find_program(MATLAB_BIN_ NAMES matlab PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH)
-
-  # export into parent scope
-  if (MATLAB_MEX_SCRIPT_ AND MATLAB_LIBRARIES_ AND MATLAB_INCLUDE_DIRS_)
-    set(MATLAB_BIN          ${MATLAB_BIN_}          PARENT_SCOPE)
-    set(MATLAB_MEX_SCRIPT   ${MATLAB_MEX_SCRIPT_}   PARENT_SCOPE)
-    set(MATLAB_INCLUDE_DIRS ${MATLAB_INCLUDE_DIRS_} PARENT_SCOPE)
-    set(MATLAB_LIBRARIES    ${MATLAB_LIBRARIES_}    PARENT_SCOPE)
-    set(MATLAB_LIBRARY_DIRS ${MATLAB_LIBRARY_DIRS_} PARENT_SCOPE)
-    set(MATLAB_MEXEXT       ${MATLAB_MEXEXT_}       PARENT_SCOPE)
-    set(MATLAB_ARCH         ${MATLAB_ARCH_}         PARENT_SCOPE)
-  endif()
-endfunction()
-
-
-
-# ----------------------------------------------------------------------------
-# FIND MATLAB COMPONENTS
-# ----------------------------------------------------------------------------
-if (NOT MATLAB_FOUND)
-
-  # attempt to find the Matlab root folder
-  if (NOT MATLAB_ROOT_DIR)
-    locate_matlab_root()
-  endif()
-
-  # given the matlab root folder, find the library locations
-  if (MATLAB_ROOT_DIR)
-    locate_matlab_components(${MATLAB_ROOT_DIR})
-  endif()
-  find_package_handle_standard_args(Matlab DEFAULT_MSG
-                                           MATLAB_MEX_SCRIPT   MATLAB_INCLUDE_DIRS
-                                           MATLAB_ROOT_DIR     MATLAB_LIBRARIES
-                                           MATLAB_LIBRARY_DIRS MATLAB_MEXEXT
-                                           MATLAB_ARCH         MATLAB_BIN)
-endif()
diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index 7c81b0c6f7..6a60648359 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -296,28 +296,29 @@ endfunction()
 # Calls 'add_subdirectory' for each location.
 # Note: both input lists should have same length.
 # Usage: _add_modules_1(<list with paths> <list with names>)
-function(_add_modules_1 paths names)
-  list(LENGTH ${paths} len)
-  if(len EQUAL 0)
-    return()
-  endif()
-  list(LENGTH ${names} len_verify)
-  if(NOT len EQUAL len_verify)
-    message(FATAL_ERROR "Bad configuration! ${len} != ${len_verify}")
+macro(_add_modules_1 paths names)
+  ocv_debug_message("_add_modules_1(paths=${paths}, names=${names}, ... " ${ARGN} ")")
+  list(LENGTH ${paths} __len)
+  if(NOT __len EQUAL 0)
+    list(LENGTH ${names} __len_verify)
+    if(NOT __len EQUAL __len_verify)
+      message(FATAL_ERROR "Bad configuration! ${__len} != ${__len_verify}")
+    endif()
+    math(EXPR __len "${__len} - 1")
+    foreach(i RANGE ${__len})
+      list(GET ${paths} ${i} __path)
+      list(GET ${names} ${i} __name)
+      #message(STATUS "First pass: ${__name} => ${__path}")
+      include("${__path}/cmake/init.cmake" OPTIONAL)
+      add_subdirectory("${__path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${__name}")
+    endforeach()
   endif()
-  math(EXPR len "${len} - 1")
-  foreach(i RANGE ${len})
-    list(GET ${paths} ${i} path)
-    list(GET ${names} ${i} name)
-    #message(STATUS "First pass: ${name} => ${path}")
-    include("${path}/cmake/init.cmake" OPTIONAL)
-    add_subdirectory("${path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${name}")
-  endforeach()
-endfunction()
+endmacro()
 
 # Calls 'add_subdirectory' for each module name.
 # Usage: _add_modules_2([<module> ...])
-function(_add_modules_2)
+macro(_add_modules_2)
+  ocv_debug_message("_add_modules_2(" ${ARGN} ")")
   foreach(m ${ARGN})
     set(the_module "${m}")
     ocv_cmake_hook(PRE_MODULES_CREATE_${the_module})
@@ -333,7 +334,8 @@ function(_add_modules_2)
     endif()
     ocv_cmake_hook(POST_MODULES_CREATE_${the_module})
   endforeach()
-endfunction()
+  unset(the_module)
+endmacro()
 
 # Check if list of input items is unique.
 # Usage: _assert_uniqueness(<failure message> <element> [<element> ...])
diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index 60c20192dc..e0c740caf9 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -121,8 +121,10 @@ macro(ocv_assert)
 endmacro()
 
 macro(ocv_debug_message)
-#  string(REPLACE ";" " " __msg "${ARGN}")
-#  message(STATUS "${__msg}")
+  if(OPENCV_CMAKE_DEBUG_MESSAGES)
+    string(REPLACE ";" " " __msg "${ARGN}")
+    message(STATUS "${__msg}")
+  endif()
 endmacro()
 
 macro(ocv_check_environment_variables)
diff --git a/doc/opencv.bib b/doc/opencv.bib
index e059096dc4..5c0e475a27 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -1035,3 +1035,37 @@
     publisher = {BMVA Press},
     author = {Alexander Duda and Udo Frese},
 }
+
+@book{jahne2000computer,
+  title={Computer vision and applications: a guide for students and practitioners},
+  author={Jahne, Bernd},
+  year={2000},
+  publisher={Elsevier}
+}
+
+@book{bigun2006vision,
+  title={Vision with direction},
+  author={Bigun, Josef},
+  year={2006},
+  publisher={Springer}
+}
+
+@inproceedings{van1995estimators,
+  title={Estimators for orientation and anisotropy in digitized images},
+  author={Van Vliet, Lucas J and Verbeek, Piet W},
+  booktitle={ASCI},
+  volume={95},
+  pages={16--18},
+  year={1995}
+}
+
+@article{yang1996structure,
+  title={Structure adaptive anisotropic image filtering},
+  author={Yang, Guang-Zhong and Burger, Peter and Firmin, David N and Underwood, SR},
+  journal={Image and Vision Computing},
+  volume={14},
+  number={2},
+  pages={135--145},
+  year={1996},
+  publisher={Elsevier}
+}
diff --git a/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown b/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown
index b0f92d72c6..1d0ebb3967 100644
--- a/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown
+++ b/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown
@@ -153,15 +153,15 @@ padding etc. This function takes following arguments:
 
 -   **borderType** - Flag defining what kind of border to be added. It can be following types:
     -   **cv.BORDER_CONSTANT** - Adds a constant colored border. The value should be given
-            as next argument.
-        -   **cv.BORDER_REFLECT** - Border will be mirror reflection of the border elements,
-            like this : *fedcba|abcdefgh|hgfedcb*
-        -   **cv.BORDER_REFLECT_101** or **cv.BORDER_DEFAULT** - Same as above, but with a
-            slight change, like this : *gfedcb|abcdefgh|gfedcba*
-        -   **cv.BORDER_REPLICATE** - Last element is replicated throughout, like this:
-            *aaaaaa|abcdefgh|hhhhhhh*
-        -   **cv.BORDER_WRAP** - Can't explain, it will look like this :
-            *cdefgh|abcdefgh|abcdefg*
+        as next argument.
+    -   **cv.BORDER_REFLECT** - Border will be mirror reflection of the border elements,
+        like this : *fedcba|abcdefgh|hgfedcb*
+    -   **cv.BORDER_REFLECT_101** or **cv.BORDER_DEFAULT** - Same as above, but with a
+        slight change, like this : *gfedcb|abcdefgh|gfedcba*
+    -   **cv.BORDER_REPLICATE** - Last element is replicated throughout, like this:
+        *aaaaaa|abcdefgh|hhhhhhh*
+    -   **cv.BORDER_WRAP** - Can't explain, it will look like this :
+        *cdefgh|abcdefgh|abcdefg*
 
 -   **value** - Color of border if border type is cv.BORDER_CONSTANT
 
diff --git a/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown b/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown
index e5e9306d27..d6af059903 100644
--- a/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown
+++ b/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown
@@ -37,6 +37,7 @@ cv.namedWindow('image')
 
 # create trackbars for color change
 cv.createTrackbar('R','image',0,255,nothing)
+
 cv.createTrackbar('G','image',0,255,nothing)
 cv.createTrackbar('B','image',0,255,nothing)
 
diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown b/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown
new file mode 100755
index 0000000000..16df8eedd2
--- /dev/null
+++ b/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown
@@ -0,0 +1,91 @@
+Anisotropic image segmentation by a gradient structure tensor {#tutorial_anisotropic_image_segmentation_by_a_gst}
+==========================
+
+Goal
+----
+
+In this tutorial you will learn:
+
+-   what the gradient structure tensor is
+-   how to estimate orientation and coherency of an anisotropic image by a gradient structure tensor
+-   how to segment an anisotropic image with a single local orientation by a gradient structure tensor
+
+Theory
+------
+
+@note The explanation is based on the books @cite jahne2000computer and @cite bigun2006vision, and on the paper @cite van1995estimators. A good physical explanation of the gradient structure tensor is given in @cite yang1996structure. Also, you can refer to the Wikipedia page [Structure tensor].
+@note An anisotropic image on this page is a real-world image.
+
+### What is the gradient structure tensor?
+
+In mathematics, the gradient structure tensor (also referred to as the second-moment matrix, the second order moment tensor, the inertia tensor, etc.) is a matrix derived from the gradient of a function. It summarizes the predominant directions of the gradient in a specified neighborhood of a point, and the degree to which those directions are coherent (coherency). The gradient structure tensor is widely used in image processing and computer vision for 2D/3D image segmentation, motion detection, adaptive filtration, local image features detection, etc.
+
+Important features of anisotropic images include the orientation and coherency of the local anisotropy. In this tutorial we will show how to estimate orientation and coherency, and how to segment an anisotropic image with a single local orientation by a gradient structure tensor.
+
+The gradient structure tensor of an image is a 2x2 symmetric matrix. Eigenvectors of the gradient structure tensor indicate local orientation, whereas eigenvalues give coherency (a measure of anisotropism).
+
+The gradient structure tensor \f$J\f$ of an image \f$Z\f$ can be written as:
+
+\f[J = \begin{bmatrix}
+J_{11} & J_{12}  \\
+J_{12} & J_{22}
+\end{bmatrix}\f]
+
+where \f$J_{11} = M[Z_{x}^{2}]\f$, \f$J_{22} = M[Z_{y}^{2}]\f$ and \f$J_{12} = M[Z_{x}Z_{y}]\f$ are the components of the tensor, \f$M[]\f$ denotes mathematical expectation (we can consider this operation as averaging in a window w), and \f$Z_{x}\f$ and \f$Z_{y}\f$ are the partial derivatives of the image \f$Z\f$ with respect to \f$x\f$ and \f$y\f$.
+
+The eigenvalues of the tensor can be found with the formula below:
+\f[\lambda_{1,2} = \frac{1}{2} \left [ J_{11} + J_{22} \pm \sqrt{(J_{11} - J_{22})^{2} + 4J_{12}^{2}} \right ]\f]
+where \f$\lambda_1\f$ is the largest eigenvalue and \f$\lambda_2\f$ is the smallest eigenvalue.
+
+### How to estimate orientation and coherency of an anisotropic image by gradient structure tensor?
+
+The orientation of an anisotropic image:
+\f[\alpha = 0.5\arctan\frac{2J_{12}}{J_{22} - J_{11}}\f]
+
+Coherency:
+\f[C = \frac{\lambda_1 - \lambda_2}{\lambda_1 + \lambda_2}\f]
+
+The coherency ranges from 0 to 1. For ideal local orientation (\f$\lambda_2\f$ = 0, \f$\lambda_1\f$ > 0) it is one, for an isotropic gray value structure (\f$\lambda_1\f$ = \f$\lambda_2\f$ > 0) it is zero.
+
+Source code
+-----------
+
+You can find source code in the `samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp` of the OpenCV source code library.
+
+@include cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp
+
+Explanation
+-----------
+An anisotropic image segmentation algorithm consists of a gradient structure tensor calculation, an orientation calculation, a coherency calculation and an orientation and coherency thresholding:
+@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp main
+
+A function calcGST() calculates orientation and coherency by using a gradient structure tensor. An input parameter w defines a window size:
+@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp calcGST
+
+The code below applies the thresholds LowThr and HighThr to the image orientation and the threshold C_Thr to the image coherency calculated by the previous function. LowThr and HighThr define the orientation range:
+@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp thresholding
+
+And finally we combine thresholding results:
+@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp combining
+
+Result
+------
+
+Below you can see the real anisotropic image with single direction:
+![Anisotropic image with the single direction](images/gst_input.jpg)
+
+Below you can see the orientation and coherency of the anisotropic image:
+![Orientation](images/gst_orientation.jpg)
+![Coherency](images/gst_coherency.jpg)
+
+Below you can see the segmentation result:
+![Segmentation result](images/gst_result.jpg)
+
+The result has been computed with w = 52, C_Thr = 0.43, LowThr = 35, HighThr = 57. We can see that the algorithm selected only the areas with one single direction.
+
+References
+------
+- [Structure tensor] - structure tensor description on Wikipedia
+
+<!-- invisible references list -->
+[Structure tensor]: https://en.wikipedia.org/wiki/Structure_tensor
diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg
new file mode 100755
index 0000000000..87d0881cfc
Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg differ
diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg
new file mode 100755
index 0000000000..5fb3dfe830
Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg differ
diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg
new file mode 100755
index 0000000000..976fb24c90
Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg differ
diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg
new file mode 100755
index 0000000000..7a1e7cd672
Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg differ
diff --git a/doc/tutorials/imgproc/table_of_content_imgproc.markdown b/doc/tutorials/imgproc/table_of_content_imgproc.markdown
index bea1e1b9ac..badc30d095 100644
--- a/doc/tutorials/imgproc/table_of_content_imgproc.markdown
+++ b/doc/tutorials/imgproc/table_of_content_imgproc.markdown
@@ -330,3 +330,13 @@ In this section you will learn about the image processing (manipulation) functio
     *Author:* Karpushin Vladislav
 
     You will learn how to recover an image with motion blur distortion using a Wiener filter.
+
+-   @subpage tutorial_anisotropic_image_segmentation_by_a_gst
+
+    *Languages:* C++
+
+    *Compatibility:* \> OpenCV 2.0
+
+    *Author:* Karpushin Vladislav
+
+    You will learn how to segment an anisotropic image with a single local orientation by a gradient structure tensor.
diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 321c54b5c3..38264cc58f 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -238,6 +238,14 @@ void Mat::copyTo( OutputArray _dst ) const
 {
     CV_INSTRUMENT_REGION();
 
+#ifdef HAVE_CUDA
+    if (_dst.isGpuMat())
+    {
+        _dst.getGpuMat().upload(*this);
+        return;
+    }
+#endif
+
     int dtype = _dst.type();
     if( _dst.fixedType() && dtype != type() )
     {
diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp
index b5b4514ada..e64d097aad 100644
--- a/modules/core/src/matrix_wrap.cpp
+++ b/modules/core/src/matrix_wrap.cpp
@@ -1146,6 +1146,10 @@ void _InputArray::copyTo(const _OutputArray& arr) const
     }
     else if( k == UMAT )
         ((UMat*)obj)->copyTo(arr);
+#ifdef HAVE_CUDA
+    else if (k == CUDA_GPU_MAT)
+        ((cuda::GpuMat*)obj)->copyTo(arr);
+#endif
     else
         CV_Error(Error::StsNotImplemented, "");
 }
@@ -1163,6 +1167,10 @@ void _InputArray::copyTo(const _OutputArray& arr, const _InputArray & mask) cons
     }
     else if( k == UMAT )
         ((UMat*)obj)->copyTo(arr, mask);
+#ifdef HAVE_CUDA
+    else if (k == CUDA_GPU_MAT)
+        ((cuda::GpuMat*)obj)->copyTo(arr, mask);
+#endif
     else
         CV_Error(Error::StsNotImplemented, "");
 }
diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp
index 248b679379..27d587b186 100644
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@@ -874,6 +874,14 @@ void UMat::copyTo(OutputArray _dst) const
 {
     CV_INSTRUMENT_REGION();
 
+#ifdef HAVE_CUDA
+    if (_dst.isGpuMat())
+    {
+        _dst.getGpuMat().upload(*this);
+        return;
+    }
+#endif
+
     int dtype = _dst.type();
     if( _dst.fixedType() && dtype != type() )
     {
diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt
index 52416731ff..1cb7c467f9 100644
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@ -10,7 +10,7 @@ set(the_description "Deep neural network module. It allows to load models from d
 
 ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX)
 
-ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js)
+ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java js)
 
 ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE)
 
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index cd4cca4c28..d77dd181dc 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -234,7 +234,9 @@ CV__DNN_INLINE_NS_BEGIN
     {
     public:
         int type;
-        Size kernel, stride, pad;
+        Size kernel, stride;
+        int pad_l, pad_t, pad_r, pad_b;
+        CV_DEPRECATED Size pad;
         bool globalPooling;
         bool computeMaxIdx;
         String padMode;
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index bf855011e0..b2a68b15d5 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -836,7 +836,7 @@ CV__DNN_INLINE_NS_BEGIN
      *  @returns 4-dimensional Mat with NCHW dimensions order.
      */
     CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
-                                   const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
+                                   const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                    int ddepth=CV_32F);
 
     /** @brief Creates 4-dimensional blob from image.
@@ -845,7 +845,7 @@ CV__DNN_INLINE_NS_BEGIN
      */
     CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
                                   const Size& size = Size(), const Scalar& mean = Scalar(),
-                                  bool swapRB=true, bool crop=true, int ddepth=CV_32F);
+                                  bool swapRB=false, bool crop=false, int ddepth=CV_32F);
 
 
     /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
@@ -866,7 +866,7 @@ CV__DNN_INLINE_NS_BEGIN
      *  @returns 4-dimensional Mat with NCHW dimensions order.
      */
     CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
-                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
+                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                     int ddepth=CV_32F);
 
     /** @brief Creates 4-dimensional blob from series of images.
@@ -875,7 +875,7 @@ CV__DNN_INLINE_NS_BEGIN
      */
     CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
                                    double scalefactor=1.0, Size size = Size(),
-                                   const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
+                                   const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
                                    int ddepth=CV_32F);
 
     /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
diff --git a/modules/dnn/include/opencv2/dnn/version.hpp b/modules/dnn/include/opencv2/dnn/version.hpp
index 6d1568cb45..ff9faa0602 100644
--- a/modules/dnn/include/opencv2/dnn/version.hpp
+++ b/modules/dnn/include/opencv2/dnn/version.hpp
@@ -6,7 +6,7 @@
 #define OPENCV_DNN_VERSION_HPP
 
 /// Use with major OpenCV version only.
-#define OPENCV_DNN_API_VERSION 20180903
+#define OPENCV_DNN_API_VERSION 20180917
 
 #if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_INLINE_NS
 #define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 40719f3764..a948c6ef9d 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -64,10 +64,17 @@ public:
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel.height, kernel.width, pad.height,
-                                   pad.width, stride.height, stride.width, dilation.height,
+        int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0;
+        getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t,
+                                   pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height,
                                    dilation.width, padMode);
 
+        if (pad_t != pad_b || pad_l != pad_r)
+            CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
+
+        pad.width = pad_l;
+        pad.height = pad_t;
+
         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
 
@@ -100,8 +107,18 @@ public:
         }
 
         Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
+
+        int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
+
         getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
-                kernel, stride, padMode, dilation, pad);
+                kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);
+
+
+        if (pad_t != pad_b || pad_l != pad_r)
+            CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
+
+        pad.width = pad_l;
+        pad.height = pad_t;
     }
 
     bool hasBias() const
@@ -1156,9 +1173,17 @@ public:
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
+
+        int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
         getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
                             Size(inputs[0].size[3], inputs[0].size[2]),
-                            kernel, stride, padMode, dilation, pad);
+                            kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);
+
+        if (pad_t != pad_b || pad_l != pad_r)
+            CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
+
+        pad.width = pad_l;
+        pad.height = pad_t;
     }
 
     class MatMulInvoker : public ParallelLoopBody
diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp
index bf5834c864..2dbb12109d 100644
--- a/modules/dnn/src/layers/layers_common.cpp
+++ b/modules/dnn/src/layers/layers_common.cpp
@@ -118,9 +118,19 @@ void getKernelSize(const LayerParams &params, int &kernelH, int &kernelW)
     CV_Assert(kernelH > 0 && kernelW > 0);
 }
 
-void getStrideAndPadding(const LayerParams &params, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode)
+void getStrideAndPadding(const LayerParams &params, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode)
 {
-    util::getParameter(params, "pad", "pad", padH, padW, true, 0);
+    if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
+        padT = params.get<int>("pad_t");
+        padL = params.get<int>("pad_l");
+        padB = params.get<int>("pad_b");
+        padR = params.get<int>("pad_r");
+    }
+    else {
+        util::getParameter(params, "pad", "pad", padT, padL, true, 0);
+        padB = padT;
+        padR = padL;
+    }
     util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
 
     padMode = "";
@@ -129,15 +139,15 @@ void getStrideAndPadding(const LayerParams &params, int &padH, int &padW, int &s
         padMode = params.get<String>("pad_mode");
     }
 
-    CV_Assert(padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
+    CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0);
 }
 }
 
 
 void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
-                            int &padH, int &padW, int &strideH, int &strideW, cv::String &padMode)
+                            int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode)
 {
-    util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode);
+    util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
 
     globalPooling = params.has("global_pooling") &&
                     params.get<bool>("global_pooling");
@@ -148,9 +158,9 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
         {
             CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
         }
-        if(padH != 0 || padW != 0 || strideH != 1 || strideW != 1)
+        if(padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1)
         {
-            CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1");
+            CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1");
         }
     }
     else
@@ -159,12 +169,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
     }
 }
 
-void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW,
+void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
                                 int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode)
 {
     util::getKernelSize(params, kernelH, kernelW);
-    util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode);
-
+    util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
     util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);
 
     CV_Assert(dilationH > 0 && dilationW > 0);
@@ -201,11 +210,11 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
 
 void getConvPoolPaddings(const Size& inp, const Size& out,
                          const Size &kernel, const Size &stride,
-                         const String &padMode, const Size &dilation, Size &pad)
+                         const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR)
 {
     if (padMode == "VALID")
     {
-        pad = cv::Size(0,0);
+        padT = padL = padB = padR = 0;
     }
     else if (padMode == "SAME")
     {
@@ -213,7 +222,8 @@ void getConvPoolPaddings(const Size& inp, const Size& out,
         int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
         // For odd values of total padding, add more padding at the 'right'
         // side of the given dimension.
-        pad = cv::Size(Pw / 2, Ph / 2);
+        padT= padB = Ph / 2;
+        padL = padR = Pw / 2;
     }
 }
 
diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp
index 4bb4c317e4..7fce183d6e 100644
--- a/modules/dnn/src/layers/layers_common.hpp
+++ b/modules/dnn/src/layers/layers_common.hpp
@@ -60,19 +60,20 @@ namespace cv
 namespace dnn
 {
 
-void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW,
+void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
                                 int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode);
 
 void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
-                            int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode);
+                            int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode);
 
 void getConvPoolOutParams(const Size& inp, const Size &kernel,
                           const Size &stride, const String &padMode,
                           const Size &dilation, Size& out);
 
+
 void getConvPoolPaddings(const Size& inp, const Size& out,
                          const Size &kernel, const Size &stride,
-                         const String &padMode, const Size &dilation, Size &pad);
+                         const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR);
 
 }
 }
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index a7464217c7..0b4b0ae850 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -85,8 +85,12 @@ public:
                 type = STOCHASTIC;
             else
                 CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
+
             getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
-                                   pad.height, pad.width, stride.height, stride.width, padMode);
+                                   pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode);
+
+            pad.width = pad_l;
+            pad.height = pad_t;
         }
         else if (params.has("pooled_w") || params.has("pooled_h"))
         {
@@ -130,7 +134,9 @@ public:
             kernel = inp;
         }
 
-        getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad);
+        getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r);
+        pad.width = pad_l;
+        pad.height = pad_t;
 
 #ifdef HAVE_OPENCL
         poolOp.release();
@@ -149,7 +155,7 @@ public:
         else
             return backendId == DNN_BACKEND_OPENCV ||
                    backendId == DNN_BACKEND_HALIDE && haveHalide() &&
-                   (type == MAX || type == AVE && !pad.width && !pad.height);
+                   (type == MAX || type == AVE && !pad_t && !pad_l && !pad_b && !pad_r);
     }
 
 #ifdef HAVE_OPENCL
@@ -169,7 +175,10 @@ public:
             config.in_shape = shape(inputs[0]);
             config.out_shape = shape(outputs[0]);
             config.kernel = kernel;
-            config.pad = pad;
+            config.pad_l = pad_l;
+            config.pad_t = pad_t;
+            config.pad_r = pad_r;
+            config.pad_b = pad_b;
             config.stride = stride;
             config.channels = inputs[0].size[1];
             config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
@@ -193,7 +202,6 @@ public:
             if (!poolOp->Forward(inpMat, outMat, maskMat))
                 return false;
         }
-
         return true;
     }
 #endif
@@ -264,8 +272,10 @@ public:
             poolLayer->_kernel_y = kernel.height;
             poolLayer->_stride_x = stride.width;
             poolLayer->_stride_y = stride.height;
-            poolLayer->_padding_x = pad.width;
-            poolLayer->_padding_y = pad.height;
+            poolLayer->_padding_x = pad_l;
+            poolLayer->_padding_y = pad_t;
+            poolLayer->params["pad-r"] = format("%d", pad_r);
+            poolLayer->params["pad-b"] = format("%d", pad_b);
             poolLayer->_exclude_pad = type == AVE && padMode == "SAME";
             poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
             poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :
@@ -296,12 +306,14 @@ public:
         return Ptr<BackendNode>();
     }
 
+
     class PoolingInvoker : public ParallelLoopBody
     {
     public:
         const Mat* src, *rois;
         Mat *dst, *mask;
-        Size kernel, stride, pad;
+        Size kernel, stride;
+        int pad_l, pad_t, pad_r, pad_b;
         bool avePoolPaddedArea;
         int nstripes;
         bool computeMaxIdx;
@@ -313,7 +325,7 @@ public:
                            computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
 
         static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
-                        Size stride, Size pad, bool avePoolPaddedArea, int poolingType, float spatialScale,
+                        Size stride, int pad_l, int pad_t, int pad_r, int pad_b, bool avePoolPaddedArea, int poolingType, float spatialScale,
                         bool computeMaxIdx, int nstripes)
         {
             CV_Assert_N(
@@ -332,7 +344,10 @@ public:
             p.mask = &mask;
             p.kernel = kernel;
             p.stride = stride;
-            p.pad = pad;
+            p.pad_l = pad_l;
+            p.pad_t = pad_t;
+            p.pad_r = pad_r;
+            p.pad_b = pad_b;
             p.avePoolPaddedArea = avePoolPaddedArea;
             p.nstripes = nstripes;
             p.computeMaxIdx = computeMaxIdx;
@@ -359,7 +374,6 @@ public:
             size_t stripeStart = r.start*stripeSize;
             size_t stripeEnd = std::min(r.end*stripeSize, total);
             int kernel_w = kernel.width, kernel_h = kernel.height;
-            int pad_w = pad.width, pad_h = pad.height;
             int stride_w = stride.width, stride_h = stride.height;
             bool compMaxIdx = computeMaxIdx;
 
@@ -411,8 +425,8 @@ public:
                 }
                 else
                 {
-                    ystart = y0 * stride_h - pad_h;
-                    yend = min(ystart + kernel_h, inp_height + pad_h);
+                    ystart = y0 * stride_h - pad_t;
+                    yend = min(ystart + kernel_h, inp_height + pad_b);
                     srcData = src->ptr<float>(n, c);
                 }
                 int ydelta = yend - ystart;
@@ -428,7 +442,7 @@ public:
                 if( poolingType == MAX)
                     for( ; x0 < x1; x0++ )
                     {
-                        int xstart = x0 * stride_w - pad_w;
+                        int xstart = x0 * stride_w - pad_l;
                         int xend = min(xstart + kernel_w, inp_width);
                         xstart = max(xstart, 0);
                         if (xstart >= xend || ystart >= yend)
@@ -439,7 +453,7 @@ public:
                             continue;
                         }
 #if CV_SIMD128
-                        if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
+                        if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
                         {
                             if( compMaxIdx )
                             {
@@ -578,15 +592,15 @@ public:
                 {
                     for( ; x0 < x1; x0++ )
                     {
-                        int xstart = x0 * stride_w - pad_w;
-                        int xend = min(xstart + kernel_w, inp_width + pad_w);
+                        int xstart = x0 * stride_w - pad_l;
+                        int xend = min(xstart + kernel_w, inp_width + pad_r);
                         int xdelta = xend - xstart;
                         xstart = max(xstart, 0);
                         xend = min(xend, inp_width);
                         float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));
                         inv_kernel_area = 1.0 / inv_kernel_area;
 #if CV_SIMD128
-                        if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
+                        if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
                         {
                             v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32();
                             v_float32x4 ikarea = v_setall_f32(inv_kernel_area);
@@ -695,21 +709,21 @@ public:
     {
         const int nstripes = getNumThreads();
         Mat rois;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b,  avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     void avePooling(Mat &src, Mat &dst)
     {
         const int nstripes = getNumThreads();
         Mat rois, mask;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
     {
         const int nstripes = getNumThreads();
         Mat mask;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
@@ -723,10 +737,10 @@ public:
         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
         Halide::RDom r(0, kernel.width, 0, kernel.height);
         Halide::Expr kx, ky;
-        if (pad.width || pad.height)
+        if(pad_l || pad_t)
         {
-            kx = clamp(x * stride.width + r.x - pad.width, 0, inWidth - 1);
-            ky = clamp(y * stride.height + r.y - pad.height, 0, inHeight - 1);
+            kx = clamp(x * stride.width + r.x - pad_l, 0, inWidth - 1);
+            ky = clamp(y * stride.height + r.y - pad_t, 0, inHeight - 1);
         }
         else
         {
@@ -739,11 +753,11 @@ public:
 
         // Compute offset from argmax in range [0, kernel_size).
         Halide::Expr max_index;
-        if (pad.width || pad.height)
+        if(pad_l || pad_t)
         {
-            max_index = clamp(y * stride.height + res[1] - pad.height,
+            max_index = clamp(y * stride.height + res[1] - pad_t,
                               0, inHeight - 1) * inWidth +
-                        clamp(x * stride.width + res[0] - pad.width,
+                        clamp(x * stride.width + res[0] - pad_l,
                               0, inWidth - 1);
         }
         else
@@ -852,21 +866,21 @@ public:
         }
         else if (padMode.empty())
         {
-            float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height;
-            float width = (float)(in.width + 2 * pad.width - kernel.width) / stride.width;
+            float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height;
+            float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width;
             out.height = 1 + (ceilMode ? ceil(height) : floor(height));
             out.width = 1 + (ceilMode ? ceil(width) : floor(width));
 
-            if (pad.height || pad.width)
+            if (pad_r || pad_b)
             {
                 // If we have padding, ensure that the last pooling starts strictly
                 // inside the image (instead of at the padding); otherwise clip the last.
-                if ((out.height - 1) * stride.height >= in.height + pad.height)
+                if ((out.height - 1) * stride.height >= in.height + pad_b)
                     --out.height;
-                if ((out.width - 1) * stride.width >= in.width + pad.width)
+                if ((out.width - 1) * stride.width >= in.width + pad_r)
                     --out.width;
-                CV_Assert((out.height - 1) * stride.height < in.height + pad.height);
-                CV_Assert((out.width - 1) * stride.width < in.width + pad.width);
+                CV_Assert((out.height - 1) * stride.height < in.height + pad_b);
+                CV_Assert((out.width - 1) * stride.width < in.width + pad_r);
             }
         }
         else
@@ -888,6 +902,7 @@ public:
             dims[1] = psRoiOutChannels;
         }
         outputs.assign(type == MAX ? 2 : 1, shape(dims, 4));
+
         return false;
     }
 
diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
index e0ca5ca98c..eda2e837c0 100644
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -345,7 +345,7 @@ struct OCL4DNNPoolConfig
 {
     OCL4DNNPoolConfig() :
         kernel(1, 1),
-        pad(0, 0),
+        pad_l(0), pad_t(0), pad_r(0), pad_b(0),
         stride(1, 1),
         dilation(1, 1),
         channels(0),
@@ -358,7 +358,7 @@ struct OCL4DNNPoolConfig
     MatShape in_shape;
     MatShape out_shape;
     Size kernel;
-    Size pad;
+    int pad_l, pad_t, pad_r, pad_b;
     Size stride;
     Size dilation;
 
@@ -381,7 +381,6 @@ class OCL4DNNPool
                      UMat& top_mask);
     private:
         // Pooling parameters
-        std::vector<int32_t> pad_;
         std::vector<int32_t> stride_;
         std::vector<int32_t> kernel_shape_;
         std::vector<int32_t> im_in_shape_;
@@ -394,8 +393,10 @@ class OCL4DNNPool
         int32_t kernel_w_;
         int32_t stride_h_;
         int32_t stride_w_;
-        int32_t pad_h_;
-        int32_t pad_w_;
+        int32_t pad_t_;
+        int32_t pad_l_;
+        int32_t pad_b_;
+        int32_t pad_r_;
         int32_t height_;
         int32_t width_;
         int32_t pooled_height_;
diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
index 77cd3a6337..47b40cc6c2 100644
--- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
+++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
@@ -62,7 +62,6 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
     for (int i = 0; i < spatial_dims; ++i)
     {
         kernel_shape_.push_back(i == 0 ? config.kernel.height : config.kernel.width);
-        pad_.push_back(i == 0 ? config.pad.height : config.pad.width);
         stride_.push_back(i == 0 ? config.stride.height : config.stride.width);
         im_in_shape_.push_back(config.in_shape[dims - spatial_dims + i]);
         im_out_shape_.push_back(config.out_shape[dims - spatial_dims + i]);
@@ -72,8 +71,10 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
     kernel_w_ = kernel_shape_[1];
     stride_h_ = stride_[0];
     stride_w_ = stride_[1];
-    pad_h_ = pad_[0];
-    pad_w_ = pad_[1];
+    pad_t_ = config.pad_t;
+    pad_l_ = config.pad_l;
+    pad_r_ = config.pad_r;
+    pad_b_ = config.pad_b;
     height_ = im_in_shape_[0];
     width_ = im_in_shape_[1];
     pooled_height_ = im_out_shape_[0];
@@ -113,14 +114,13 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                 ocl::dnn::ocl4dnn_pooling_oclsrc,
                 format(" -D Dtype=%s -D KERNEL_MAX_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
                        " -D STRIDE_W=%d -D STRIDE_H=%d"
-                       " -D PAD_W=%d -D PAD_H=%d%s",
+                       " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s",
                        (use_half) ? "half" : "float",
                        kernel_w_, kernel_h_,
                        stride_w_, stride_h_,
-                       pad_w_, pad_h_,
+                       pad_l_, pad_t_, pad_r_, pad_b_,
                        computeMaxIdx ? " -D HAVE_MASK=1" : ""
                 ));
-
             if (oclk_max_pool_forward.empty())
                 return false;
 
@@ -150,11 +150,11 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
                 ocl::dnn::ocl4dnn_pooling_oclsrc,
                 format(" -D Dtype=%s -D KERNEL_AVE_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
                        " -D STRIDE_W=%d -D STRIDE_H=%d"
-                       " -D PAD_W=%d -D PAD_H=%d%s",
+                       " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s",
                        (use_half) ? "half" : "float",
                        kernel_w_, kernel_h_,
                        stride_w_, stride_h_,
-                       pad_w_, pad_h_,
+                       pad_l_, pad_t_, pad_r_, pad_b_,
                        avePoolPaddedArea ? " -D AVE_POOL_PADDING_AREA" : ""
                 ));
 
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 44d6b41a64..3e58af911d 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -174,9 +174,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot
         else if(attribute_name == "pads")
         {
             CV_Assert(attribute_proto.ints_size() == 4);
-            lp.set("pad_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
-            lp.set("pad_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
-            // push pad_b and pad_r for compute ceil_mode
+            lp.set("pad_t", saturate_cast<int32_t>(attribute_proto.ints(0)));
+            lp.set("pad_l", saturate_cast<int32_t>(attribute_proto.ints(1)));
             lp.set("pad_b", saturate_cast<int32_t>(attribute_proto.ints(2)));
             lp.set("pad_r", saturate_cast<int32_t>(attribute_proto.ints(3)));
         }
@@ -306,6 +305,7 @@ void ONNXImporter::populateNet(Net dstNet)
         std::string layer_type = node_proto.op_type();
         layerParams.type = layer_type;
 
+
         if (layer_type == "MaxPool")
         {
             layerParams.type = "Pooling";
@@ -551,7 +551,6 @@ void ONNXImporter::populateNet(Net dstNet)
 
          for (int j = 0; j < node_proto.input_size(); j++) {
              layerId = layer_id.find(node_proto.input(j));
-
              if (layerId != layer_id.end()) {
                  dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
              }
diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl
index 77d2e5ba33..53c61e4bd2 100644
--- a/modules/dnn/src/opencl/ocl4dnn_pooling.cl
+++ b/modules/dnn/src/opencl/ocl4dnn_pooling.cl
@@ -73,8 +73,8 @@ __kernel void
   const int xx = index / pooled_width;
   const int ph = xx % pooled_height;
   const int ch = xx / pooled_height;
-  int hstart = ph * STRIDE_H - PAD_H;
-  int wstart = pw * STRIDE_W - PAD_W;
+  int hstart = ph * STRIDE_H - PAD_T;
+  int wstart = pw * STRIDE_W - PAD_L;
   Dtype maxval = -FLT_MAX;
   int maxidx = -1;
   int in_offset = ch * height * width;
@@ -117,10 +117,10 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
   const int xx = index / pooled_width;
   const int ph = xx % pooled_height;
   const int ch = xx / pooled_height;
-  int hstart = ph * STRIDE_H - PAD_H;
-  int wstart = pw * STRIDE_W - PAD_W;
-  int hend = min(hstart + KERNEL_H, height + PAD_H);
-  int wend = min(wstart + KERNEL_W, width + PAD_W);
+  int hstart = ph * STRIDE_H - PAD_T;
+  int wstart = pw * STRIDE_W - PAD_L;
+  int hend = min(hstart + KERNEL_H, height + PAD_B);
+  int wend = min(wstart + KERNEL_W, width + PAD_R);
   int pool_size;
 #ifdef AVE_POOL_PADDING_AREA
   pool_size = (hend - hstart) * (wend - wstart);
diff --git a/modules/dnn/src/opencl/pooling.cl b/modules/dnn/src/opencl/pooling.cl
index adfd59e6d9..2a92cb2f01 100644
--- a/modules/dnn/src/opencl/pooling.cl
+++ b/modules/dnn/src/opencl/pooling.cl
@@ -27,7 +27,7 @@
 __kernel void MaxPoolForward(const int nthreads,
     __global T* bottom_data, const int num, const int channels, const int height, const int width,
     const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
-    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
     __global T* top_data
 #ifdef MASK
     , __global float* mask
@@ -41,8 +41,8 @@ __kernel void MaxPoolForward(const int nthreads,
     int ph = (index / pooled_width) % pooled_height;
     int c = (index / pooled_width / pooled_height) % channels;
     int n = index / pooled_width / pooled_height / channels;
-    int hstart = ph * stride_h - pad_h;
-    int wstart = pw * stride_w - pad_w;
+    int hstart = ph * stride_h - pad_t;
+    int wstart = pw * stride_w - pad_l;
     const int hend = min(hstart + kernel_h, height);
     const int wend = min(wstart + kernel_w, width);
     hstart = max(hstart, 0);
@@ -71,7 +71,7 @@ __kernel void MaxPoolForward(const int nthreads,
 __kernel void AvePoolForward(const int nthreads,
     __global T* bottom_data, const int num, const int channels, const int height, const int width,
     const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
-    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
     __global T* top_data
 #ifdef MASK
     , __global float* mask // NOT USED
@@ -84,9 +84,9 @@ __kernel void AvePoolForward(const int nthreads,
     int pw = index % pooled_width;
     int ph = (index / pooled_width) % pooled_height;
     int c = (index / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_h; int wstart = pw * stride_w - pad_w;
-    int hend = min(hstart + kernel_h, height + pad_h);
-    int wend = min(wstart + kernel_w, width + pad_w);
+    int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_t; int wstart = pw * stride_w - pad_l;
+    int hend = min(hstart + kernel_h, height + pad_b);
+    int wend = min(wstart + kernel_w, width + pad_r);
     const int pool_size = (hend - hstart) * (wend - wstart);
     hstart = max(hstart, 0);
     wstart = max(wstart, 0);
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 4f0041eeb6..6c19093805 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -74,6 +74,18 @@ enum LuaType
     LEGACY_TYPE_RECUR_FUNCTION = 7
 };
 
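+// Torch storage element types, encoded as OpenCV type codes so CV_ELEM_SIZE() can be applied to them (TYPE_LONG uses CV_32SC2 as an 8-byte stand-in for int64).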
+enum TorchType
+{
+    TYPE_DOUBLE = CV_64F,
+    TYPE_FLOAT  = CV_32F,
+    TYPE_BYTE   = CV_8U,
+    TYPE_CHAR   = CV_8S,
+    TYPE_SHORT  = CV_16S,
+    TYPE_INT    = CV_32S,
+    TYPE_LONG   = CV_32SC2
+};
+
 template<typename T>
 static String toString(const T &v)
 {
@@ -203,19 +215,19 @@ struct TorchImporter
            String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));
 
            if (typeStr == "Double")
-               return CV_64F;
+               return TYPE_DOUBLE;
            else if (typeStr == "Float" || typeStr == "Cuda")
-               return CV_32F;
+               return TYPE_FLOAT;
            else if (typeStr == "Byte")
-               return CV_8U;
+               return TYPE_BYTE;
            else if (typeStr == "Char")
-               return CV_8S;
+               return TYPE_CHAR;
            else if (typeStr == "Short")
-               return CV_16S;
+               return TYPE_SHORT;
            else if (typeStr == "Int")
-               return CV_32S;
-           else if (typeStr == "Long") //Carefully! CV_64S type coded as CV_USRTYPE1
-               return CV_USRTYPE1;
+               return TYPE_INT;
+           else if (typeStr == "Long")
+               return TYPE_LONG;
            else
                CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");
         }
@@ -236,36 +248,44 @@ struct TorchImporter
     void readTorchStorage(int index, int type = -1)
     {
         long size = readLong();
-        Mat storageMat(1, size, (type != CV_USRTYPE1) ? type : CV_64F); //handle LongStorage as CV_64F Mat
+        Mat storageMat;
 
         switch (type)
         {
-        case CV_32F:
+        case TYPE_FLOAT:
+            storageMat.create(1, size, CV_32F);
             THFile_readFloatRaw(file, (float*)storageMat.data, size);
             break;
-        case CV_64F:
+        case TYPE_DOUBLE:
+            storageMat.create(1, size, CV_64F);
             THFile_readDoubleRaw(file, (double*)storageMat.data, size);
             break;
-        case CV_8S:
-        case CV_8U:
+        case TYPE_CHAR:
+            storageMat.create(1, size, CV_8S);
             THFile_readByteRaw(file, (uchar*)storageMat.data, size);
             break;
-        case CV_16S:
-        case CV_16U:
+        case TYPE_BYTE:
+            storageMat.create(1, size, CV_8U);
+            THFile_readByteRaw(file, (uchar*)storageMat.data, size);
+            break;
+        case TYPE_SHORT:
+            storageMat.create(1, size, CV_16S);
             THFile_readShortRaw(file, (short*)storageMat.data, size);
             break;
-        case CV_32S:
+        case TYPE_INT:
+            storageMat.create(1, size, CV_32S);
             THFile_readIntRaw(file, (int*)storageMat.data, size);
             break;
-        case CV_USRTYPE1:
+        case TYPE_LONG:
         {
+            storageMat.create(1, size, CV_64F);   //handle LongStorage as CV_64F Mat
             double *buf = storageMat.ptr<double>();
             THFile_readLongRaw(file, (int64*)buf, size);
 
             for (size_t i = (size_t)size; i-- > 0; )
                 buf[i] = ((int64*)buf)[i];
-        }
             break;
+        }
         default:
             CV_Error(Error::StsInternal, "");
             break;
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 85ff7ace21..b6da2f189c 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -307,7 +307,7 @@ TEST_P(Reproducibility_SqueezeNet_v1_1, Accuracy)
     net.setPreferableBackend(DNN_BACKEND_OPENCV);
     net.setPreferableTarget(targetId);
 
-    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false);
+    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false, true);
     ASSERT_TRUE(!input.empty());
 
     Mat out;
@@ -403,7 +403,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121)
     const string model = findDataFile("dnn/DenseNet_121.caffemodel", false);
 
     Mat inp = imread(_tf("dog416.png"));
-    inp = blobFromImage(inp, 1.0 / 255, Size(224, 224));
+    inp = blobFromImage(inp, 1.0 / 255, Size(224, 224), Scalar(), true, true);
     Mat ref = blobFromNPY(_tf("densenet_121_output.npy"));
 
     Net net = readNetFromCaffe(proto, model);
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 8d53b63eab..85405803d6 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -346,6 +346,10 @@ TEST_P(Test_ONNX_nets, DenseNet121)
     testONNXModels("densenet121", pb, l1, lInf);
 }
 
+TEST_P(Test_ONNX_nets, Inception_v1)
+{
+    testONNXModels("inception_v1", pb);  // unlike DenseNet121 above, no explicit l1/lInf tolerances are passed
+}
 
 INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
 
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index b05d1f5440..b10c1388f3 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -62,8 +62,7 @@ TEST(Test_TensorFlow, inception_accuracy)
 
     Mat sample = imread(_tf("grace_hopper_227.png"));
     ASSERT_TRUE(!sample.empty());
-    resize(sample, sample, Size(224, 224));
-    Mat inputBlob = blobFromImage(sample);
+    Mat inputBlob = blobFromImage(sample, 1.0, Size(224, 224), Scalar(), /*swapRB*/true);
 
     net.setInput(inputBlob, "input");
     Mat out = net.forward("softmax2");
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index bd5f11249d..dd7d975af6 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -278,7 +278,7 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy)
     sampleF32 /= 255;
     resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
 
-    Mat inputBlob = blobFromImage(sampleF32);
+    Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);
 
     net.setInput(inputBlob);
     Mat out = net.forward();
@@ -305,7 +305,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
     net.setPreferableTarget(target);
 
     Mat sample = imread(_tf("street.png", false));
-    Mat inputBlob = blobFromImage(sample, 1./255);
+    Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);
 
     net.setInput(inputBlob, "");
     Mat out = net.forward();
diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index e236e08adb..7f3d1515e5 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -1987,10 +1987,10 @@ transform.
 
 @param image 8-bit, single-channel binary source image. The image may be modified by the function.
 @param lines Output vector of lines. Each line is represented by a 2 or 3 element vector
-\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \votes)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
+\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
 the image). \f$\theta\f$ is the line rotation angle in radians (
 \f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
-\f$\votes\f$ is the value of accumulator.
+\f$\textrm{votes}\f$ is the value of the accumulator.
 @param rho Distance resolution of the accumulator in pixels.
 @param theta Angle resolution of the accumulator in radians.
 @param threshold Accumulator threshold parameter. Only those lines are returned that get enough
diff --git a/modules/python/test/test_dnn.py b/modules/python/test/test_dnn.py
new file mode 100644
index 0000000000..a1b55f4358
--- /dev/null
+++ b/modules/python/test/test_dnn.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+import os
+import cv2 as cv
+import numpy as np
+
+from tests_common import NewOpenCVTests, unittest
+
+def normAssert(test, a, b, lInf=1e-5):
+    test.assertLess(np.abs(a - b).max(), lInf)  # largest element-wise |a - b| must stay below lInf
+
+def inter_area(box1, box2):  # boxes are indexed as (x0, y0, x1, y1); returns the area of their overlap
+    x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
+    y_min, y_max = max(box1[1], box2[1]), min(box1[3], box2[3])
+    return max(0, x_max - x_min) * max(0, y_max - y_min)  # clamp: disjoint boxes must yield 0, not (-w) * (-h) > 0
+
+def area(box):
+    return (box[3] - box[1]) * (box[2] - box[0])  # height * width of an (x0, y0, x1, y1) box
+
+def box2str(box):
+    x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
+    extent = (x1 - x0, y1 - y0, x0, y0)  # (width, height, left, top)
+    return '[%f x %f from (%f, %f)]' % extent
+
+def normAssertDetections(test, ref, out, confThreshold=0.0, scores_diff=1e-5, boxes_iou_diff=1e-4):
+    ref = np.array(ref, np.float32)
+    refClassIds, testClassIds = ref[:, 1], out[:, 1]
+    refScores, testScores = ref[:, 2], out[:, 2]
+    refBoxes, testBoxes = ref[:, 3:], out[:, 3:]
+    # Rows are read as [_, classId, score, x0, y0, x1, y1]; `test` fails with a list of every unmatched row.
+    matchedRefBoxes = [False] * len(refBoxes)
+    errMsg = ''
+    for i in range(len(testBoxes)):  # walk the predictions: `out` may have more or fewer rows than `ref`
+        testScore = testScores[i]
+        if testScore < confThreshold:
+            continue
+        # A prediction matches a not-yet-used reference with the same class, a close score and IoU ~ 1.
+        testClassId, testBox = testClassIds[i], testBoxes[i]
+        matched = False
+        for j in range(len(refBoxes)):
+            if (not matchedRefBoxes[j]) and testClassId == refClassIds[j] and \
+               abs(testScore - refScores[j]) < scores_diff:
+                interArea = inter_area(testBox, refBoxes[j])
+                iou = interArea / (area(testBox) + area(refBoxes[j]) - interArea)
+                if abs(iou - 1.0) < boxes_iou_diff:
+                    matched = True
+                    matchedRefBoxes[j] = True
+        if not matched:
+            errMsg += '\nUnmatched prediction: class %d score %f box %s' % (testClassId, testScore, box2str(testBox))
+    # Any reference above the confidence threshold that stayed unmatched is reported as well.
+    for i in range(len(refBoxes)):
+        if (not matchedRefBoxes[i]) and refScores[i] > confThreshold:
+            errMsg += '\nUnmatched reference: class %d score %f box %s' % (refClassIds[i], refScores[i], box2str(refBoxes[i]))
+    if errMsg:
+        test.fail(errMsg)
+
+
+# Builds a minimal network (a single "Identity" layer) from an in-memory Caffe prototxt
+def getSimpleNet():
+    identityNetDef = """
+        name: "simpleNet"
+        input: "data"
+        layer {
+          type: "Identity"
+          name: "testLayer"
+          top: "testLayer"
+          bottom: "data"
+        }
+    """
+    return cv.dnn.readNetFromCaffe(bytearray(identityNetDef, 'utf8'))
+
+
+def testBackendAndTarget(backend, target):  # True iff the simple net completes a forward pass on this pair
+    net = getSimpleNet()
+    net.setPreferableBackend(backend)
+    net.setPreferableTarget(target)
+    inp = np.random.standard_normal([1, 2, 3, 4]).astype(np.float32)
+    try:
+        net.setInput(inp)
+        net.forward()
+    except Exception:  # not BaseException: Ctrl-C / SystemExit must not be read as "backend unavailable"
+        return False
+    return True
+
+
+# Import-time probe: collect, in order, every backend/target pair the tests below will iterate over.
+haveInfEngine = testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU)
+dnnBackendsAndTargets = [[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU]]
+
+if haveInfEngine:
+    dnnBackendsAndTargets += [[cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU]]
+    if testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
+        dnnBackendsAndTargets += [[cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD]]
+
+# OpenCL targets are listed only when haveOpenCL() and useOpenCL() both report true.
+if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
+    dnnBackendsAndTargets += [[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL],
+                              [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16]]
+    if haveInfEngine:  # FIXIT Check Intel iGPU only
+        dnnBackendsAndTargets += [[cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL],
+                                  [cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16]]
+
+
+def printParams(backend, target):
+    # Prints a short 'BACKEND/TARGET' tag for the configuration under test.
+    backendTag = {
+        cv.dnn.DNN_BACKEND_OPENCV: 'OCV',
+        cv.dnn.DNN_BACKEND_INFERENCE_ENGINE: 'DLIE',
+    }[backend]
+    targetTag = {
+        cv.dnn.DNN_TARGET_CPU: 'CPU',
+        cv.dnn.DNN_TARGET_OPENCL: 'OCL', cv.dnn.DNN_TARGET_OPENCL_FP16: 'OCL_FP16',
+        cv.dnn.DNN_TARGET_MYRIAD: 'MYRIAD',
+    }[target]
+    print('%s/%s' % (backendTag, targetTag))
+
+
+class dnn_test(NewOpenCVTests):
+    # Python-binding tests for cv.dnn: blobFromImage pre-processing and a face detector run per backend/target.
+    def find_dnn_file(self, filename, required=True):
+        return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd())], required=required)
+
+    def test_blobFromImage(self):
+        np.random.seed(324)
+
+        width = 6
+        height = 7
+        scale = 1.0/127.5
+        mean = (10, 20, 30)
+
+        # Positional and keyword argument forms must produce the same blob.
+        img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8)
+        blob = cv.dnn.blobFromImage(img, scale, (width, height), mean, True, False)
+        blob_args = cv.dnn.blobFromImage(img, scalefactor=scale, size=(width, height),
+                                         mean=mean, swapRB=True, crop=False)
+        normAssert(self, blob, blob_args)
+
+        # Rebuild the expected blob by hand: resize, swap channels, subtract mean, scale, reorder axes.
+        target = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR)
+        target = target.astype(np.float32)
+        target = target[:,:,[2, 1, 0]]  # BGR2RGB
+        target[:,:,0] -= mean[0]
+        target[:,:,1] -= mean[1]
+        target[:,:,2] -= mean[2]
+        target *= scale
+        target = target.transpose(2, 0, 1).reshape(1, 3, height, width)  # to NCHW
+        normAssert(self, blob, target)
+
+
+    def test_face_detection(self):
+        testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))  # any non-empty value, even '0', is True
+        proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt', required=testdata_required)
+        model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=testdata_required)
+        if proto is None or model is None:
+            raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
+
+        img = self.get_sample('gpu/lbpcascade/er.png')
+        blob = cv.dnn.blobFromImage(img, mean=(104, 177, 123), swapRB=False, crop=False)
+        # Reference detections; normAssertDetections reads each row as [_, classId, score, box (4 values)].
+        ref = [[0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631],
+               [0, 1, 0.9934696,  0.2831718,  0.50738752, 0.345781,   0.5985168],
+               [0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290],
+               [0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477],
+               [0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494],
+               [0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427,  0.5347801]]
+
+        print('\n')
+        for backend, target in dnnBackendsAndTargets:
+            printParams(backend, target)
+
+            net = cv.dnn.readNet(proto, model)
+            net.setPreferableBackend(backend)
+            net.setPreferableTarget(target)
+            net.setInput(blob)
+            out = net.forward().reshape(-1, 7)
+
+            scoresDiff = 4e-3 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-5  # looser on FP16/MYRIAD
+            iouDiff = 2e-2 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-4
+
+            normAssertDetections(self, ref, out, 0.5, scoresDiff, iouDiff)
+
+if __name__ == '__main__':
+    NewOpenCVTests.bootstrap()  # entry point provided by tests_common.NewOpenCVTests (implementation not shown here)
diff --git a/modules/python/test/tests_common.py b/modules/python/test/tests_common.py
index e6539ae7f4..a938a8e2cb 100644
--- a/modules/python/test/tests_common.py
+++ b/modules/python/test/tests_common.py
@@ -26,23 +26,25 @@ class NewOpenCVTests(unittest.TestCase):
     # github repository url
     repoUrl = 'https://raw.github.com/opencv/opencv/master'
 
+    def find_file(self, filename, searchPaths=None, required=True):  # None/empty -> repoPath, then extraTestDataPath
+        searchPaths = searchPaths or [self.repoPath, self.extraTestDataPath]
+        for path in searchPaths:
+            if path is not None:  # unset locations are skipped
+                candidate = path + '/' + filename
+                if os.path.isfile(candidate):
+                    return candidate  # first existing '<dir>/<filename>' wins
+        if required:
+            self.fail('File ' + filename + ' not found')
+        return None
+
+
     def get_sample(self, filename, iscolor = None):
         if iscolor is None:
             iscolor = cv.IMREAD_COLOR
         if not filename in self.image_cache:
-            filedata = None
-            if NewOpenCVTests.repoPath is not None:
-                candidate = NewOpenCVTests.repoPath + '/' + filename
-                if os.path.isfile(candidate):
-                    with open(candidate, 'rb') as f:
-                        filedata = f.read()
-            if NewOpenCVTests.extraTestDataPath is not None:
-                candidate = NewOpenCVTests.extraTestDataPath + '/' + filename
-                if os.path.isfile(candidate):
-                    with open(candidate, 'rb') as f:
-                        filedata = f.read()
-            if filedata is None:
-                return None#filedata = urlopen(NewOpenCVTests.repoUrl + '/' + filename).read()
+            filepath = self.find_file(filename)
+            with open(filepath, 'rb') as f:
+                filedata = f.read()
             self.image_cache[filename] = cv.imdecode(np.fromstring(filedata, dtype=np.uint8), iscolor)
         return self.image_cache[filename]
 
@@ -102,4 +104,4 @@ def isPointInRect(p, rect):
     if rect[0] <= p[0] and rect[1] <=p[1] and p[0] <= rect[2] and p[1] <= rect[3]:
         return True
     else:
-        return False
\ No newline at end of file
+        return False
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index e0198be5f8..ce337ea10f 100644
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -58,6 +58,10 @@
 #  pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #endif
 
+#ifndef CV_UNUSED  // Required for standalone compilation mode (OpenCV defines this in base.hpp)
+#define CV_UNUSED(name) (void)name
+#endif  // CV_UNUSED
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java b/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java
index 3b62cc1e1a..44b4ba3d6e 100644
--- a/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java
+++ b/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java
@@ -86,29 +86,13 @@ public class MainActivity extends AppCompatActivity implements CvCameraViewListe
         // Forward image through network.
         Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
                 new Size(IN_WIDTH, IN_HEIGHT),
-                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), false);
+                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false);
         net.setInput(blob);
         Mat detections = net.forward();
 
         int cols = frame.cols();
         int rows = frame.rows();
 
-        Size cropSize;
-        if ((float)cols / rows > WH_RATIO) {
-            cropSize = new Size(rows * WH_RATIO, rows);
-        } else {
-            cropSize = new Size(cols, cols / WH_RATIO);
-        }
-
-        int y1 = (int)(rows - cropSize.height) / 2;
-        int y2 = (int)(y1 + cropSize.height);
-        int x1 = (int)(cols - cropSize.width) / 2;
-        int x2 = (int)(x1 + cropSize.width);
-        Mat subFrame = frame.submat(y1, y2, x1, x2);
-
-        cols = subFrame.cols();
-        rows = subFrame.rows();
-
         detections = detections.reshape(1, (int)detections.total() / 7);
 
         for (int i = 0; i < detections.rows(); ++i) {
@@ -116,26 +100,24 @@ public class MainActivity extends AppCompatActivity implements CvCameraViewListe
             if (confidence > THRESHOLD) {
                 int classId = (int)detections.get(i, 1)[0];
 
-                int xLeftBottom = (int)(detections.get(i, 3)[0] * cols);
-                int yLeftBottom = (int)(detections.get(i, 4)[0] * rows);
-                int xRightTop   = (int)(detections.get(i, 5)[0] * cols);
-                int yRightTop   = (int)(detections.get(i, 6)[0] * rows);
+                int left   = (int)(detections.get(i, 3)[0] * cols);
+                int top    = (int)(detections.get(i, 4)[0] * rows);
+                int right  = (int)(detections.get(i, 5)[0] * cols);
+                int bottom = (int)(detections.get(i, 6)[0] * rows);
 
                 // Draw rectangle around detected object.
-                Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom),
-                        new Point(xRightTop, yRightTop),
-                        new Scalar(0, 255, 0));
+                Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom),
+                                  new Scalar(0, 255, 0));
                 String label = classNames[classId] + ": " + confidence;
                 int[] baseLine = new int[1];
                 Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);
 
                 // Draw background for label.
-                Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom - labelSize.height),
-                        new Point(xLeftBottom + labelSize.width, yLeftBottom + baseLine[0]),
-                        new Scalar(255, 255, 255), Imgproc.FILLED);
-
+                Imgproc.rectangle(frame, new Point(left, top - labelSize.height),
+                                  new Point(left + labelSize.width, top + baseLine[0]),
+                                  new Scalar(255, 255, 255), Imgproc.FILLED);
                 // Write class name and confidence.
-                Imgproc.putText(subFrame, label, new Point(xLeftBottom, yLeftBottom),
+                Imgproc.putText(frame, label, new Point(left, top),
                         Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
             }
         }
diff --git a/samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp b/samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp
new file mode 100755
index 0000000000..345fd060a2
--- /dev/null
+++ b/samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp
@@ -0,0 +1,104 @@
+﻿/**
+* @brief You will learn how to segment an anisotropic image with a single local orientation by a gradient structure tensor (GST)
+* @author Karpushin Vladislav, karpushin@ngs.ru, https://github.com/VladKarpushin
+*/
+
+#include <iostream>
+#include "opencv2/imgproc.hpp"
+#include "opencv2/imgcodecs.hpp"
+
+using namespace cv;
+using namespace std;
+
+void calcGST(const Mat& inputImg, Mat& imgCoherencyOut, Mat& imgOrientationOut, int w);
+
+int main()  // reads input.jpg; writes result.jpg, Coherency.jpg and Orientation.jpg; returns -1 if loading fails
+{
+    int W = 52;             // window size is WxW
+    double C_Thr = 0.43;    // threshold for coherency
+    int LowThr = 35;        // threshold1 for orientation, it ranges from 0 to 180
+    int HighThr = 57;       // threshold2 for orientation, it ranges from 0 to 180
+
+    Mat imgIn = imread("input.jpg", IMREAD_GRAYSCALE);
+    if (imgIn.empty()) //check whether the image is loaded or not
+    {
+        cout << "ERROR : Image cannot be loaded..!!" << endl;
+        return -1;
+    }
+
+    //! [main]
+    Mat imgCoherency, imgOrientation;
+    calcGST(imgIn, imgCoherency, imgOrientation, W);
+
+    //! [thresholding]
+    Mat imgCoherencyBin;
+    imgCoherencyBin = imgCoherency > C_Thr;     // mask of pixels whose coherency exceeds C_Thr
+    Mat imgOrientationBin;
+    inRange(imgOrientation, Scalar(LowThr), Scalar(HighThr), imgOrientationBin);    // mask of orientations between LowThr and HighThr
+    //! [thresholding]
+
+    //! [combining]
+    Mat imgBin;
+    imgBin = imgCoherencyBin & imgOrientationBin;   // keep only pixels that pass both tests
+    //! [combining]
+    //! [main]
+
+    normalize(imgCoherency, imgCoherency, 0, 255, NORM_MINMAX);     // min-max rescale to 0..255 before saving
+    normalize(imgOrientation, imgOrientation, 0, 255, NORM_MINMAX);
+    imwrite("result.jpg", 0.5*(imgIn + imgBin));    // input image blended with the segmentation mask
+    imwrite("Coherency.jpg", imgCoherency);
+    imwrite("Orientation.jpg", imgOrientation);
+    return 0;
+}
+//! [calcGST]
+void calcGST(const Mat& inputImg, Mat& imgCoherencyOut, Mat& imgOrientationOut, int w)  // w: side of the square averaging window
+{
+    Mat img;
+    inputImg.convertTo(img, CV_64F);    // all further arithmetic is done on a CV_64F copy
+
+    // GST components calculation (start)
+    // J =  (J11 J12; J12 J22) - GST
+    Mat imgDiffX, imgDiffY, imgDiffXY;
+    Sobel(img, imgDiffX, CV_64F, 1, 0, 3);
+    Sobel(img, imgDiffY, CV_64F, 0, 1, 3);
+    multiply(imgDiffX, imgDiffY, imgDiffXY);
+
+    Mat imgDiffXX, imgDiffYY;
+    multiply(imgDiffX, imgDiffX, imgDiffXX);
+    multiply(imgDiffY, imgDiffY, imgDiffYY);
+
+    Mat J11, J22, J12;      // J11, J22 and J12 are GST components
+    boxFilter(imgDiffXX, J11, CV_64F, Size(w, w));
+    boxFilter(imgDiffYY, J22, CV_64F, Size(w, w));
+    boxFilter(imgDiffXY, J12, CV_64F, Size(w, w));
+    // GST components calculation (stop)
+
+    // eigenvalue calculation (start)
+    // lambda1 = J11 + J22 + sqrt((J11-J22)^2 + 4*J12^2)   (twice the eigenvalue of J; the factor 1/2
+    // lambda2 = J11 + J22 - sqrt((J11-J22)^2 + 4*J12^2)    is omitted because it cancels in the coherency ratio)
+    Mat tmp1, tmp2, tmp3, tmp4;
+    tmp1 = J11 + J22;
+    tmp2 = J11 - J22;
+    multiply(tmp2, tmp2, tmp2);
+    multiply(J12, J12, tmp3);
+    sqrt(tmp2 + 4.0 * tmp3, tmp4);
+
+    Mat lambda1, lambda2;
+    lambda1 = tmp1 + tmp4;      // biggest eigenvalue
+    lambda2 = tmp1 - tmp4;      // smallest eigenvalue
+    // eigenvalue calculation (stop)
+
+    // Coherency calculation (start)
+    // Coherency = (lambda1 - lambda2)/(lambda1 + lambda2) - measure of anisotropism
+    // Coherency is anisotropy degree (consistency of local orientation)
+    divide(lambda1 - lambda2, lambda1 + lambda2, imgCoherencyOut);
+    // Coherency calculation (stop)
+
+    // orientation angle calculation (start)
+    // tan(2*Alpha) = 2*J12/(J22 - J11)
+    // Alpha = 0.5 * atan2(2*J12, J22 - J11)
+    phase(J22 - J11, 2.0*J12, imgOrientationOut, true);
+    imgOrientationOut = 0.5*imgOrientationOut;
+    // orientation angle calculation (stop)
+}
+//! [calcGST]