diff --git a/CMakeLists.txt b/CMakeLists.txt index ba872cd9cd..07a742597c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -271,7 +271,6 @@ OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) ) OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) ) OCV_OPTION(WITH_LIBREALSENSE "Include Intel librealsense support" OFF IF (NOT WITH_INTELPERC) ) -OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT)) OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF IF ((UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)) ) @@ -694,11 +693,6 @@ if(WITH_DIRECTX) include(cmake/OpenCVDetectDirectX.cmake) endif() -# --- Matlab/Octave --- -if(WITH_MATLAB) - include(cmake/OpenCVFindMatlab.cmake) -endif() - if(WITH_VTK) include(cmake/OpenCVDetectVTK.cmake) endif() @@ -1518,15 +1512,7 @@ if(BUILD_JAVA OR BUILD_opencv_java) status(" Java tests:" BUILD_TESTS AND opencv_test_java_BINARY_DIR THEN YES ELSE NO) endif() -# ========================= matlab ========================= -if(WITH_MATLAB OR MATLAB_FOUND) - status("") - status(" Matlab:" MATLAB_FOUND THEN "YES" ELSE "NO") - if(MATLAB_FOUND) - status(" mex:" MATLAB_MEX_SCRIPT THEN "${MATLAB_MEX_SCRIPT}" ELSE NO) - status(" Compiler/generator:" MEX_WORKS THEN "Working" ELSE "Not working (bindings will not be generated)") - endif() -endif() +ocv_cmake_hook(STATUS_DUMP_EXTRA) # ========================== auxiliary ========================== status("") diff --git a/cmake/OpenCVFindMatlab.cmake b/cmake/OpenCVFindMatlab.cmake deleted file mode 100644 index ffe8857fec..0000000000 --- a/cmake/OpenCVFindMatlab.cmake +++ /dev/null @@ -1,199 +0,0 @@ -# ----- Find Matlab/Octave ----- 
-# -# OpenCVFindMatlab.cmake attempts to locate the install path of Matlab in order -# to extract the mex headers, libraries and shell scripts. If found -# successfully, the following variables will be defined -# -# MATLAB_FOUND: true/false -# MATLAB_ROOT_DIR: Root of Matlab installation -# MATLAB_BIN: The main Matlab "executable" (shell script) -# MATLAB_MEX_SCRIPT: The mex script used to compile mex files -# MATLAB_INCLUDE_DIRS:Path to "mex.h" -# MATLAB_LIBRARY_DIRS:Path to mex and matrix libraries -# MATLAB_LIBRARIES: The Matlab libs, usually mx, mex, mat -# MATLAB_MEXEXT: The mex library extension. It will be one of: -# mexwin32, mexwin64, mexglx, mexa64, mexmac, -# mexmaci, mexmaci64, mexsol, mexs64 -# MATLAB_ARCH: The installation architecture. It is **usually** -# the MEXEXT with the preceding "mex" removed, -# though it's different for linux distros. -# -# There doesn't appear to be an elegant way to detect all versions of Matlab -# across different platforms. If you know the matlab path and want to avoid -# the search, you can define the path to the Matlab root when invoking cmake: -# -# cmake -DMATLAB_ROOT_DIR='/PATH/TO/ROOT_DIR' .. - - - -# ----- set_library_presuffix ----- -# -# Matlab tends to use some non-standard prefixes and suffixes on its libraries. -# For example, libmx.dll on Windows (Windows does not add prefixes) and -# mkl.dylib on OS X (OS X uses "lib" prefixes). -# On some versions of Windows the .dll suffix also appears to not be checked. -# -# This function modifies the library prefixes and suffixes used by -# find_library when finding Matlab libraries. It does not affect scopes -# outside of this file. 
-function(set_libarch_prefix_suffix) - if (UNIX AND NOT APPLE) - set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE) - set(CMAKE_FIND_LIBRARY_SUFFIXES ".so" ".a" PARENT_SCOPE) - elseif (APPLE) - set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE) - set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".a" PARENT_SCOPE) - elseif (WIN32) - set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE) - set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll" PARENT_SCOPE) - endif() -endfunction() - - - -# ----- locate_matlab_root ----- -# -# Attempt to find the path to the Matlab installation. If successful, sets -# the absolute path in the variable MATLAB_ROOT_DIR -function(locate_matlab_root) - - # --- UNIX/APPLE --- - if (UNIX) - # possible root locations, in order of likelihood - set(SEARCH_DIRS_ /Applications /usr/local /opt/local /usr /opt) - foreach (DIR_ ${SEARCH_DIRS_}) - file(GLOB MATLAB_ROOT_DIR_ ${DIR_}/MATLAB/R* ${DIR_}/MATLAB_R*) - if (MATLAB_ROOT_DIR_) - # sort in order from highest to lowest - # normally it's in the format MATLAB_R[20XX][A/B] - # TODO: numerical rather than lexicographic sort. However, - # CMake does not support floating-point MATH(EXPR ...) at this time. - list(SORT MATLAB_ROOT_DIR_) - list(REVERSE MATLAB_ROOT_DIR_) - list(GET MATLAB_ROOT_DIR_ 0 MATLAB_ROOT_DIR_) - set(MATLAB_ROOT_DIR ${MATLAB_ROOT_DIR_} PARENT_SCOPE) - return() - endif() - endforeach() - - # --- WINDOWS --- - elseif (WIN32) - # 1. search the path environment variable - find_program(MATLAB_ROOT_DIR_ matlab PATHS ENV PATH) - if (MATLAB_ROOT_DIR_) - # get the root directory from the full path - # /path/to/matlab/rootdir/bin/matlab.exe - get_filename_component(MATLAB_ROOT_DIR_ ${MATLAB_ROOT_DIR_} PATH) - get_filename_component(MATLAB_ROOT_DIR_ ${MATLAB_ROOT_DIR_} PATH) - set(MATLAB_ROOT_DIR ${MATLAB_ROOT_DIR_} PARENT_SCOPE) - return() - endif() - - # 2. 
search the registry - # determine the available Matlab versions - set(REG_EXTENSION_ "SOFTWARE\\Mathworks\\MATLAB") - set(REG_ROOTS_ "HKEY_LOCAL_MACHINE" "HKEY_CURRENT_USER") - foreach(REG_ROOT_ ${REG_ROOTS_}) - execute_process(COMMAND reg query "${REG_ROOT_}\\${REG_EXTENSION_}" OUTPUT_VARIABLE QUERY_RESPONSE_ ERROR_VARIABLE UNUSED_) - if (QUERY_RESPONSE_) - string(REGEX MATCHALL "[0-9]\\.[0-9]" VERSION_STRINGS_ ${QUERY_RESPONSE_}) - list(APPEND VERSIONS_ ${VERSION_STRINGS_}) - endif() - endforeach() - - # select the highest version - list(APPEND VERSIONS_ "0.0") - list(SORT VERSIONS_) - list(REVERSE VERSIONS_) - list(GET VERSIONS_ 0 VERSION_) - - # request the MATLABROOT from the registry - foreach(REG_ROOT_ ${REG_ROOTS_}) - get_filename_component(QUERY_RESPONSE_ [${REG_ROOT_}\\${REG_EXTENSION_}\\${VERSION_};MATLABROOT] ABSOLUTE) - if (NOT ${QUERY_RESPONSE_} MATCHES "registry$") - set(MATLAB_ROOT_DIR ${QUERY_RESPONSE_} PARENT_SCOPE) - return() - endif() - endforeach() - endif() -endfunction() - - - -# ----- locate_matlab_components ----- -# -# Given a directory MATLAB_ROOT_DIR, attempt to find the Matlab components -# (include directory and libraries) under the root. 
If everything is found, -# sets the variable MATLAB_FOUND to TRUE -function(locate_matlab_components MATLAB_ROOT_DIR) - # get the mex extension - find_file(MATLAB_MEXEXT_SCRIPT_ NAMES mexext mexext.bat PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH) - execute_process(COMMAND ${MATLAB_MEXEXT_SCRIPT_} - OUTPUT_VARIABLE MATLAB_MEXEXT_ - OUTPUT_STRIP_TRAILING_WHITESPACE) - if (NOT MATLAB_MEXEXT_) - return() - endif() - - # map the mexext to an architecture extension - set(ARCHITECTURES_ "maci64" "maci" "glnxa64" "glnx64" "sol64" "sola64" "win32" "win64" ) - foreach(ARCHITECTURE_ ${ARCHITECTURES_}) - if(EXISTS ${MATLAB_ROOT_DIR}/bin/${ARCHITECTURE_}) - set(MATLAB_ARCH_ ${ARCHITECTURE_}) - break() - endif() - endforeach() - - # get the path to the libraries - set(MATLAB_LIBRARY_DIRS_ ${MATLAB_ROOT_DIR}/bin/${MATLAB_ARCH_}) - - # get the libraries - set_libarch_prefix_suffix() - find_library(MATLAB_LIB_MX_ mx PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH) - find_library(MATLAB_LIB_MEX_ mex PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH) - find_library(MATLAB_LIB_MAT_ mat PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH) - set(MATLAB_LIBRARIES_ ${MATLAB_LIB_MX_} ${MATLAB_LIB_MEX_} ${MATLAB_LIB_MAT_}) - - # get the include path - find_path(MATLAB_INCLUDE_DIRS_ mex.h ${MATLAB_ROOT_DIR}/extern/include) - - # get the mex shell script - find_program(MATLAB_MEX_SCRIPT_ NAMES mex mex.bat PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH) - - # get the Matlab executable - find_program(MATLAB_BIN_ NAMES matlab PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH) - - # export into parent scope - if (MATLAB_MEX_SCRIPT_ AND MATLAB_LIBRARIES_ AND MATLAB_INCLUDE_DIRS_) - set(MATLAB_BIN ${MATLAB_BIN_} PARENT_SCOPE) - set(MATLAB_MEX_SCRIPT ${MATLAB_MEX_SCRIPT_} PARENT_SCOPE) - set(MATLAB_INCLUDE_DIRS ${MATLAB_INCLUDE_DIRS_} PARENT_SCOPE) - set(MATLAB_LIBRARIES ${MATLAB_LIBRARIES_} PARENT_SCOPE) - set(MATLAB_LIBRARY_DIRS ${MATLAB_LIBRARY_DIRS_} PARENT_SCOPE) - set(MATLAB_MEXEXT ${MATLAB_MEXEXT_} 
PARENT_SCOPE) - set(MATLAB_ARCH ${MATLAB_ARCH_} PARENT_SCOPE) - endif() -endfunction() - - - -# ---------------------------------------------------------------------------- -# FIND MATLAB COMPONENTS -# ---------------------------------------------------------------------------- -if (NOT MATLAB_FOUND) - - # attempt to find the Matlab root folder - if (NOT MATLAB_ROOT_DIR) - locate_matlab_root() - endif() - - # given the matlab root folder, find the library locations - if (MATLAB_ROOT_DIR) - locate_matlab_components(${MATLAB_ROOT_DIR}) - endif() - find_package_handle_standard_args(Matlab DEFAULT_MSG - MATLAB_MEX_SCRIPT MATLAB_INCLUDE_DIRS - MATLAB_ROOT_DIR MATLAB_LIBRARIES - MATLAB_LIBRARY_DIRS MATLAB_MEXEXT - MATLAB_ARCH MATLAB_BIN) -endif() diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 7c81b0c6f7..6a60648359 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -296,28 +296,29 @@ endfunction() # Calls 'add_subdirectory' for each location. # Note: both input lists should have same length. # Usage: _add_modules_1( ) -function(_add_modules_1 paths names) - list(LENGTH ${paths} len) - if(len EQUAL 0) - return() - endif() - list(LENGTH ${names} len_verify) - if(NOT len EQUAL len_verify) - message(FATAL_ERROR "Bad configuration! ${len} != ${len_verify}") +macro(_add_modules_1 paths names) + ocv_debug_message("_add_modules_1(paths=${paths}, names=${names}, ... " ${ARGN} ")") + list(LENGTH ${paths} __len) + if(NOT __len EQUAL 0) + list(LENGTH ${names} __len_verify) + if(NOT __len EQUAL __len_verify) + message(FATAL_ERROR "Bad configuration! 
${__len} != ${__len_verify}") + endif() + math(EXPR __len "${__len} - 1") + foreach(i RANGE ${__len}) + list(GET ${paths} ${i} __path) + list(GET ${names} ${i} __name) + #message(STATUS "First pass: ${__name} => ${__path}") + include("${__path}/cmake/init.cmake" OPTIONAL) + add_subdirectory("${__path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${__name}") + endforeach() endif() - math(EXPR len "${len} - 1") - foreach(i RANGE ${len}) - list(GET ${paths} ${i} path) - list(GET ${names} ${i} name) - #message(STATUS "First pass: ${name} => ${path}") - include("${path}/cmake/init.cmake" OPTIONAL) - add_subdirectory("${path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${name}") - endforeach() -endfunction() +endmacro() # Calls 'add_subdirectory' for each module name. # Usage: _add_modules_2([ ...]) -function(_add_modules_2) +macro(_add_modules_2) + ocv_debug_message("_add_modules_2(" ${ARGN} ")") foreach(m ${ARGN}) set(the_module "${m}") ocv_cmake_hook(PRE_MODULES_CREATE_${the_module}) @@ -333,7 +334,8 @@ function(_add_modules_2) endif() ocv_cmake_hook(POST_MODULES_CREATE_${the_module}) endforeach() -endfunction() + unset(the_module) +endmacro() # Check if list of input items is unique. 
# Usage: _assert_uniqueness( [ ...]) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 60c20192dc..e0c740caf9 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -121,8 +121,10 @@ macro(ocv_assert) endmacro() macro(ocv_debug_message) -# string(REPLACE ";" " " __msg "${ARGN}") -# message(STATUS "${__msg}") + if(OPENCV_CMAKE_DEBUG_MESSAGES) + string(REPLACE ";" " " __msg "${ARGN}") + message(STATUS "${__msg}") + endif() endmacro() macro(ocv_check_environment_variables) diff --git a/doc/opencv.bib b/doc/opencv.bib index e059096dc4..5c0e475a27 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -1035,3 +1035,37 @@ publisher = {BMVA Press}, author = {Alexander Duda and Udo Frese}, } + +@book{jahne2000computer, + title={Computer vision and applications: a guide for students and practitioners}, + author={Jahne, Bernd}, + year={2000}, + publisher={Elsevier} +} + +@book{bigun2006vision, + title={Vision with direction}, + author={Bigun, Josef}, + year={2006}, + publisher={Springer} +} + +@inproceedings{van1995estimators, + title={Estimators for orientation and anisotropy in digitized images}, + author={Van Vliet, Lucas J and Verbeek, Piet W}, + booktitle={ASCI}, + volume={95}, + pages={16--18}, + year={1995} +} + +@article{yang1996structure, + title={Structure adaptive anisotropic image filtering}, + author={Yang, Guang-Zhong and Burger, Peter and Firmin, David N and Underwood, SR}, + journal={Image and Vision Computing}, + volume={14}, + number={2}, + pages={135--145}, + year={1996}, + publisher={Elsevier} +} diff --git a/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown b/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown index b0f92d72c6..1d0ebb3967 100644 --- a/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown +++ b/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown @@ -153,15 +153,15 @@ padding etc. 
This function takes following arguments: - **borderType** - Flag defining what kind of border to be added. It can be following types: - **cv.BORDER_CONSTANT** - Adds a constant colored border. The value should be given - as next argument. - - **cv.BORDER_REFLECT** - Border will be mirror reflection of the border elements, - like this : *fedcba|abcdefgh|hgfedcb* - - **cv.BORDER_REFLECT_101** or **cv.BORDER_DEFAULT** - Same as above, but with a - slight change, like this : *gfedcb|abcdefgh|gfedcba* - - **cv.BORDER_REPLICATE** - Last element is replicated throughout, like this: - *aaaaaa|abcdefgh|hhhhhhh* - - **cv.BORDER_WRAP** - Can't explain, it will look like this : - *cdefgh|abcdefgh|abcdefg* + as next argument. + - **cv.BORDER_REFLECT** - Border will be mirror reflection of the border elements, + like this : *fedcba|abcdefgh|hgfedcb* + - **cv.BORDER_REFLECT_101** or **cv.BORDER_DEFAULT** - Same as above, but with a + slight change, like this : *gfedcb|abcdefgh|gfedcba* + - **cv.BORDER_REPLICATE** - Last element is replicated throughout, like this: + *aaaaaa|abcdefgh|hhhhhhh* + - **cv.BORDER_WRAP** - Can't explain, it will look like this : + *cdefgh|abcdefgh|abcdefg* - **value** - Color of border if border type is cv.BORDER_CONSTANT diff --git a/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown b/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown index e5e9306d27..d6af059903 100644 --- a/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown +++ b/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown @@ -37,6 +37,7 @@ cv.namedWindow('image') # create trackbars for color change cv.createTrackbar('R','image',0,255,nothing) + cv.createTrackbar('G','image',0,255,nothing) cv.createTrackbar('B','image',0,255,nothing) diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown b/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown new file mode 100755 index 
0000000000..16df8eedd2 --- /dev/null +++ b/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown @@ -0,0 +1,91 @@ +Anisotropic image segmentation by a gradient structure tensor {#tutorial_anisotropic_image_segmentation_by_a_gst} +========================== + +Goal +---- + +In this tutorial you will learn: + +- what the gradient structure tensor is +- how to estimate orientation and coherency of an anisotropic image by a gradient structure tensor +- how to segment an anisotropic image with a single local orientation by a gradient structure tensor + +Theory +------ + +@note The explanation is based on the books @cite jahne2000computer, @cite bigun2006vision and @cite van1995estimators. A good physical explanation of a gradient structure tensor is given in @cite yang1996structure. Also, you can refer to the Wikipedia page [Structure tensor]. +@note An anisotropic image on this page is a real world image. + +### What is the gradient structure tensor? + +In mathematics, the gradient structure tensor (also referred to as the second-moment matrix, the second order moment tensor, the inertia tensor, etc.) is a matrix derived from the gradient of a function. It summarizes the predominant directions of the gradient in a specified neighborhood of a point, and the degree to which those directions are coherent (coherency). The gradient structure tensor is widely used in image processing and computer vision for 2D/3D image segmentation, motion detection, adaptive filtration, local image features detection, etc. + +Important features of anisotropic images include orientation and coherency of a local anisotropy. In this tutorial we will show how to estimate orientation and coherency, and how to segment an anisotropic image with a single local orientation by a gradient structure tensor. + +The gradient structure tensor of an image is a 2x2 symmetric matrix.
Eigenvectors of the gradient structure tensor indicate local orientation, whereas eigenvalues give coherency (a measure of anisotropism). + +The gradient structure tensor \f$J\f$ of an image \f$Z\f$ can be written as: + +\f[J = \begin{bmatrix} +J_{11} & J_{12} \\ +J_{12} & J_{22} +\end{bmatrix}\f] + +where \f$J_{11} = M[Z_{x}^{2}]\f$, \f$J_{22} = M[Z_{y}^{2}]\f$, \f$J_{12} = M[Z_{x}Z_{y}]\f$ - components of the tensor, \f$M[]\f$ is a symbol of mathematical expectation (we can consider this operation as averaging in a window w), \f$Z_{x}\f$ and \f$Z_{y}\f$ are partial derivatives of an image \f$Z\f$ with respect to \f$x\f$ and \f$y\f$. + +The eigenvalues of the tensor are given by the formula below: +\f[\lambda_{1,2} = \frac{1}{2} \left [ J_{11} + J_{22} \pm \sqrt{(J_{11} - J_{22})^{2} + 4J_{12}^{2}} \right ]\f] +where \f$\lambda_1\f$ is the largest eigenvalue and \f$\lambda_2\f$ is the smallest eigenvalue. + +### How to estimate orientation and coherency of an anisotropic image by gradient structure tensor? + +The orientation of an anisotropic image: +\f[\alpha = 0.5\arctan\frac{2J_{12}}{J_{22} - J_{11}}\f] + +Coherency: +\f[C = \frac{\lambda_1 - \lambda_2}{\lambda_1 + \lambda_2}\f] + +The coherency ranges from 0 to 1. For ideal local orientation (\f$\lambda_2\f$ = 0, \f$\lambda_1\f$ > 0) it is one, for an isotropic gray value structure (\f$\lambda_1\f$ = \f$\lambda_2\f$ > 0) it is zero. + +Source code +----------- + +You can find source code in the `samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp` of the OpenCV source code library.
+ +@include cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp + +Explanation +----------- +An anisotropic image segmentation algorithm consists of a gradient structure tensor calculation, an orientation calculation, a coherency calculation and an orientation and coherency thresholding: +@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp main + +A function calcGST() calculates orientation and coherency by using a gradient structure tensor. An input parameter w defines a window size: +@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp calcGST + +The code below applies thresholds LowThr and HighThr to image orientation and a threshold C_Thr to image coherency calculated by the previous function. LowThr and HighThr define the orientation range: +@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp thresholding + +And finally we combine thresholding results: +@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp combining + +Result +------ + +Below you can see the real anisotropic image with a single direction: +![Anisotropic image with the single direction](images/gst_input.jpg) + +Below you can see the orientation and coherency of the anisotropic image: +![Orientation](images/gst_orientation.jpg) +![Coherency](images/gst_coherency.jpg) + +Below you can see the segmentation result: +![Segmentation result](images/gst_result.jpg) + +The result has been computed with w = 52, C_Thr = 0.43, LowThr = 35, HighThr = 57. We can see that the algorithm selected only the areas with one single direction.
+ +References +------ +- [Structure tensor] - structure tensor description on the wikipedia + + +[Structure tensor]: https://en.wikipedia.org/wiki/Structure_tensor diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg new file mode 100755 index 0000000000..87d0881cfc Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg differ diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg new file mode 100755 index 0000000000..5fb3dfe830 Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg differ diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg new file mode 100755 index 0000000000..976fb24c90 Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg differ diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg new file mode 100755 index 0000000000..7a1e7cd672 Binary files /dev/null and b/doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg differ diff --git a/doc/tutorials/imgproc/table_of_content_imgproc.markdown b/doc/tutorials/imgproc/table_of_content_imgproc.markdown index bea1e1b9ac..badc30d095 100644 --- a/doc/tutorials/imgproc/table_of_content_imgproc.markdown +++ b/doc/tutorials/imgproc/table_of_content_imgproc.markdown @@ -330,3 +330,13 @@ In this section you will learn about the image processing (manipulation) functio *Author:* Karpushin Vladislav You will learn how to recover an image with motion blur distortion using a Wiener filter. 
+ +- @subpage tutorial_anisotropic_image_segmentation_by_a_gst + + *Languages:* C++ + + *Compatibility:* \> OpenCV 2.0 + + *Author:* Karpushin Vladislav + + You will learn how to segment an anisotropic image with a single local orientation by a gradient structure tensor. diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 321c54b5c3..38264cc58f 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -238,6 +238,14 @@ void Mat::copyTo( OutputArray _dst ) const { CV_INSTRUMENT_REGION(); +#ifdef HAVE_CUDA + if (_dst.isGpuMat()) + { + _dst.getGpuMat().upload(*this); + return; + } +#endif + int dtype = _dst.type(); if( _dst.fixedType() && dtype != type() ) { diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp index b5b4514ada..e64d097aad 100644 --- a/modules/core/src/matrix_wrap.cpp +++ b/modules/core/src/matrix_wrap.cpp @@ -1146,6 +1146,10 @@ void _InputArray::copyTo(const _OutputArray& arr) const } else if( k == UMAT ) ((UMat*)obj)->copyTo(arr); +#ifdef HAVE_CUDA + else if (k == CUDA_GPU_MAT) + ((cuda::GpuMat*)obj)->copyTo(arr); +#endif else CV_Error(Error::StsNotImplemented, ""); } @@ -1163,6 +1167,10 @@ void _InputArray::copyTo(const _OutputArray& arr, const _InputArray & mask) cons } else if( k == UMAT ) ((UMat*)obj)->copyTo(arr, mask); +#ifdef HAVE_CUDA + else if (k == CUDA_GPU_MAT) + ((cuda::GpuMat*)obj)->copyTo(arr, mask); +#endif else CV_Error(Error::StsNotImplemented, ""); } diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 248b679379..27d587b186 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -874,6 +874,14 @@ void UMat::copyTo(OutputArray _dst) const { CV_INSTRUMENT_REGION(); +#ifdef HAVE_CUDA + if (_dst.isGpuMat()) + { + _dst.getGpuMat().upload(*this); + return; + } +#endif + int dtype = _dst.type(); if( _dst.fixedType() && dtype != type() ) { diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 
52416731ff..1cb7c467f9 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -10,7 +10,7 @@ set(the_description "Deep neural network module. It allows to load models from d ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX) -ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js) +ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java js) ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index cd4cca4c28..d77dd181dc 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -234,7 +234,9 @@ CV__DNN_INLINE_NS_BEGIN { public: int type; - Size kernel, stride, pad; + Size kernel, stride; + int pad_l, pad_t, pad_r, pad_b; + CV_DEPRECATED Size pad; bool globalPooling; bool computeMaxIdx; String padMode; diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index bf855011e0..b2a68b15d5 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -836,7 +836,7 @@ CV__DNN_INLINE_NS_BEGIN * @returns 4-dimensional Mat with NCHW dimensions order. */ CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(), - const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true, + const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, int ddepth=CV_32F); /** @brief Creates 4-dimensional blob from image. @@ -845,7 +845,7 @@ CV__DNN_INLINE_NS_BEGIN */ CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0, const Size& size = Size(), const Scalar& mean = Scalar(), - bool swapRB=true, bool crop=true, int ddepth=CV_32F); + bool swapRB=false, bool crop=false, int ddepth=CV_32F); /** @brief Creates 4-dimensional blob from series of images. 
Optionally resizes and @@ -866,7 +866,7 @@ CV__DNN_INLINE_NS_BEGIN * @returns 4-dimensional Mat with NCHW dimensions order. */ CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0, - Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true, + Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, int ddepth=CV_32F); /** @brief Creates 4-dimensional blob from series of images. @@ -875,7 +875,7 @@ CV__DNN_INLINE_NS_BEGIN */ CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob, double scalefactor=1.0, Size size = Size(), - const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true, + const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, int ddepth=CV_32F); /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure diff --git a/modules/dnn/include/opencv2/dnn/version.hpp b/modules/dnn/include/opencv2/dnn/version.hpp index 6d1568cb45..ff9faa0602 100644 --- a/modules/dnn/include/opencv2/dnn/version.hpp +++ b/modules/dnn/include/opencv2/dnn/version.hpp @@ -6,7 +6,7 @@ #define OPENCV_DNN_VERSION_HPP /// Use with major OpenCV version only. 
-#define OPENCV_DNN_API_VERSION 20180903 +#define OPENCV_DNN_API_VERSION 20180917 #if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_INLINE_NS #define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 40719f3764..a948c6ef9d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -64,10 +64,17 @@ public: BaseConvolutionLayerImpl(const LayerParams ¶ms) { setParamsFrom(params); - getConvolutionKernelParams(params, kernel.height, kernel.width, pad.height, - pad.width, stride.height, stride.width, dilation.height, + int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0; + getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t, + pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height, dilation.width, padMode); + if (pad_t != pad_b || pad_l != pad_r) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + + pad.width = pad_l; + pad.height = pad_t; + numOutput = params.get("num_output"); int ngroups = params.get("group", 1); @@ -100,8 +107,18 @@ public: } Size outSize = Size(outputs[0].size[3], outputs[0].size[2]); + + int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width; + getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize, - kernel, stride, padMode, dilation, pad); + kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r); + + + if (pad_t != pad_b || pad_l != pad_r) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + + pad.width = pad_l; + pad.height = pad_t; } bool hasBias() const @@ -1156,9 +1173,17 @@ public: std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); + + int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width; getConvPoolPaddings(Size(outputs[0].size[3], 
outputs[0].size[2]), Size(inputs[0].size[3], inputs[0].size[2]), - kernel, stride, padMode, dilation, pad); + kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r); + + if (pad_t != pad_b || pad_l != pad_r) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + + pad.width = pad_l; + pad.height = pad_t; } class MatMulInvoker : public ParallelLoopBody diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index bf5834c864..2dbb12109d 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -118,9 +118,19 @@ void getKernelSize(const LayerParams ¶ms, int &kernelH, int &kernelW) CV_Assert(kernelH > 0 && kernelW > 0); } -void getStrideAndPadding(const LayerParams ¶ms, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode) +void getStrideAndPadding(const LayerParams ¶ms, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode) { - util::getParameter(params, "pad", "pad", padH, padW, true, 0); + if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) { + padT = params.get("pad_t"); + padL = params.get("pad_l"); + padB = params.get("pad_b"); + padR = params.get("pad_r"); + } + else { + util::getParameter(params, "pad", "pad", padT, padL, true, 0); + padB = padT; + padR = padL; + } util::getParameter(params, "stride", "stride", strideH, strideW, true, 1); padMode = ""; @@ -129,15 +139,15 @@ void getStrideAndPadding(const LayerParams ¶ms, int &padH, int &padW, int &s padMode = params.get("pad_mode"); } - CV_Assert(padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0); + CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0); } } void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, - int &padH, int &padW, int &strideH, int &strideW, cv::String &padMode) + int &padT, int 
&padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode) { - util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode); + util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode); globalPooling = params.has("global_pooling") && params.get("global_pooling"); @@ -148,9 +158,9 @@ void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernel { CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified"); } - if(padH != 0 || padW != 0 || strideH != 1 || strideW != 1) + if(padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1) { - CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1"); + CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1"); } } else @@ -159,12 +169,11 @@ void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernel } } -void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padH, int &padW, +void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode) { util::getKernelSize(params, kernelH, kernelW); - util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode); - + util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode); util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1); CV_Assert(dilationH > 0 && dilationW > 0); @@ -201,11 +210,11 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel, void getConvPoolPaddings(const Size& inp, const Size& out, const Size &kernel, const Size &stride, - const String &padMode, const Size &dilation, Size &pad) + const String &padMode, const Size &dilation, int 
&padT, int &padL, int &padB, int &padR) { if (padMode == "VALID") { - pad = cv::Size(0,0); + padT = padL = padB = padR = 0; } else if (padMode == "SAME") { @@ -213,7 +222,8 @@ void getConvPoolPaddings(const Size& inp, const Size& out, int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width); // For odd values of total padding, add more padding at the 'right' // side of the given dimension. - pad = cv::Size(Pw / 2, Ph / 2); + padT= padB = Ph / 2; + padL = padR = Pw / 2; } } diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index 4bb4c317e4..7fce183d6e 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -60,19 +60,20 @@ namespace cv namespace dnn { -void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, +void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode); void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, - int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode); + int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode); void getConvPoolOutParams(const Size& inp, const Size &kernel, const Size &stride, const String &padMode, const Size &dilation, Size& out); + void getConvPoolPaddings(const Size& inp, const Size& out, const Size &kernel, const Size &stride, - const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR); } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index a7464217c7..0b4b0ae850 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ 
b/modules/dnn/src/layers/pooling_layer.cpp @@ -85,8 +85,12 @@ public: type = STOCHASTIC; else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); + getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, - pad.height, pad.width, stride.height, stride.width, padMode); + pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode); + + pad.width = pad_l; + pad.height = pad_t; } else if (params.has("pooled_w") || params.has("pooled_h")) { @@ -130,7 +134,9 @@ public: kernel = inp; } - getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad); + getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r); + pad.width = pad_l; + pad.height = pad_t; #ifdef HAVE_OPENCL poolOp.release(); @@ -149,7 +155,7 @@ public: else return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == MAX || type == AVE && !pad.width && !pad.height); + (type == MAX || type == AVE && !pad_t && !pad_l && !pad_b && !pad_r); } #ifdef HAVE_OPENCL @@ -169,7 +175,10 @@ public: config.in_shape = shape(inputs[0]); config.out_shape = shape(outputs[0]); config.kernel = kernel; - config.pad = pad; + config.pad_l = pad_l; + config.pad_t = pad_t; + config.pad_r = pad_r; + config.pad_b = pad_b; config.stride = stride; config.channels = inputs[0].size[1]; config.pool_method = type == MAX ? 
LIBDNN_POOLING_METHOD_MAX : @@ -193,7 +202,6 @@ public: if (!poolOp->Forward(inpMat, outMat, maskMat)) return false; } - return true; } #endif @@ -264,8 +272,10 @@ public: poolLayer->_kernel_y = kernel.height; poolLayer->_stride_x = stride.width; poolLayer->_stride_y = stride.height; - poolLayer->_padding_x = pad.width; - poolLayer->_padding_y = pad.height; + poolLayer->_padding_x = pad_l; + poolLayer->_padding_y = pad_t; + poolLayer->params["pad-r"] = format("%d", pad_r); + poolLayer->params["pad-b"] = format("%d", pad_b); poolLayer->_exclude_pad = type == AVE && padMode == "SAME"; poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor"; poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX : @@ -296,12 +306,14 @@ public: return Ptr(); } + class PoolingInvoker : public ParallelLoopBody { public: const Mat* src, *rois; Mat *dst, *mask; - Size kernel, stride, pad; + Size kernel, stride; + int pad_l, pad_t, pad_r, pad_b; bool avePoolPaddedArea; int nstripes; bool computeMaxIdx; @@ -313,7 +325,7 @@ public: computeMaxIdx(0), poolingType(MAX), spatialScale(0) {} static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel, - Size stride, Size pad, bool avePoolPaddedArea, int poolingType, float spatialScale, + Size stride, int pad_l, int pad_t, int pad_r, int pad_b, bool avePoolPaddedArea, int poolingType, float spatialScale, bool computeMaxIdx, int nstripes) { CV_Assert_N( @@ -332,7 +344,10 @@ public: p.mask = &mask; p.kernel = kernel; p.stride = stride; - p.pad = pad; + p.pad_l = pad_l; + p.pad_t = pad_t; + p.pad_r = pad_r; + p.pad_b = pad_b; p.avePoolPaddedArea = avePoolPaddedArea; p.nstripes = nstripes; p.computeMaxIdx = computeMaxIdx; @@ -359,7 +374,6 @@ public: size_t stripeStart = r.start*stripeSize; size_t stripeEnd = std::min(r.end*stripeSize, total); int kernel_w = kernel.width, kernel_h = kernel.height; - int pad_w = pad.width, pad_h = pad.height; int stride_w = stride.width, stride_h = stride.height; bool 
compMaxIdx = computeMaxIdx; @@ -411,8 +425,8 @@ public: } else { - ystart = y0 * stride_h - pad_h; - yend = min(ystart + kernel_h, inp_height + pad_h); + ystart = y0 * stride_h - pad_t; + yend = min(ystart + kernel_h, inp_height + pad_b); srcData = src->ptr(n, c); } int ydelta = yend - ystart; @@ -428,7 +442,7 @@ public: if( poolingType == MAX) for( ; x0 < x1; x0++ ) { - int xstart = x0 * stride_w - pad_w; + int xstart = x0 * stride_w - pad_l; int xend = min(xstart + kernel_w, inp_width); xstart = max(xstart, 0); if (xstart >= xend || ystart >= yend) @@ -439,7 +453,7 @@ public: continue; } #if CV_SIMD128 - if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width ) + if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width ) { if( compMaxIdx ) { @@ -578,15 +592,15 @@ public: { for( ; x0 < x1; x0++ ) { - int xstart = x0 * stride_w - pad_w; - int xend = min(xstart + kernel_w, inp_width + pad_w); + int xstart = x0 * stride_w - pad_l; + int xend = min(xstart + kernel_w, inp_width + pad_r); int xdelta = xend - xstart; xstart = max(xstart, 0); xend = min(xend, inp_width); float inv_kernel_area = avePoolPaddedArea ? 
xdelta * ydelta : ((yend - ystart) * (xend - xstart)); inv_kernel_area = 1.0 / inv_kernel_area; #if CV_SIMD128 - if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width ) + if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width ) { v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32(); v_float32x4 ikarea = v_setall_f32(inv_kernel_area); @@ -695,21 +709,21 @@ public: { const int nstripes = getNumThreads(); Mat rois; - PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes); + PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes); } void avePooling(Mat &src, Mat &dst) { const int nstripes = getNumThreads(); Mat rois, mask; - PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes); + PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes); } void roiPooling(const Mat &src, const Mat &rois, Mat &dst) { const int nstripes = getNumThreads(); Mat mask; - PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes); + PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes); } virtual Ptr initMaxPoolingHalide(const std::vector > &inputs) @@ -723,10 +737,10 @@ public: Halide::Func top = (name.empty() ? 
Halide::Func() : Halide::Func(name)); Halide::RDom r(0, kernel.width, 0, kernel.height); Halide::Expr kx, ky; - if (pad.width || pad.height) + if(pad_l || pad_t) { - kx = clamp(x * stride.width + r.x - pad.width, 0, inWidth - 1); - ky = clamp(y * stride.height + r.y - pad.height, 0, inHeight - 1); + kx = clamp(x * stride.width + r.x - pad_l, 0, inWidth - 1); + ky = clamp(y * stride.height + r.y - pad_t, 0, inHeight - 1); } else { @@ -739,11 +753,11 @@ public: // Compute offset from argmax in range [0, kernel_size). Halide::Expr max_index; - if (pad.width || pad.height) + if(pad_l || pad_t) { - max_index = clamp(y * stride.height + res[1] - pad.height, + max_index = clamp(y * stride.height + res[1] - pad_t, 0, inHeight - 1) * inWidth + - clamp(x * stride.width + res[0] - pad.width, + clamp(x * stride.width + res[0] - pad_l, 0, inWidth - 1); } else @@ -852,21 +866,21 @@ public: } else if (padMode.empty()) { - float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height; - float width = (float)(in.width + 2 * pad.width - kernel.width) / stride.width; + float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height; + float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width; out.height = 1 + (ceilMode ? ceil(height) : floor(height)); out.width = 1 + (ceilMode ? ceil(width) : floor(width)); - if (pad.height || pad.width) + if (pad_r || pad_b) { // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. 
- if ((out.height - 1) * stride.height >= in.height + pad.height) + if ((out.height - 1) * stride.height >= in.height + pad_b) --out.height; - if ((out.width - 1) * stride.width >= in.width + pad.width) + if ((out.width - 1) * stride.width >= in.width + pad_r) --out.width; - CV_Assert((out.height - 1) * stride.height < in.height + pad.height); - CV_Assert((out.width - 1) * stride.width < in.width + pad.width); + CV_Assert((out.height - 1) * stride.height < in.height + pad_b); + CV_Assert((out.width - 1) * stride.width < in.width + pad_r); } } else @@ -888,6 +902,7 @@ public: dims[1] = psRoiOutChannels; } outputs.assign(type == MAX ? 2 : 1, shape(dims, 4)); + return false; } diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp index e0ca5ca98c..eda2e837c0 100644 --- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp +++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp @@ -345,7 +345,7 @@ struct OCL4DNNPoolConfig { OCL4DNNPoolConfig() : kernel(1, 1), - pad(0, 0), + pad_l(0), pad_t(0), pad_r(0), pad_b(0), stride(1, 1), dilation(1, 1), channels(0), @@ -358,7 +358,7 @@ struct OCL4DNNPoolConfig MatShape in_shape; MatShape out_shape; Size kernel; - Size pad; + int pad_l, pad_t, pad_r, pad_b; Size stride; Size dilation; @@ -381,7 +381,6 @@ class OCL4DNNPool UMat& top_mask); private: // Pooling parameters - std::vector pad_; std::vector stride_; std::vector kernel_shape_; std::vector im_in_shape_; @@ -394,8 +393,10 @@ class OCL4DNNPool int32_t kernel_w_; int32_t stride_h_; int32_t stride_w_; - int32_t pad_h_; - int32_t pad_w_; + int32_t pad_t_; + int32_t pad_l_; + int32_t pad_b_; + int32_t pad_r_; int32_t height_; int32_t width_; int32_t pooled_height_; diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp index 77cd3a6337..47b40cc6c2 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp @@ -62,7 +62,6 @@ 
OCL4DNNPool::OCL4DNNPool(OCL4DNNPoolConfig config) for (int i = 0; i < spatial_dims; ++i) { kernel_shape_.push_back(i == 0 ? config.kernel.height : config.kernel.width); - pad_.push_back(i == 0 ? config.pad.height : config.pad.width); stride_.push_back(i == 0 ? config.stride.height : config.stride.width); im_in_shape_.push_back(config.in_shape[dims - spatial_dims + i]); im_out_shape_.push_back(config.out_shape[dims - spatial_dims + i]); @@ -72,8 +71,10 @@ OCL4DNNPool::OCL4DNNPool(OCL4DNNPoolConfig config) kernel_w_ = kernel_shape_[1]; stride_h_ = stride_[0]; stride_w_ = stride_[1]; - pad_h_ = pad_[0]; - pad_w_ = pad_[1]; + pad_t_ = config.pad_t; + pad_l_ = config.pad_l; + pad_r_ = config.pad_r; + pad_b_ = config.pad_b; height_ = im_in_shape_[0]; width_ = im_in_shape_[1]; pooled_height_ = im_out_shape_[0]; @@ -113,14 +114,13 @@ bool OCL4DNNPool::Forward(const UMat& bottom, ocl::dnn::ocl4dnn_pooling_oclsrc, format(" -D Dtype=%s -D KERNEL_MAX_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d" " -D STRIDE_W=%d -D STRIDE_H=%d" - " -D PAD_W=%d -D PAD_H=%d%s", + " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s", (use_half) ? "half" : "float", kernel_w_, kernel_h_, stride_w_, stride_h_, - pad_w_, pad_h_, + pad_l_, pad_t_, pad_r_, pad_b_, computeMaxIdx ? " -D HAVE_MASK=1" : "" )); - if (oclk_max_pool_forward.empty()) return false; @@ -150,11 +150,11 @@ bool OCL4DNNPool::Forward(const UMat& bottom, ocl::dnn::ocl4dnn_pooling_oclsrc, format(" -D Dtype=%s -D KERNEL_AVE_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d" " -D STRIDE_W=%d -D STRIDE_H=%d" - " -D PAD_W=%d -D PAD_H=%d%s", + " -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s", (use_half) ? "half" : "float", kernel_w_, kernel_h_, stride_w_, stride_h_, - pad_w_, pad_h_, + pad_l_, pad_t_, pad_r_, pad_b_, avePoolPaddedArea ? 
" -D AVE_POOL_PADDING_AREA" : "" )); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 44d6b41a64..3e58af911d 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -174,9 +174,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot else if(attribute_name == "pads") { CV_Assert(attribute_proto.ints_size() == 4); - lp.set("pad_h", saturate_cast(attribute_proto.ints(0))); - lp.set("pad_w", saturate_cast(attribute_proto.ints(1))); - // push pad_b and pad_r for compute ceil_mode + lp.set("pad_t", saturate_cast(attribute_proto.ints(0))); + lp.set("pad_l", saturate_cast(attribute_proto.ints(1))); lp.set("pad_b", saturate_cast(attribute_proto.ints(2))); lp.set("pad_r", saturate_cast(attribute_proto.ints(3))); } @@ -306,6 +305,7 @@ void ONNXImporter::populateNet(Net dstNet) std::string layer_type = node_proto.op_type(); layerParams.type = layer_type; + if (layer_type == "MaxPool") { layerParams.type = "Pooling"; @@ -551,7 +551,6 @@ void ONNXImporter::populateNet(Net dstNet) for (int j = 0; j < node_proto.input_size(); j++) { layerId = layer_id.find(node_proto.input(j)); - if (layerId != layer_id.end()) { dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j); } diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl index 77d2e5ba33..53c61e4bd2 100644 --- a/modules/dnn/src/opencl/ocl4dnn_pooling.cl +++ b/modules/dnn/src/opencl/ocl4dnn_pooling.cl @@ -73,8 +73,8 @@ __kernel void const int xx = index / pooled_width; const int ph = xx % pooled_height; const int ch = xx / pooled_height; - int hstart = ph * STRIDE_H - PAD_H; - int wstart = pw * STRIDE_W - PAD_W; + int hstart = ph * STRIDE_H - PAD_T; + int wstart = pw * STRIDE_W - PAD_L; Dtype maxval = -FLT_MAX; int maxidx = -1; int in_offset = ch * height * width; @@ -117,10 +117,10 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)( const int xx 
= index / pooled_width; const int ph = xx % pooled_height; const int ch = xx / pooled_height; - int hstart = ph * STRIDE_H - PAD_H; - int wstart = pw * STRIDE_W - PAD_W; - int hend = min(hstart + KERNEL_H, height + PAD_H); - int wend = min(wstart + KERNEL_W, width + PAD_W); + int hstart = ph * STRIDE_H - PAD_T; + int wstart = pw * STRIDE_W - PAD_L; + int hend = min(hstart + KERNEL_H, height + PAD_B); + int wend = min(wstart + KERNEL_W, width + PAD_R); int pool_size; #ifdef AVE_POOL_PADDING_AREA pool_size = (hend - hstart) * (wend - wstart); diff --git a/modules/dnn/src/opencl/pooling.cl b/modules/dnn/src/opencl/pooling.cl index adfd59e6d9..2a92cb2f01 100644 --- a/modules/dnn/src/opencl/pooling.cl +++ b/modules/dnn/src/opencl/pooling.cl @@ -27,7 +27,7 @@ __kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, - const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r, __global T* top_data #ifdef MASK , __global float* mask @@ -41,8 +41,8 @@ __kernel void MaxPoolForward(const int nthreads, int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; int n = index / pooled_width / pooled_height / channels; - int hstart = ph * stride_h - pad_h; - int wstart = pw * stride_w - pad_w; + int hstart = ph * stride_h - pad_t; + int wstart = pw * stride_w - pad_l; const int hend = min(hstart + kernel_h, height); const int wend = min(wstart + kernel_w, width); hstart = max(hstart, 0); @@ -71,7 +71,7 @@ __kernel void MaxPoolForward(const int nthreads, __kernel void AvePoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int 
pooled_width, const int kernel_h, const int kernel_w, - const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r, __global T* top_data #ifdef MASK , __global float* mask // NOT USED @@ -84,9 +84,9 @@ __kernel void AvePoolForward(const int nthreads, int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_h; int wstart = pw * stride_w - pad_w; - int hend = min(hstart + kernel_h, height + pad_h); - int wend = min(wstart + kernel_w, width + pad_w); + int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_t; int wstart = pw * stride_w - pad_l; + int hend = min(hstart + kernel_h, height + pad_b); + int wend = min(wstart + kernel_w, width + pad_r); const int pool_size = (hend - hstart) * (wend - wstart); hstart = max(hstart, 0); wstart = max(wstart, 0); diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 4f0041eeb6..6c19093805 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -74,6 +74,18 @@ enum LuaType LEGACY_TYPE_RECUR_FUNCTION = 7 }; +// We use OpenCV's types to manage CV_ELEM_SIZE. 
+enum TorchType +{ + TYPE_DOUBLE = CV_64F, + TYPE_FLOAT = CV_32F, + TYPE_BYTE = CV_8U, + TYPE_CHAR = CV_8S, + TYPE_SHORT = CV_16S, + TYPE_INT = CV_32S, + TYPE_LONG = CV_32SC2 +}; + template static String toString(const T &v) { @@ -203,19 +215,19 @@ struct TorchImporter String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix)); if (typeStr == "Double") - return CV_64F; + return TYPE_DOUBLE; else if (typeStr == "Float" || typeStr == "Cuda") - return CV_32F; + return TYPE_FLOAT; else if (typeStr == "Byte") - return CV_8U; + return TYPE_BYTE; else if (typeStr == "Char") - return CV_8S; + return TYPE_CHAR; else if (typeStr == "Short") - return CV_16S; + return TYPE_SHORT; else if (typeStr == "Int") - return CV_32S; - else if (typeStr == "Long") //Carefully! CV_64S type coded as CV_USRTYPE1 - return CV_USRTYPE1; + return TYPE_INT; + else if (typeStr == "Long") + return TYPE_LONG; else CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\""); } @@ -236,36 +248,44 @@ struct TorchImporter void readTorchStorage(int index, int type = -1) { long size = readLong(); - Mat storageMat(1, size, (type != CV_USRTYPE1) ? 
type : CV_64F); //handle LongStorage as CV_64F Mat + Mat storageMat; switch (type) { - case CV_32F: + case TYPE_FLOAT: + storageMat.create(1, size, CV_32F); THFile_readFloatRaw(file, (float*)storageMat.data, size); break; - case CV_64F: + case TYPE_DOUBLE: + storageMat.create(1, size, CV_64F); THFile_readDoubleRaw(file, (double*)storageMat.data, size); break; - case CV_8S: - case CV_8U: + case TYPE_CHAR: + storageMat.create(1, size, CV_8S); THFile_readByteRaw(file, (uchar*)storageMat.data, size); break; - case CV_16S: - case CV_16U: + case TYPE_BYTE: + storageMat.create(1, size, CV_8U); + THFile_readByteRaw(file, (uchar*)storageMat.data, size); + break; + case TYPE_SHORT: + storageMat.create(1, size, CV_16S); THFile_readShortRaw(file, (short*)storageMat.data, size); break; - case CV_32S: + case TYPE_INT: + storageMat.create(1, size, CV_32S); THFile_readIntRaw(file, (int*)storageMat.data, size); break; - case CV_USRTYPE1: + case TYPE_LONG: { + storageMat.create(1, size, CV_64F); //handle LongStorage as CV_64F Mat double *buf = storageMat.ptr(); THFile_readLongRaw(file, (int64*)buf, size); for (size_t i = (size_t)size; i-- > 0; ) buf[i] = ((int64*)buf)[i]; - } break; + } default: CV_Error(Error::StsInternal, ""); break; diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 85ff7ace21..b6da2f189c 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -307,7 +307,7 @@ TEST_P(Reproducibility_SqueezeNet_v1_1, Accuracy) net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); - Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false); + Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false, true); ASSERT_TRUE(!input.empty()); Mat out; @@ -403,7 +403,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121) const string model = findDataFile("dnn/DenseNet_121.caffemodel", false); Mat inp = 
imread(_tf("dog416.png")); - inp = blobFromImage(inp, 1.0 / 255, Size(224, 224)); + inp = blobFromImage(inp, 1.0 / 255, Size(224, 224), Scalar(), true, true); Mat ref = blobFromNPY(_tf("densenet_121_output.npy")); Net net = readNetFromCaffe(proto, model); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 8d53b63eab..85405803d6 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -346,6 +346,10 @@ TEST_P(Test_ONNX_nets, DenseNet121) testONNXModels("densenet121", pb, l1, lInf); } +TEST_P(Test_ONNX_nets, Inception_v1) +{ + testONNXModels("inception_v1", pb); +} INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets()); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index b05d1f5440..b10c1388f3 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -62,8 +62,7 @@ TEST(Test_TensorFlow, inception_accuracy) Mat sample = imread(_tf("grace_hopper_227.png")); ASSERT_TRUE(!sample.empty()); - resize(sample, sample, Size(224, 224)); - Mat inputBlob = blobFromImage(sample); + Mat inputBlob = blobFromImage(sample, 1.0, Size(224, 224), Scalar(), /*swapRB*/true); net.setInput(inputBlob, "input"); Mat out = net.forward("softmax2"); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index bd5f11249d..dd7d975af6 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -278,7 +278,7 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy) sampleF32 /= 255; resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST); - Mat inputBlob = blobFromImage(sampleF32); + Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true); net.setInput(inputBlob); Mat out = net.forward(); @@ -305,7 +305,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy) net.setPreferableTarget(target); Mat sample = 
imread(_tf("street.png", false)); - Mat inputBlob = blobFromImage(sample, 1./255); + Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true); net.setInput(inputBlob, ""); Mat out = net.forward(); diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index e236e08adb..7f3d1515e5 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1987,10 +1987,10 @@ transform. @param image 8-bit, single-channel binary source image. The image may be modified by the function. @param lines Output vector of lines. Each line is represented by a 2 or 3 element vector -\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \votes)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of +\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of the image). \f$\theta\f$ is the line rotation angle in radians ( \f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ). -\f$\votes\f$ is the value of accumulator. +\f$\textrm{votes}\f$ is the value of accumulator. @param rho Distance resolution of the accumulator in pixels. @param theta Angle resolution of the accumulator in radians. @param threshold Accumulator threshold parameter. 
Only those lines are returned that get enough diff --git a/modules/python/test/test_dnn.py b/modules/python/test/test_dnn.py new file mode 100644 index 0000000000..a1b55f4358 --- /dev/null +++ b/modules/python/test/test_dnn.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python +import os +import cv2 as cv +import numpy as np + +from tests_common import NewOpenCVTests, unittest + +def normAssert(test, a, b, lInf=1e-5): + test.assertLess(np.max(np.abs(a - b)), lInf) + +def inter_area(box1, box2): + x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2]) + y_min, y_max = max(box1[1], box2[1]), min(box1[3], box2[3]) + return (x_max - x_min) * (y_max - y_min) + +def area(box): + return (box[2] - box[0]) * (box[3] - box[1]) + +def box2str(box): + left, top = box[0], box[1] + width, height = box[2] - left, box[3] - top + return '[%f x %f from (%f, %f)]' % (width, height, left, top) + +def normAssertDetections(test, ref, out, confThreshold=0.0, scores_diff=1e-5, boxes_iou_diff=1e-4): + ref = np.array(ref, np.float32) + refClassIds, testClassIds = ref[:, 1], out[:, 1] + refScores, testScores = ref[:, 2], out[:, 2] + refBoxes, testBoxes = ref[:, 3:], out[:, 3:] + + matchedRefBoxes = [False] * len(refBoxes) + errMsg = '' + for i in range(len(refBoxes)): + testScore = testScores[i] + if testScore < confThreshold: + continue + + testClassId, testBox = testClassIds[i], testBoxes[i] + matched = False + for j in range(len(refBoxes)): + if (not matchedRefBoxes[j]) and testClassId == refClassIds[j] and \ + abs(testScore - refScores[j]) < scores_diff: + interArea = inter_area(testBox, refBoxes[j]) + iou = interArea / (area(testBox) + area(refBoxes[j]) - interArea) + if abs(iou - 1.0) < boxes_iou_diff: + matched = True + matchedRefBoxes[j] = True + if not matched: + errMsg += '\nUnmatched prediction: class %d score %f box %s' % (testClassId, testScore, box2str(testBox)) + + for i in range(len(refBoxes)): + if (not matchedRefBoxes[i]) and refScores[i] > confThreshold: + errMsg += 
'\nUnmatched reference: class %d score %f box %s' % (refClassIds[i], refScores[i], box2str(refBoxes[i])) + if errMsg: + test.fail(errMsg) + + +# Returns a simple one-layer network created from Caffe's format +def getSimpleNet(): + prototxt = """ + name: "simpleNet" + input: "data" + layer { + type: "Identity" + name: "testLayer" + top: "testLayer" + bottom: "data" + } + """ + return cv.dnn.readNetFromCaffe(bytearray(prototxt, 'utf8')) + + +def testBackendAndTarget(backend, target): + net = getSimpleNet() + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + inp = np.random.standard_normal([1, 2, 3, 4]).astype(np.float32) + try: + net.setInput(inp) + net.forward() + except BaseException as e: + return False + return True + + +haveInfEngine = testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU) +dnnBackendsAndTargets = [ + [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU], +] + +if haveInfEngine: + dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU]) + if testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD): + dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD]) + +if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL(): + dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL]) + dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16]) + if haveInfEngine: # FIXIT Check Intel iGPU only + dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL]) + dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16]) + + +def printParams(backend, target): + backendNames = { + cv.dnn.DNN_BACKEND_OPENCV: 'OCV', + cv.dnn.DNN_BACKEND_INFERENCE_ENGINE: 'DLIE' + } + targetNames = { + cv.dnn.DNN_TARGET_CPU: 'CPU', + cv.dnn.DNN_TARGET_OPENCL: 'OCL', + cv.dnn.DNN_TARGET_OPENCL_FP16: 'OCL_FP16', + 
cv.dnn.DNN_TARGET_MYRIAD: 'MYRIAD' + } + print('%s/%s' % (backendNames[backend], targetNames[target])) + + +class dnn_test(NewOpenCVTests): + + def find_dnn_file(self, filename, required=True): + return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd())], required=required) + + def test_blobFromImage(self): + np.random.seed(324) + + width = 6 + height = 7 + scale = 1.0/127.5 + mean = (10, 20, 30) + + # Test arguments names. + img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8) + blob = cv.dnn.blobFromImage(img, scale, (width, height), mean, True, False) + blob_args = cv.dnn.blobFromImage(img, scalefactor=scale, size=(width, height), + mean=mean, swapRB=True, crop=False) + normAssert(self, blob, blob_args) + + # Test values. + target = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR) + target = target.astype(np.float32) + target = target[:,:,[2, 1, 0]] # BGR2RGB + target[:,:,0] -= mean[0] + target[:,:,1] -= mean[1] + target[:,:,2] -= mean[2] + target *= scale + target = target.transpose(2, 0, 1).reshape(1, 3, height, width) # to NCHW + normAssert(self, blob, target) + + + def test_face_detection(self): + testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False)) + proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt2', required=testdata_required) + model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=testdata_required) + if proto is None or model is None: + raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). 
Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + + img = self.get_sample('gpu/lbpcascade/er.png') + blob = cv.dnn.blobFromImage(img, mean=(104, 177, 123), swapRB=False, crop=False) + + ref = [[0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631], + [0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168], + [0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290], + [0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477], + [0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494], + [0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801]] + + print('\n') + for backend, target in dnnBackendsAndTargets: + printParams(backend, target) + + net = cv.dnn.readNet(proto, model) + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + net.setInput(blob) + out = net.forward().reshape(-1, 7) + + scoresDiff = 4e-3 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-5 + iouDiff = 2e-2 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-4 + + normAssertDetections(self, ref, out, 0.5, scoresDiff, iouDiff) + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git a/modules/python/test/tests_common.py b/modules/python/test/tests_common.py index e6539ae7f4..a938a8e2cb 100644 --- a/modules/python/test/tests_common.py +++ b/modules/python/test/tests_common.py @@ -26,23 +26,25 @@ class NewOpenCVTests(unittest.TestCase): # github repository url repoUrl = 'https://raw.github.com/opencv/opencv/master' + def find_file(self, filename, searchPaths=[], required=True): + searchPaths = searchPaths if searchPaths else [self.repoPath, self.extraTestDataPath] + for path in searchPaths: + if path is not None: + candidate = path + '/' + filename + if os.path.isfile(candidate): + return candidate + if required: + self.fail('File ' + filename + ' not found') + return None + + def get_sample(self, filename, iscolor = None): if iscolor is None: iscolor = 
cv.IMREAD_COLOR if not filename in self.image_cache: - filedata = None - if NewOpenCVTests.repoPath is not None: - candidate = NewOpenCVTests.repoPath + '/' + filename - if os.path.isfile(candidate): - with open(candidate, 'rb') as f: - filedata = f.read() - if NewOpenCVTests.extraTestDataPath is not None: - candidate = NewOpenCVTests.extraTestDataPath + '/' + filename - if os.path.isfile(candidate): - with open(candidate, 'rb') as f: - filedata = f.read() - if filedata is None: - return None#filedata = urlopen(NewOpenCVTests.repoUrl + '/' + filename).read() + filepath = self.find_file(filename) + with open(filepath, 'rb') as f: + filedata = f.read() self.image_cache[filename] = cv.imdecode(np.fromstring(filedata, dtype=np.uint8), iscolor) return self.image_cache[filename] @@ -102,4 +104,4 @@ def isPointInRect(p, rect): if rect[0] <= p[0] and rect[1] <=p[1] and p[0] <= rect[2] and p[1] <= rect[3]: return True else: - return False \ No newline at end of file + return False diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index e0198be5f8..ce337ea10f 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -58,6 +58,10 @@ # pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif +#ifndef CV_UNUSED // Required for standalone compilation mode (OpenCV defines this in base.hpp) +#define CV_UNUSED(name) (void)name +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java b/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java index 3b62cc1e1a..44b4ba3d6e 100644 --- a/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java +++ b/samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java @@ -86,29 +86,13 @@ public class MainActivity extends AppCompatActivity implements 
CvCameraViewListe // Forward image through network. Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR, new Size(IN_WIDTH, IN_HEIGHT), - new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), false); + new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false); net.setInput(blob); Mat detections = net.forward(); int cols = frame.cols(); int rows = frame.rows(); - Size cropSize; - if ((float)cols / rows > WH_RATIO) { - cropSize = new Size(rows * WH_RATIO, rows); - } else { - cropSize = new Size(cols, cols / WH_RATIO); - } - - int y1 = (int)(rows - cropSize.height) / 2; - int y2 = (int)(y1 + cropSize.height); - int x1 = (int)(cols - cropSize.width) / 2; - int x2 = (int)(x1 + cropSize.width); - Mat subFrame = frame.submat(y1, y2, x1, x2); - - cols = subFrame.cols(); - rows = subFrame.rows(); - detections = detections.reshape(1, (int)detections.total() / 7); for (int i = 0; i < detections.rows(); ++i) { @@ -116,26 +100,24 @@ public class MainActivity extends AppCompatActivity implements CvCameraViewListe if (confidence > THRESHOLD) { int classId = (int)detections.get(i, 1)[0]; - int xLeftBottom = (int)(detections.get(i, 3)[0] * cols); - int yLeftBottom = (int)(detections.get(i, 4)[0] * rows); - int xRightTop = (int)(detections.get(i, 5)[0] * cols); - int yRightTop = (int)(detections.get(i, 6)[0] * rows); + int left = (int)(detections.get(i, 3)[0] * cols); + int top = (int)(detections.get(i, 4)[0] * rows); + int right = (int)(detections.get(i, 5)[0] * cols); + int bottom = (int)(detections.get(i, 6)[0] * rows); // Draw rectangle around detected object. 
- Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom), - new Point(xRightTop, yRightTop), - new Scalar(0, 255, 0)); + Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom), + new Scalar(0, 255, 0)); String label = classNames[classId] + ": " + confidence; int[] baseLine = new int[1]; Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine); // Draw background for label. - Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom - labelSize.height), - new Point(xLeftBottom + labelSize.width, yLeftBottom + baseLine[0]), - new Scalar(255, 255, 255), Imgproc.FILLED); - + Imgproc.rectangle(frame, new Point(left, top - labelSize.height), + new Point(left + labelSize.width, top + baseLine[0]), + new Scalar(255, 255, 255), Imgproc.FILLED); // Write class name and confidence. - Imgproc.putText(subFrame, label, new Point(xLeftBottom, yLeftBottom), + Imgproc.putText(frame, label, new Point(left, top), Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0)); } } diff --git a/samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp b/samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp new file mode 100755 index 0000000000..345fd060a2 --- /dev/null +++ b/samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp @@ -0,0 +1,104 @@ +/** +* @brief You will learn how to segment an anisotropic image with a single local orientation by a gradient structure tensor (GST) +* @author Karpushin Vladislav, karpushin@ngs.ru, https://github.com/VladKarpushin +*/ + +#include +#include "opencv2/imgproc.hpp" +#include "opencv2/imgcodecs.hpp" + +using namespace cv; +using namespace std; + +void calcGST(const Mat& inputImg, Mat& imgCoherencyOut, Mat& imgOrientationOut, int w); + +int main() +{ + int W = 52; // window size is WxW + double C_Thr = 0.43; // threshold for coherency + int LowThr = 35; // 
threshold1 for orientation, it ranges from 0 to 180 + int HighThr = 57; // threshold2 for orientation, it ranges from 0 to 180 + + Mat imgIn = imread("input.jpg", IMREAD_GRAYSCALE); + if (imgIn.empty()) //check whether the image is loaded or not + { + cout << "ERROR : Image cannot be loaded..!!" << endl; + return -1; + } + + //! [main] + Mat imgCoherency, imgOrientation; + calcGST(imgIn, imgCoherency, imgOrientation, W); + + //! [thresholding] + Mat imgCoherencyBin; + imgCoherencyBin = imgCoherency > C_Thr; + Mat imgOrientationBin; + inRange(imgOrientation, Scalar(LowThr), Scalar(HighThr), imgOrientationBin); + //! [thresholding] + + //! [combining] + Mat imgBin; + imgBin = imgCoherencyBin & imgOrientationBin; + //! [combining] + //! [main] + + normalize(imgCoherency, imgCoherency, 0, 255, NORM_MINMAX); + normalize(imgOrientation, imgOrientation, 0, 255, NORM_MINMAX); + imwrite("result.jpg", 0.5*(imgIn + imgBin)); + imwrite("Coherency.jpg", imgCoherency); + imwrite("Orientation.jpg", imgOrientation); + return 0; +} +//! 
[calcGST] +void calcGST(const Mat& inputImg, Mat& imgCoherencyOut, Mat& imgOrientationOut, int w) +{ + Mat img; + inputImg.convertTo(img, CV_64F); + + // GST components calculation (start) + // J = (J11 J12; J12 J22) - GST + Mat imgDiffX, imgDiffY, imgDiffXY; + Sobel(img, imgDiffX, CV_64F, 1, 0, 3); + Sobel(img, imgDiffY, CV_64F, 0, 1, 3); + multiply(imgDiffX, imgDiffY, imgDiffXY); + + Mat imgDiffXX, imgDiffYY; + multiply(imgDiffX, imgDiffX, imgDiffXX); + multiply(imgDiffY, imgDiffY, imgDiffYY); + + Mat J11, J22, J12; // J11, J22 and J12 are GST components + boxFilter(imgDiffXX, J11, CV_64F, Size(w, w)); + boxFilter(imgDiffYY, J22, CV_64F, Size(w, w)); + boxFilter(imgDiffXY, J12, CV_64F, Size(w, w)); + // GST components calculation (stop) + + // eigenvalue calculation (start) + // lambda1 = J11 + J22 + sqrt((J11-J22)^2 + 4*J12^2) + // lambda2 = J11 + J22 - sqrt((J11-J22)^2 + 4*J12^2) + Mat tmp1, tmp2, tmp3, tmp4; + tmp1 = J11 + J22; + tmp2 = J11 - J22; + multiply(tmp2, tmp2, tmp2); + multiply(J12, J12, tmp3); + sqrt(tmp2 + 4.0 * tmp3, tmp4); + + Mat lambda1, lambda2; + lambda1 = tmp1 + tmp4; // biggest eigenvalue + lambda2 = tmp1 - tmp4; // smallest eigenvalue + // eigenvalue calculation (stop) + + // Coherency calculation (start) + // Coherency = (lambda1 - lambda2)/(lambda1 + lambda2)) - measure of anisotropism + // Coherency is anisotropy degree (consistency of local orientation) + divide(lambda1 - lambda2, lambda1 + lambda2, imgCoherencyOut); + // Coherency calculation (stop) + + // orientation angle calculation (start) + // tan(2*Alpha) = 2*J12/(J22 - J11) + // Alpha = 0.5 atan2(2*J12/(J22 - J11)) + phase(J22 - J11, 2.0*J12, imgOrientationOut, true); + imgOrientationOut = 0.5*imgOrientationOut; + // orientation angle calculation (stop) +} +//! [calcGST]