Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/12571/head
Alexander Alekhin
commit e6171d17f8
39 changed files (changed line counts in parentheses; BIN = binary file):
  1. CMakeLists.txt (16)
  2. cmake/OpenCVFindMatlab.cmake (199)
  3. cmake/OpenCVModule.cmake (40)
  4. cmake/OpenCVUtils.cmake (6)
  5. doc/opencv.bib (34)
  6. doc/py_tutorials/py_gui/py_trackbar/py_trackbar.markdown (1)
  7. doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown (91)
  8. doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_coherency.jpg (BIN)
  9. doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_input.jpg (BIN)
  10. doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_orientation.jpg (BIN)
  11. doc/tutorials/imgproc/anisotropic_image_segmentation/images/gst_result.jpg (BIN)
  12. doc/tutorials/imgproc/table_of_content_imgproc.markdown (10)
  13. modules/core/src/copy.cpp (8)
  14. modules/core/src/matrix_wrap.cpp (8)
  15. modules/core/src/umatrix.cpp (8)
  16. modules/dnn/CMakeLists.txt (2)
  17. modules/dnn/include/opencv2/dnn/all_layers.hpp (4)
  18. modules/dnn/include/opencv2/dnn/dnn.hpp (8)
  19. modules/dnn/include/opencv2/dnn/version.hpp (2)
  20. modules/dnn/src/layers/convolution_layer.cpp (33)
  21. modules/dnn/src/layers/layers_common.cpp (36)
  22. modules/dnn/src/layers/layers_common.hpp (7)
  23. modules/dnn/src/layers/pooling_layer.cpp (83)
  24. modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp (11)
  25. modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp (16)
  26. modules/dnn/src/onnx/onnx_importer.cpp (7)
  27. modules/dnn/src/opencl/ocl4dnn_pooling.cl (12)
  28. modules/dnn/src/opencl/pooling.cl (14)
  29. modules/dnn/src/torch/torch_importer.cpp (56)
  30. modules/dnn/test/test_caffe_importer.cpp (4)
  31. modules/dnn/test/test_onnx_importer.cpp (4)
  32. modules/dnn/test/test_tf_importer.cpp (3)
  33. modules/dnn/test/test_torch_importer.cpp (4)
  34. modules/imgproc/include/opencv2/imgproc.hpp (4)
  35. modules/python/test/test_dnn.py (182)
  36. modules/python/test/tests_common.py (26)
  37. modules/videoio/src/cap_ffmpeg_impl.hpp (4)
  38. samples/android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java (36)
  39. samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp (104)

@ -271,7 +271,6 @@ OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_LIBREALSENSE "Include Intel librealsense support" OFF IF (NOT WITH_INTELPERC) )
OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT))
OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF IF ((UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)) )
@ -694,11 +693,6 @@ if(WITH_DIRECTX)
include(cmake/OpenCVDetectDirectX.cmake)
endif()
# --- Matlab/Octave ---
if(WITH_MATLAB)
include(cmake/OpenCVFindMatlab.cmake)
endif()
if(WITH_VTK)
include(cmake/OpenCVDetectVTK.cmake)
endif()
@ -1518,15 +1512,7 @@ if(BUILD_JAVA OR BUILD_opencv_java)
status(" Java tests:" BUILD_TESTS AND opencv_test_java_BINARY_DIR THEN YES ELSE NO)
endif()
# ========================= matlab =========================
if(WITH_MATLAB OR MATLAB_FOUND)
status("")
status(" Matlab:" MATLAB_FOUND THEN "YES" ELSE "NO")
if(MATLAB_FOUND)
status(" mex:" MATLAB_MEX_SCRIPT THEN "${MATLAB_MEX_SCRIPT}" ELSE NO)
status(" Compiler/generator:" MEX_WORKS THEN "Working" ELSE "Not working (bindings will not be generated)")
endif()
endif()
ocv_cmake_hook(STATUS_DUMP_EXTRA)
# ========================== auxiliary ==========================
status("")

@ -1,199 +0,0 @@
# ----- Find Matlab/Octave -----
#
# OpenCVFindMatlab.cmake attempts to locate the install path of Matlab in order
# to extract the mex headers, libraries and shell scripts. If found
# successfully, the following variables will be defined
#
# MATLAB_FOUND: true/false
# MATLAB_ROOT_DIR: Root of Matlab installation
# MATLAB_BIN: The main Matlab "executable" (shell script)
# MATLAB_MEX_SCRIPT: The mex script used to compile mex files
# MATLAB_INCLUDE_DIRS:Path to "mex.h"
# MATLAB_LIBRARY_DIRS:Path to mex and matrix libraries
# MATLAB_LIBRARIES: The Matlab libs, usually mx, mex, mat
# MATLAB_MEXEXT: The mex library extension. It will be one of:
# mexwin32, mexwin64, mexglx, mexa64, mexmac,
# mexmaci, mexmaci64, mexsol, mexs64
# MATLAB_ARCH: The installation architecture. It is **usually**
# the MEXEXT with the preceding "mex" removed,
# though it's different for linux distros.
#
# There doesn't appear to be an elegant way to detect all versions of Matlab
# across different platforms. If you know the matlab path and want to avoid
# the search, you can define the path to the Matlab root when invoking cmake:
#
# cmake -DMATLAB_ROOT_DIR='/PATH/TO/ROOT_DIR' ..
# ----- set_library_presuffix -----
#
# Matlab tends to use some non-standard prefixes and suffixes on its libraries.
# For example, libmx.dll on Windows (Windows does not add prefixes) and
# mkl.dylib on OS X (OS X uses "lib" prefixes).
# On some versions of Windows the .dll suffix also appears to not be checked.
#
# This function modifies the library prefixes and suffixes used by
# find_library when finding Matlab libraries. It does not affect scopes
# outside of this file.
function(set_libarch_prefix_suffix)
if (UNIX AND NOT APPLE)
set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so" ".a" PARENT_SCOPE)
elseif (APPLE)
set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".a" PARENT_SCOPE)
elseif (WIN32)
set(CMAKE_FIND_LIBRARY_PREFIXES "lib" PARENT_SCOPE)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll" PARENT_SCOPE)
endif()
endfunction()
# ----- locate_matlab_root -----
#
# Attempt to find the path to the Matlab installation. If successful, sets
# the absolute path in the variable MATLAB_ROOT_DIR
function(locate_matlab_root)
# --- UNIX/APPLE ---
if (UNIX)
# possible root locations, in order of likelihood
set(SEARCH_DIRS_ /Applications /usr/local /opt/local /usr /opt)
foreach (DIR_ ${SEARCH_DIRS_})
file(GLOB MATLAB_ROOT_DIR_ ${DIR_}/MATLAB/R* ${DIR_}/MATLAB_R*)
if (MATLAB_ROOT_DIR_)
# sort in order from highest to lowest
# normally it's in the format MATLAB_R[20XX][A/B]
# TODO: numerical rather than lexicographic sort. However,
# CMake does not support floating-point MATH(EXPR ...) at this time.
list(SORT MATLAB_ROOT_DIR_)
list(REVERSE MATLAB_ROOT_DIR_)
list(GET MATLAB_ROOT_DIR_ 0 MATLAB_ROOT_DIR_)
set(MATLAB_ROOT_DIR ${MATLAB_ROOT_DIR_} PARENT_SCOPE)
return()
endif()
endforeach()
# --- WINDOWS ---
elseif (WIN32)
# 1. search the path environment variable
find_program(MATLAB_ROOT_DIR_ matlab PATHS ENV PATH)
if (MATLAB_ROOT_DIR_)
# get the root directory from the full path
# /path/to/matlab/rootdir/bin/matlab.exe
get_filename_component(MATLAB_ROOT_DIR_ ${MATLAB_ROOT_DIR_} PATH)
get_filename_component(MATLAB_ROOT_DIR_ ${MATLAB_ROOT_DIR_} PATH)
set(MATLAB_ROOT_DIR ${MATLAB_ROOT_DIR_} PARENT_SCOPE)
return()
endif()
# 2. search the registry
# determine the available Matlab versions
set(REG_EXTENSION_ "SOFTWARE\\Mathworks\\MATLAB")
set(REG_ROOTS_ "HKEY_LOCAL_MACHINE" "HKEY_CURRENT_USER")
foreach(REG_ROOT_ ${REG_ROOTS_})
execute_process(COMMAND reg query "${REG_ROOT_}\\${REG_EXTENSION_}" OUTPUT_VARIABLE QUERY_RESPONSE_ ERROR_VARIABLE UNUSED_)
if (QUERY_RESPONSE_)
string(REGEX MATCHALL "[0-9]\\.[0-9]" VERSION_STRINGS_ ${QUERY_RESPONSE_})
list(APPEND VERSIONS_ ${VERSION_STRINGS_})
endif()
endforeach()
# select the highest version
list(APPEND VERSIONS_ "0.0")
list(SORT VERSIONS_)
list(REVERSE VERSIONS_)
list(GET VERSIONS_ 0 VERSION_)
# request the MATLABROOT from the registry
foreach(REG_ROOT_ ${REG_ROOTS_})
get_filename_component(QUERY_RESPONSE_ [${REG_ROOT_}\\${REG_EXTENSION_}\\${VERSION_};MATLABROOT] ABSOLUTE)
if (NOT ${QUERY_RESPONSE_} MATCHES "registry$")
set(MATLAB_ROOT_DIR ${QUERY_RESPONSE_} PARENT_SCOPE)
return()
endif()
endforeach()
endif()
endfunction()
# ----- locate_matlab_components -----
#
# Given a directory MATLAB_ROOT_DIR, attempt to find the Matlab components
# (include directory and libraries) under the root. If everything is found,
# sets the variable MATLAB_FOUND to TRUE
function(locate_matlab_components MATLAB_ROOT_DIR)
# get the mex extension
find_file(MATLAB_MEXEXT_SCRIPT_ NAMES mexext mexext.bat PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH)
execute_process(COMMAND ${MATLAB_MEXEXT_SCRIPT_}
OUTPUT_VARIABLE MATLAB_MEXEXT_
OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT MATLAB_MEXEXT_)
return()
endif()
# map the mexext to an architecture extension
set(ARCHITECTURES_ "maci64" "maci" "glnxa64" "glnx64" "sol64" "sola64" "win32" "win64" )
foreach(ARCHITECTURE_ ${ARCHITECTURES_})
if(EXISTS ${MATLAB_ROOT_DIR}/bin/${ARCHITECTURE_})
set(MATLAB_ARCH_ ${ARCHITECTURE_})
break()
endif()
endforeach()
# get the path to the libraries
set(MATLAB_LIBRARY_DIRS_ ${MATLAB_ROOT_DIR}/bin/${MATLAB_ARCH_})
# get the libraries
set_libarch_prefix_suffix()
find_library(MATLAB_LIB_MX_ mx PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH)
find_library(MATLAB_LIB_MEX_ mex PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH)
find_library(MATLAB_LIB_MAT_ mat PATHS ${MATLAB_LIBRARY_DIRS_} NO_DEFAULT_PATH)
set(MATLAB_LIBRARIES_ ${MATLAB_LIB_MX_} ${MATLAB_LIB_MEX_} ${MATLAB_LIB_MAT_})
# get the include path
find_path(MATLAB_INCLUDE_DIRS_ mex.h ${MATLAB_ROOT_DIR}/extern/include)
# get the mex shell script
find_program(MATLAB_MEX_SCRIPT_ NAMES mex mex.bat PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH)
# get the Matlab executable
find_program(MATLAB_BIN_ NAMES matlab PATHS ${MATLAB_ROOT_DIR}/bin NO_DEFAULT_PATH)
# export into parent scope
if (MATLAB_MEX_SCRIPT_ AND MATLAB_LIBRARIES_ AND MATLAB_INCLUDE_DIRS_)
set(MATLAB_BIN ${MATLAB_BIN_} PARENT_SCOPE)
set(MATLAB_MEX_SCRIPT ${MATLAB_MEX_SCRIPT_} PARENT_SCOPE)
set(MATLAB_INCLUDE_DIRS ${MATLAB_INCLUDE_DIRS_} PARENT_SCOPE)
set(MATLAB_LIBRARIES ${MATLAB_LIBRARIES_} PARENT_SCOPE)
set(MATLAB_LIBRARY_DIRS ${MATLAB_LIBRARY_DIRS_} PARENT_SCOPE)
set(MATLAB_MEXEXT ${MATLAB_MEXEXT_} PARENT_SCOPE)
set(MATLAB_ARCH ${MATLAB_ARCH_} PARENT_SCOPE)
endif()
endfunction()
# ----------------------------------------------------------------------------
# FIND MATLAB COMPONENTS
# ----------------------------------------------------------------------------
if (NOT MATLAB_FOUND)
# attempt to find the Matlab root folder
if (NOT MATLAB_ROOT_DIR)
locate_matlab_root()
endif()
# given the matlab root folder, find the library locations
if (MATLAB_ROOT_DIR)
locate_matlab_components(${MATLAB_ROOT_DIR})
endif()
find_package_handle_standard_args(Matlab DEFAULT_MSG
MATLAB_MEX_SCRIPT MATLAB_INCLUDE_DIRS
MATLAB_ROOT_DIR MATLAB_LIBRARIES
MATLAB_LIBRARY_DIRS MATLAB_MEXEXT
MATLAB_ARCH MATLAB_BIN)
endif()

@ -296,28 +296,29 @@ endfunction()
# Calls 'add_subdirectory' for each location.
# Note: both input lists should have same length.
# Usage: _add_modules_1(<list with paths> <list with names>)
function(_add_modules_1 paths names)
list(LENGTH ${paths} len)
if(len EQUAL 0)
return()
endif()
list(LENGTH ${names} len_verify)
if(NOT len EQUAL len_verify)
message(FATAL_ERROR "Bad configuration! ${len} != ${len_verify}")
endif()
math(EXPR len "${len} - 1")
foreach(i RANGE ${len})
list(GET ${paths} ${i} path)
list(GET ${names} ${i} name)
#message(STATUS "First pass: ${name} => ${path}")
include("${path}/cmake/init.cmake" OPTIONAL)
add_subdirectory("${path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${name}")
macro(_add_modules_1 paths names)
ocv_debug_message("_add_modules_1(paths=${paths}, names=${names}, ... " ${ARGN} ")")
list(LENGTH ${paths} __len)
if(NOT __len EQUAL 0)
list(LENGTH ${names} __len_verify)
if(NOT __len EQUAL __len_verify)
message(FATAL_ERROR "Bad configuration! ${__len} != ${__len_verify}")
endif()
math(EXPR __len "${__len} - 1")
foreach(i RANGE ${__len})
list(GET ${paths} ${i} __path)
list(GET ${names} ${i} __name)
#message(STATUS "First pass: ${__name} => ${__path}")
include("${__path}/cmake/init.cmake" OPTIONAL)
add_subdirectory("${__path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${__name}")
endforeach()
endfunction()
endif()
endmacro()
# Calls 'add_subdirectory' for each module name.
# Usage: _add_modules_2([<module> ...])
function(_add_modules_2)
macro(_add_modules_2)
ocv_debug_message("_add_modules_2(" ${ARGN} ")")
foreach(m ${ARGN})
set(the_module "${m}")
ocv_cmake_hook(PRE_MODULES_CREATE_${the_module})
@ -333,7 +334,8 @@ function(_add_modules_2)
endif()
ocv_cmake_hook(POST_MODULES_CREATE_${the_module})
endforeach()
endfunction()
unset(the_module)
endmacro()
# Check if list of input items is unique.
# Usage: _assert_uniqueness(<failure message> <element> [<element> ...])

@ -121,8 +121,10 @@ macro(ocv_assert)
endmacro()
macro(ocv_debug_message)
# string(REPLACE ";" " " __msg "${ARGN}")
# message(STATUS "${__msg}")
if(OPENCV_CMAKE_DEBUG_MESSAGES)
string(REPLACE ";" " " __msg "${ARGN}")
message(STATUS "${__msg}")
endif()
endmacro()
macro(ocv_check_environment_variables)

@ -1035,3 +1035,37 @@
publisher = {BMVA Press},
author = {Alexander Duda and Udo Frese},
}
@book{jahne2000computer,
title={Computer vision and applications: a guide for students and practitioners},
author={Jahne, Bernd},
year={2000},
publisher={Elsevier}
}
@book{bigun2006vision,
title={Vision with direction},
author={Bigun, Josef},
year={2006},
publisher={Springer}
}
@inproceedings{van1995estimators,
title={Estimators for orientation and anisotropy in digitized images},
author={Van Vliet, Lucas J and Verbeek, Piet W},
booktitle={ASCI},
volume={95},
pages={16--18},
year={1995}
}
@article{yang1996structure,
title={Structure adaptive anisotropic image filtering},
author={Yang, Guang-Zhong and Burger, Peter and Firmin, David N and Underwood, SR},
journal={Image and Vision Computing},
volume={14},
number={2},
pages={135--145},
year={1996},
publisher={Elsevier}
}

@ -37,6 +37,7 @@ cv.namedWindow('image')
# create trackbars for color change
cv.createTrackbar('R','image',0,255,nothing)
cv.createTrackbar('G','image',0,255,nothing)
cv.createTrackbar('B','image',0,255,nothing)

@ -0,0 +1,91 @@
Anisotropic image segmentation by a gradient structure tensor {#tutorial_anisotropic_image_segmentation_by_a_gst}
==========================
Goal
----
In this tutorial you will learn:
- what the gradient structure tensor is
- how to estimate orientation and coherency of an anisotropic image by a gradient structure tensor
- how to segment an anisotropic image with a single local orientation by a gradient structure tensor
Theory
------
@note The explanation is based on the books @cite jahne2000computer, @cite bigun2006vision and @cite van1995estimators. A good physical explanation of the gradient structure tensor is given in @cite yang1996structure. You can also refer to the Wikipedia page [Structure tensor].
@note An anisotropic image on this page means a real-world image.
### What is the gradient structure tensor?
In mathematics, the gradient structure tensor (also referred to as the second-moment matrix, the second order moment tensor, the inertia tensor, etc.) is a matrix derived from the gradient of a function. It summarizes the predominant directions of the gradient in a specified neighborhood of a point, and the degree to which those directions are coherent (coherency). The gradient structure tensor is widely used in image processing and computer vision for 2D/3D image segmentation, motion detection, adaptive filtering, local image feature detection, etc.
Important features of anisotropic images include the orientation and coherency of a local anisotropy. In this tutorial we will show how to estimate orientation and coherency, and how to segment an anisotropic image with a single local orientation by a gradient structure tensor.
The gradient structure tensor of an image is a 2x2 symmetric matrix. The eigenvectors of the gradient structure tensor indicate local orientation, whereas the eigenvalues give coherency (a measure of anisotropy).
The gradient structure tensor \f$J\f$ of an image \f$Z\f$ can be written as:
\f[J = \begin{bmatrix}
J_{11} & J_{12} \\
J_{12} & J_{22}
\end{bmatrix}\f]
where \f$J_{11} = M[Z_{x}^{2}]\f$, \f$J_{22} = M[Z_{y}^{2}]\f$ and \f$J_{12} = M[Z_{x}Z_{y}]\f$ are the components of the tensor, \f$M[]\f$ denotes mathematical expectation (we can consider this operation as averaging in a window \f$w\f$), and \f$Z_{x}\f$ and \f$Z_{y}\f$ are the partial derivatives of the image \f$Z\f$ with respect to \f$x\f$ and \f$y\f$.
The eigenvalues of the tensor are given by the formula below:
\f[\lambda_{1,2} = \frac{1}{2} \left[ J_{11} + J_{22} \pm \sqrt{(J_{11} - J_{22})^{2} + 4J_{12}^{2}} \right]\f]
where \f$\lambda_1\f$ is the largest eigenvalue and \f$\lambda_2\f$ is the smallest.
### How to estimate orientation and coherency of an anisotropic image with the gradient structure tensor?
The orientation of an anisotropic image:
\f[\alpha = \frac{1}{2} \arctan\frac{2J_{12}}{J_{22} - J_{11}}\f]
Coherency:
\f[C = \frac{\lambda_1 - \lambda_2}{\lambda_1 + \lambda_2}\f]
The coherency ranges from 0 to 1. It is one for an ideal local orientation (\f$\lambda_2\f$ = 0, \f$\lambda_1\f$ > 0) and zero for an isotropic gray value structure (\f$\lambda_1\f$ = \f$\lambda_2\f$ > 0).
Source code
-----------
You can find the source code at `samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp` in the OpenCV source code library.
@include cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp
Explanation
-----------
An anisotropic image segmentation algorithm consists of a gradient structure tensor calculation, an orientation calculation, a coherency calculation, and thresholding of the orientation and coherency:
@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp main
The function calcGST() calculates orientation and coherency using the gradient structure tensor. The input parameter w defines the window size:
@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp calcGST
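For readers who want to mirror the C++ sample in Python, here is a minimal sketch of the same computation (not part of this commit; the Sobel kernel size and the epsilon guard are illustrative choices):

```python
# Minimal Python sketch of calcGST: J components via Sobel + box filtering,
# then eigenvalues, coherency and orientation as in the formulas above.
import cv2 as cv
import numpy as np

def calc_gst(img, w=52):
    img = img.astype(np.float32)
    imgx = cv.Sobel(img, cv.CV_32F, 1, 0, ksize=3)   # Z_x
    imgy = cv.Sobel(img, cv.CV_32F, 0, 1, ksize=3)   # Z_y
    # M[.] approximated by averaging over a w x w window
    j11 = cv.boxFilter(imgx * imgx, cv.CV_32F, (w, w))
    j22 = cv.boxFilter(imgy * imgy, cv.CV_32F, (w, w))
    j12 = cv.boxFilter(imgx * imgy, cv.CV_32F, (w, w))
    # lambda_{1,2} = 0.5 * (J11 + J22 +/- sqrt((J11 - J22)^2 + 4*J12^2))
    d = np.sqrt((j11 - j22) ** 2 + 4.0 * j12 ** 2)
    lam1, lam2 = 0.5 * (j11 + j22 + d), 0.5 * (j11 + j22 - d)
    coherency = (lam1 - lam2) / (lam1 + lam2 + 1e-12)  # C in [0, 1]
    # alpha = 0.5 * atan2(2*J12, J22 - J11), here in degrees
    orientation = 0.5 * cv.phase(j22 - j11, 2.0 * j12, angleInDegrees=True)
    return orientation, coherency
```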
The code below applies the thresholds LowThr and HighThr to the image orientation and the threshold C_Thr to the image coherency calculated by the previous function. LowThr and HighThr define the orientation range:
@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp thresholding
And finally we combine the thresholding results:
@snippet samples/cpp/tutorial_code/ImgProc/anisotropic_image_segmentation/anisotropic_image_segmentation.cpp combining
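Continuing the hedged Python sketch above, the thresholding and combination steps could look like this (`input.jpg` is a placeholder path; the parameter values are the ones quoted in the Result section below):

```python
img = cv.imread('input.jpg', cv.IMREAD_GRAYSCALE)    # placeholder input image
orientation, coherency = calc_gst(img, w=52)
coh_mask = coherency > 0.43                          # C_Thr
ori_mask = (orientation > 35) & (orientation < 57)   # LowThr..HighThr, degrees
segmentation = (coh_mask & ori_mask).astype(np.uint8) * 255
```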
Result
------
Below you can see a real anisotropic image with a single direction:
![Anisotropic image with a single direction](images/gst_input.jpg)
Below you can see the orientation and coherency of the anisotropic image:
![Orientation](images/gst_orientation.jpg)
![Coherency](images/gst_coherency.jpg)
Below you can see the segmentation result:
![Segmentation result](images/gst_result.jpg)
The result has been computed with w = 52, C_Thr = 0.43, LowThr = 35 and HighThr = 57. We can see that the algorithm selected only the areas with a single direction.
References
------
- [Structure tensor] - description of the structure tensor on Wikipedia
<!-- invisible references list -->
[Structure tensor]: https://en.wikipedia.org/wiki/Structure_tensor

Binary file added: images/gst_coherency.jpg (20 KiB)

Binary file added: images/gst_input.jpg (40 KiB)

Binary file added: images/gst_orientation.jpg (19 KiB)

Binary file added: images/gst_result.jpg (32 KiB)

@ -330,3 +330,13 @@ In this section you will learn about the image processing (manipulation) functio
*Author:* Karpushin Vladislav
You will learn how to recover an image with motion blur distortion using a Wiener filter.
- @subpage tutorial_anisotropic_image_segmentation_by_a_gst
*Languages:* C++
*Compatibility:* \> OpenCV 2.0
*Author:* Karpushin Vladislav
You will learn how to segment an anisotropic image with a single local orientation by a gradient structure tensor.

@ -238,6 +238,14 @@ void Mat::copyTo( OutputArray _dst ) const
{
CV_INSTRUMENT_REGION();
#ifdef HAVE_CUDA
if (_dst.isGpuMat())
{
_dst.getGpuMat().upload(*this);
return;
}
#endif
int dtype = _dst.type();
if( _dst.fixedType() && dtype != type() )
{

@ -1146,6 +1146,10 @@ void _InputArray::copyTo(const _OutputArray& arr) const
}
else if( k == UMAT )
((UMat*)obj)->copyTo(arr);
#ifdef HAVE_CUDA
else if (k == CUDA_GPU_MAT)
((cuda::GpuMat*)obj)->copyTo(arr);
#endif
else
CV_Error(Error::StsNotImplemented, "");
}
@ -1163,6 +1167,10 @@ void _InputArray::copyTo(const _OutputArray& arr, const _InputArray & mask) cons
}
else if( k == UMAT )
((UMat*)obj)->copyTo(arr, mask);
#ifdef HAVE_CUDA
else if (k == CUDA_GPU_MAT)
((cuda::GpuMat*)obj)->copyTo(arr, mask);
#endif
else
CV_Error(Error::StsNotImplemented, "");
}

@ -874,6 +874,14 @@ void UMat::copyTo(OutputArray _dst) const
{
CV_INSTRUMENT_REGION();
#ifdef HAVE_CUDA
if (_dst.isGpuMat())
{
_dst.getGpuMat().upload(*this);
return;
}
#endif
int dtype = _dst.type();
if( _dst.fixedType() && dtype != type() )
{

@ -10,7 +10,7 @@ set(the_description "Deep neural network module. It allows to load models from d
ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX)
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js)
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java js)
ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE)

@ -234,7 +234,9 @@ CV__DNN_INLINE_NS_BEGIN
{
public:
int type;
Size kernel, stride, pad;
Size kernel, stride;
int pad_l, pad_t, pad_r, pad_b;
CV_DEPRECATED Size pad;
bool globalPooling;
bool computeMaxIdx;
String padMode;

@ -836,7 +836,7 @@ CV__DNN_INLINE_NS_BEGIN
* @returns 4-dimensional Mat with NCHW dimensions order.
*/
CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
int ddepth=CV_32F);
/** @brief Creates 4-dimensional blob from image.
@ -845,7 +845,7 @@ CV__DNN_INLINE_NS_BEGIN
*/
CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
const Size& size = Size(), const Scalar& mean = Scalar(),
bool swapRB=true, bool crop=true, int ddepth=CV_32F);
bool swapRB=false, bool crop=false, int ddepth=CV_32F);
/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
@ -866,7 +866,7 @@ CV__DNN_INLINE_NS_BEGIN
* @returns 4-dimensional Mat with NCHW dimensions order.
*/
CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
int ddepth=CV_32F);
/** @brief Creates 4-dimensional blob from series of images.
@ -875,7 +875,7 @@ CV__DNN_INLINE_NS_BEGIN
*/
CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
double scalefactor=1.0, Size size = Size(),
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false,
int ddepth=CV_32F);
/** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
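Since these hunks flip the swapRB/crop defaults from true to false, callers that relied on the old defaults change behavior silently; passing both flags explicitly sidesteps the version difference. A small usage sketch (the random image is a stand-in):

```python
import cv2 as cv
import numpy as np

img = np.random.randint(0, 255, (480, 640, 3), np.uint8)  # stand-in image
blob = cv.dnn.blobFromImage(img, scalefactor=1.0 / 255, size=(224, 224),
                            mean=(0, 0, 0), swapRB=True, crop=False)
print(blob.shape)  # NCHW: (1, 3, 224, 224)
```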

@ -6,7 +6,7 @@
#define OPENCV_DNN_VERSION_HPP
/// Use with major OpenCV version only.
#define OPENCV_DNN_API_VERSION 20180903
#define OPENCV_DNN_API_VERSION 20180917
#if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_INLINE_NS
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)

@ -64,10 +64,17 @@ public:
BaseConvolutionLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
getConvolutionKernelParams(params, kernel.height, kernel.width, pad.height,
pad.width, stride.height, stride.width, dilation.height,
int pad_t = 0, pad_l = 0, pad_r = 0, pad_b = 0;
getConvolutionKernelParams(params, kernel.height, kernel.width, pad_t,
pad_l, pad_b, pad_r, stride.height, stride.width, dilation.height,
dilation.width, padMode);
if (pad_t != pad_b || pad_l != pad_r)
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
pad.width = pad_l;
pad.height = pad_t;
numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);
@ -100,8 +107,18 @@ public:
}
Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
kernel, stride, padMode, dilation, pad);
kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);
if (pad_t != pad_b || pad_l != pad_r)
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
pad.width = pad_l;
pad.height = pad_t;
}
bool hasBias() const
@ -1156,9 +1173,17 @@ public:
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
int pad_t = pad.height, pad_l = pad.width, pad_b = pad.height, pad_r = pad.width;
getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
Size(inputs[0].size[3], inputs[0].size[2]),
kernel, stride, padMode, dilation, pad);
kernel, stride, padMode, dilation, pad_t, pad_l, pad_b, pad_r);
if (pad_t != pad_b || pad_l != pad_r)
CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer");
pad.width = pad_l;
pad.height = pad_t;
}
class MatMulInvoker : public ParallelLoopBody

@ -118,9 +118,19 @@ void getKernelSize(const LayerParams &params, int &kernelH, int &kernelW)
CV_Assert(kernelH > 0 && kernelW > 0);
}
void getStrideAndPadding(const LayerParams &params, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode)
void getStrideAndPadding(const LayerParams &params, int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode)
{
util::getParameter(params, "pad", "pad", padH, padW, true, 0);
if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
padT = params.get<int>("pad_t");
padL = params.get<int>("pad_l");
padB = params.get<int>("pad_b");
padR = params.get<int>("pad_r");
}
else {
util::getParameter(params, "pad", "pad", padT, padL, true, 0);
padB = padT;
padR = padL;
}
util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
padMode = "";
@ -129,15 +139,15 @@ void getStrideAndPadding(const LayerParams &params, int &padH, int &padW, int &s
padMode = params.get<String>("pad_mode");
}
CV_Assert(padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
CV_Assert(padT >= 0 && padL >= 0 && padB >= 0 && padR >= 0 && strideH > 0 && strideW > 0);
}
}
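In Python terms, the resolution order implemented above can be sketched as follows (a simplified model with a plain dict; the real helper also accepts separate pad_h/pad_w values):

```python
# Explicit per-side pads take precedence; otherwise the symmetric 'pad'
# value is replicated to all four sides.
def resolve_pads(params):
    if all(k in params for k in ('pad_t', 'pad_l', 'pad_b', 'pad_r')):
        return params['pad_t'], params['pad_l'], params['pad_b'], params['pad_r']
    pad_h = pad_w = params.get('pad', 0)
    return pad_h, pad_w, pad_h, pad_w   # padT, padL, padB, padR

print(resolve_pads({'pad': 2}))                                        # (2, 2, 2, 2)
print(resolve_pads({'pad_t': 0, 'pad_l': 1, 'pad_b': 2, 'pad_r': 1}))  # (0, 1, 2, 1)
```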
void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
int &padH, int &padW, int &strideH, int &strideW, cv::String &padMode)
int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String &padMode)
{
util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode);
util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
globalPooling = params.has("global_pooling") &&
params.get<bool>("global_pooling");
@ -148,9 +158,9 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
{
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
}
if(padH != 0 || padW != 0 || strideH != 1 || strideW != 1)
if(padT != 0 || padL != 0 || padB != 0 || padR != 0 || strideH != 1 || strideW != 1)
{
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1");
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0, and stride_h and stride_w must be = 1");
}
}
else
@ -159,12 +169,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
}
}
void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW,
void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode)
{
util::getKernelSize(params, kernelH, kernelW);
util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode);
util::getStrideAndPadding(params, padT, padL, padB, padR, strideH, strideW, padMode);
util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);
CV_Assert(dilationH > 0 && dilationW > 0);
@ -201,11 +210,11 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
void getConvPoolPaddings(const Size& inp, const Size& out,
const Size &kernel, const Size &stride,
const String &padMode, const Size &dilation, Size &pad)
const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR)
{
if (padMode == "VALID")
{
pad = cv::Size(0,0);
padT = padL = padB = padR = 0;
}
else if (padMode == "SAME")
{
@ -213,7 +222,8 @@ void getConvPoolPaddings(const Size& inp, const Size& out,
int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
// For odd values of total padding, add more padding at the 'right'
// side of the given dimension.
pad = cv::Size(Pw / 2, Ph / 2);
padT = padB = Ph / 2;
padL = padR = Pw / 2;
}
}
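A hedged sketch of the SAME rule above: the total padding is whatever makes the output size work out, and the code halves it for both sides (any odd leftover pixel is absorbed by the border clipping in the convolution/pooling loops):

```python
# SAME padding per spatial dimension, as computed above.
def same_pad(inp, out, kernel, stride, dilation=1):
    total = max(0, (out - 1) * stride + dilation * (kernel - 1) + 1 - inp)
    return total // 2   # applied to both sides

print(same_pad(inp=7, out=4, kernel=3, stride=2))  # 1
```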

@ -60,19 +60,20 @@ namespace cv
namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW,
void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &kernelW, int &padT, int &padL, int &padB, int &padR,
int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode);
void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling,
int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode);
int &padT, int &padL, int &padB, int &padR, int &strideH, int &strideW, cv::String& padMode);
void getConvPoolOutParams(const Size& inp, const Size &kernel,
const Size &stride, const String &padMode,
const Size &dilation, Size& out);
void getConvPoolPaddings(const Size& inp, const Size& out,
const Size &kernel, const Size &stride,
const String &padMode, const Size &dilation, Size &pad);
const String &padMode, const Size &dilation, int &padT, int &padL, int &padB, int &padR);
}
}

@ -85,8 +85,12 @@ public:
type = STOCHASTIC;
else
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
pad.height, pad.width, stride.height, stride.width, padMode);
pad_t, pad_l, pad_b, pad_r, stride.height, stride.width, padMode);
pad.width = pad_l;
pad.height = pad_t;
}
else if (params.has("pooled_w") || params.has("pooled_h"))
{
@ -130,7 +134,9 @@ public:
kernel = inp;
}
getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad);
getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad_t, pad_l, pad_b, pad_r);
pad.width = pad_l;
pad.height = pad_t;
#ifdef HAVE_OPENCL
poolOp.release();
@ -149,7 +155,7 @@ public:
else
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
(type == MAX || type == AVE && !pad.width && !pad.height);
(type == MAX || type == AVE && !pad_t && !pad_l && !pad_b && !pad_r);
}
#ifdef HAVE_OPENCL
@ -169,7 +175,10 @@ public:
config.in_shape = shape(inputs[0]);
config.out_shape = shape(outputs[0]);
config.kernel = kernel;
config.pad = pad;
config.pad_l = pad_l;
config.pad_t = pad_t;
config.pad_r = pad_r;
config.pad_b = pad_b;
config.stride = stride;
config.channels = inputs[0].size[1];
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
@ -193,7 +202,6 @@ public:
if (!poolOp->Forward(inpMat, outMat, maskMat))
return false;
}
return true;
}
#endif
@ -264,8 +272,10 @@ public:
poolLayer->_kernel_y = kernel.height;
poolLayer->_stride_x = stride.width;
poolLayer->_stride_y = stride.height;
poolLayer->_padding_x = pad.width;
poolLayer->_padding_y = pad.height;
poolLayer->_padding_x = pad_l;
poolLayer->_padding_y = pad_t;
poolLayer->params["pad-r"] = format("%d", pad_r);
poolLayer->params["pad-b"] = format("%d", pad_b);
poolLayer->_exclude_pad = type == AVE && padMode == "SAME";
poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :
@ -296,12 +306,14 @@ public:
return Ptr<BackendNode>();
}
class PoolingInvoker : public ParallelLoopBody
{
public:
const Mat* src, *rois;
Mat *dst, *mask;
Size kernel, stride, pad;
Size kernel, stride;
int pad_l, pad_t, pad_r, pad_b;
bool avePoolPaddedArea;
int nstripes;
bool computeMaxIdx;
@ -313,7 +325,7 @@ public:
computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
Size stride, Size pad, bool avePoolPaddedArea, int poolingType, float spatialScale,
Size stride, int pad_l, int pad_t, int pad_r, int pad_b, bool avePoolPaddedArea, int poolingType, float spatialScale,
bool computeMaxIdx, int nstripes)
{
CV_Assert_N(
@ -332,7 +344,10 @@ public:
p.mask = &mask;
p.kernel = kernel;
p.stride = stride;
p.pad = pad;
p.pad_l = pad_l;
p.pad_t = pad_t;
p.pad_r = pad_r;
p.pad_b = pad_b;
p.avePoolPaddedArea = avePoolPaddedArea;
p.nstripes = nstripes;
p.computeMaxIdx = computeMaxIdx;
@ -359,7 +374,6 @@ public:
size_t stripeStart = r.start*stripeSize;
size_t stripeEnd = std::min(r.end*stripeSize, total);
int kernel_w = kernel.width, kernel_h = kernel.height;
int pad_w = pad.width, pad_h = pad.height;
int stride_w = stride.width, stride_h = stride.height;
bool compMaxIdx = computeMaxIdx;
@ -411,8 +425,8 @@ public:
}
else
{
ystart = y0 * stride_h - pad_h;
yend = min(ystart + kernel_h, inp_height + pad_h);
ystart = y0 * stride_h - pad_t;
yend = min(ystart + kernel_h, inp_height + pad_b);
srcData = src->ptr<float>(n, c);
}
int ydelta = yend - ystart;
@ -428,7 +442,7 @@ public:
if( poolingType == MAX)
for( ; x0 < x1; x0++ )
{
int xstart = x0 * stride_w - pad_w;
int xstart = x0 * stride_w - pad_l;
int xend = min(xstart + kernel_w, inp_width);
xstart = max(xstart, 0);
if (xstart >= xend || ystart >= yend)
@ -439,7 +453,7 @@ public:
continue;
}
#if CV_SIMD128
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
{
if( compMaxIdx )
{
@ -578,15 +592,15 @@ public:
{
for( ; x0 < x1; x0++ )
{
int xstart = x0 * stride_w - pad_w;
int xend = min(xstart + kernel_w, inp_width + pad_w);
int xstart = x0 * stride_w - pad_l;
int xend = min(xstart + kernel_w, inp_width + pad_r);
int xdelta = xend - xstart;
xstart = max(xstart, 0);
xend = min(xend, inp_width);
float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));
inv_kernel_area = 1.0 / inv_kernel_area;
#if CV_SIMD128
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
{
v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32();
v_float32x4 ikarea = v_setall_f32(inv_kernel_area);
@ -695,21 +709,21 @@ public:
{
const int nstripes = getNumThreads();
Mat rois;
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
}
void avePooling(Mat &src, Mat &dst)
{
const int nstripes = getNumThreads();
Mat rois, mask;
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
}
void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
{
const int nstripes = getNumThreads();
Mat mask;
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
}
virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
@ -723,10 +737,10 @@ public:
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::RDom r(0, kernel.width, 0, kernel.height);
Halide::Expr kx, ky;
if (pad.width || pad.height)
if(pad_l || pad_t)
{
kx = clamp(x * stride.width + r.x - pad.width, 0, inWidth - 1);
ky = clamp(y * stride.height + r.y - pad.height, 0, inHeight - 1);
kx = clamp(x * stride.width + r.x - pad_l, 0, inWidth - 1);
ky = clamp(y * stride.height + r.y - pad_t, 0, inHeight - 1);
}
else
{
@ -739,11 +753,11 @@ public:
// Compute offset from argmax in range [0, kernel_size).
Halide::Expr max_index;
if (pad.width || pad.height)
if(pad_l || pad_t)
{
max_index = clamp(y * stride.height + res[1] - pad.height,
max_index = clamp(y * stride.height + res[1] - pad_t,
0, inHeight - 1) * inWidth +
clamp(x * stride.width + res[0] - pad.width,
clamp(x * stride.width + res[0] - pad_l,
0, inWidth - 1);
}
else
@ -852,21 +866,21 @@ public:
}
else if (padMode.empty())
{
float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height;
float width = (float)(in.width + 2 * pad.width - kernel.width) / stride.width;
float height = (float)(in.height + pad_t + pad_b - kernel.height) / stride.height;
float width = (float)(in.width + pad_l + pad_r - kernel.width) / stride.width;
out.height = 1 + (ceilMode ? ceil(height) : floor(height));
out.width = 1 + (ceilMode ? ceil(width) : floor(width));
if (pad.height || pad.width)
if (pad_r || pad_b)
{
// If we have padding, ensure that the last pooling starts strictly
// inside the image (instead of at the padding); otherwise clip the last.
if ((out.height - 1) * stride.height >= in.height + pad.height)
if ((out.height - 1) * stride.height >= in.height + pad_b)
--out.height;
if ((out.width - 1) * stride.width >= in.width + pad.width)
if ((out.width - 1) * stride.width >= in.width + pad_r)
--out.width;
CV_Assert((out.height - 1) * stride.height < in.height + pad.height);
CV_Assert((out.width - 1) * stride.width < in.width + pad.width);
CV_Assert((out.height - 1) * stride.height < in.height + pad_b);
CV_Assert((out.width - 1) * stride.width < in.width + pad_r);
}
}
else
@ -888,6 +902,7 @@ public:
dims[1] = psRoiOutChannels;
}
outputs.assign(type == MAX ? 2 : 1, shape(dims, 4));
return false;
}
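The Caffe-style output-size rule in the hunk above (ceil or floor depending on ceilMode, plus the clip that keeps the last pooling window starting strictly inside the image) can be sketched as:

```python
import math

def pool_out_dim(inp, kernel, pad_before, pad_after, stride, ceil_mode):
    d = float(inp + pad_before + pad_after - kernel) / stride
    out = 1 + int(math.ceil(d) if ceil_mode else math.floor(d))
    if (pad_before or pad_after) and (out - 1) * stride >= inp + pad_after:
        out -= 1   # last window must start inside the image, not in the padding
    return out

print(pool_out_dim(inp=7, kernel=3, pad_before=1, pad_after=1, stride=2, ceil_mode=True))  # 4
```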

@ -345,7 +345,7 @@ struct OCL4DNNPoolConfig
{
OCL4DNNPoolConfig() :
kernel(1, 1),
pad(0, 0),
pad_l(0), pad_t(0), pad_r(0), pad_b(0),
stride(1, 1),
dilation(1, 1),
channels(0),
@ -358,7 +358,7 @@ struct OCL4DNNPoolConfig
MatShape in_shape;
MatShape out_shape;
Size kernel;
Size pad;
int pad_l, pad_t, pad_r, pad_b;
Size stride;
Size dilation;
@ -381,7 +381,6 @@ class OCL4DNNPool
UMat& top_mask);
private:
// Pooling parameters
std::vector<int32_t> pad_;
std::vector<int32_t> stride_;
std::vector<int32_t> kernel_shape_;
std::vector<int32_t> im_in_shape_;
@ -394,8 +393,10 @@ class OCL4DNNPool
int32_t kernel_w_;
int32_t stride_h_;
int32_t stride_w_;
int32_t pad_h_;
int32_t pad_w_;
int32_t pad_t_;
int32_t pad_l_;
int32_t pad_b_;
int32_t pad_r_;
int32_t height_;
int32_t width_;
int32_t pooled_height_;

@ -62,7 +62,6 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
for (int i = 0; i < spatial_dims; ++i)
{
kernel_shape_.push_back(i == 0 ? config.kernel.height : config.kernel.width);
pad_.push_back(i == 0 ? config.pad.height : config.pad.width);
stride_.push_back(i == 0 ? config.stride.height : config.stride.width);
im_in_shape_.push_back(config.in_shape[dims - spatial_dims + i]);
im_out_shape_.push_back(config.out_shape[dims - spatial_dims + i]);
@ -72,8 +71,10 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
kernel_w_ = kernel_shape_[1];
stride_h_ = stride_[0];
stride_w_ = stride_[1];
pad_h_ = pad_[0];
pad_w_ = pad_[1];
pad_t_ = config.pad_t;
pad_l_ = config.pad_l;
pad_r_ = config.pad_r;
pad_b_ = config.pad_b;
height_ = im_in_shape_[0];
width_ = im_in_shape_[1];
pooled_height_ = im_out_shape_[0];
@ -113,14 +114,13 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
ocl::dnn::ocl4dnn_pooling_oclsrc,
format(" -D Dtype=%s -D KERNEL_MAX_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d"
" -D PAD_W=%d -D PAD_H=%d%s",
" -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s",
(use_half) ? "half" : "float",
kernel_w_, kernel_h_,
stride_w_, stride_h_,
pad_w_, pad_h_,
pad_l_, pad_t_, pad_r_, pad_b_,
computeMaxIdx ? " -D HAVE_MASK=1" : ""
));
if (oclk_max_pool_forward.empty())
return false;
@ -150,11 +150,11 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
ocl::dnn::ocl4dnn_pooling_oclsrc,
format(" -D Dtype=%s -D KERNEL_AVE_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d"
" -D PAD_W=%d -D PAD_H=%d%s",
" -D PAD_L=%d -D PAD_T=%d -D PAD_R=%d -D PAD_B=%d%s",
(use_half) ? "half" : "float",
kernel_w_, kernel_h_,
stride_w_, stride_h_,
pad_w_, pad_h_,
pad_l_, pad_t_, pad_r_, pad_b_,
avePoolPaddedArea ? " -D AVE_POOL_PADDING_AREA" : ""
));

@ -174,9 +174,8 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot
else if(attribute_name == "pads")
{
CV_Assert(attribute_proto.ints_size() == 4);
lp.set("pad_h", saturate_cast<int32_t>(attribute_proto.ints(0)));
lp.set("pad_w", saturate_cast<int32_t>(attribute_proto.ints(1)));
// push pad_b and pad_r for compute ceil_mode
lp.set("pad_t", saturate_cast<int32_t>(attribute_proto.ints(0)));
lp.set("pad_l", saturate_cast<int32_t>(attribute_proto.ints(1)));
lp.set("pad_b", saturate_cast<int32_t>(attribute_proto.ints(2)));
lp.set("pad_r", saturate_cast<int32_t>(attribute_proto.ints(3)));
}
@ -306,6 +305,7 @@ void ONNXImporter::populateNet(Net dstNet)
std::string layer_type = node_proto.op_type();
layerParams.type = layer_type;
if (layer_type == "MaxPool")
{
layerParams.type = "Pooling";
@ -551,7 +551,6 @@ void ONNXImporter::populateNet(Net dstNet)
for (int j = 0; j < node_proto.input_size(); j++) {
layerId = layer_id.find(node_proto.input(j));
if (layerId != layer_id.end()) {
dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
}

@ -73,8 +73,8 @@ __kernel void
const int xx = index / pooled_width;
const int ph = xx % pooled_height;
const int ch = xx / pooled_height;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
int hstart = ph * STRIDE_H - PAD_T;
int wstart = pw * STRIDE_W - PAD_L;
Dtype maxval = -FLT_MAX;
int maxidx = -1;
int in_offset = ch * height * width;
@ -117,10 +117,10 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
const int xx = index / pooled_width;
const int ph = xx % pooled_height;
const int ch = xx / pooled_height;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
int hend = min(hstart + KERNEL_H, height + PAD_H);
int wend = min(wstart + KERNEL_W, width + PAD_W);
int hstart = ph * STRIDE_H - PAD_T;
int wstart = pw * STRIDE_W - PAD_L;
int hend = min(hstart + KERNEL_H, height + PAD_B);
int wend = min(wstart + KERNEL_W, width + PAD_R);
int pool_size;
#ifdef AVE_POOL_PADDING_AREA
pool_size = (hend - hstart) * (wend - wstart);
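To make the window arithmetic concrete, here is a hedged NumPy rendering of the averaging logic above, including the two divisor choices that AVE_POOL_PADDING_AREA selects between (single channel, deliberately unvectorized):

```python
import numpy as np

def ave_pool2d(x, k, s, pad_t, pad_l, pad_b, pad_r, count_padding=True):
    H, W = x.shape
    out_h = (H + pad_t + pad_b - k) // s + 1
    out_w = (W + pad_l + pad_r - k) // s + 1
    y = np.zeros((out_h, out_w), np.float32)
    for ph in range(out_h):
        for pw in range(out_w):
            hs, ws = ph * s - pad_t, pw * s - pad_l
            he, we = min(hs + k, H + pad_b), min(ws + k, W + pad_r)
            padded_size = (he - hs) * (we - ws)   # window incl. padding area
            hs, ws = max(hs, 0), max(ws, 0)
            he, we = min(he, H), min(we, W)
            real_size = (he - hs) * (we - ws)     # window clipped to the image
            y[ph, pw] = x[hs:he, ws:we].sum() / (padded_size if count_padding
                                                 else real_size)
    return y
```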

@ -27,7 +27,7 @@
__kernel void MaxPoolForward(const int nthreads,
__global T* bottom_data, const int num, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
const int stride_h, const int stride_w, const int pad_h, const int pad_w,
const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
__global T* top_data
#ifdef MASK
, __global float* mask
@ -41,8 +41,8 @@ __kernel void MaxPoolForward(const int nthreads,
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
int hstart = ph * stride_h - pad_h;
int wstart = pw * stride_w - pad_w;
int hstart = ph * stride_h - pad_t;
int wstart = pw * stride_w - pad_l;
const int hend = min(hstart + kernel_h, height);
const int wend = min(wstart + kernel_w, width);
hstart = max(hstart, 0);
@ -71,7 +71,7 @@ __kernel void MaxPoolForward(const int nthreads,
__kernel void AvePoolForward(const int nthreads,
__global T* bottom_data, const int num, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
const int stride_h, const int stride_w, const int pad_h, const int pad_w,
const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
__global T* top_data
#ifdef MASK
, __global float* mask // NOT USED
@ -84,9 +84,9 @@ __kernel void AvePoolForward(const int nthreads,
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_h; int wstart = pw * stride_w - pad_w;
int hend = min(hstart + kernel_h, height + pad_h);
int wend = min(wstart + kernel_w, width + pad_w);
int n = index / pooled_width / pooled_height / channels; int hstart = ph * stride_h - pad_t; int wstart = pw * stride_w - pad_l;
int hend = min(hstart + kernel_h, height + pad_b);
int wend = min(wstart + kernel_w, width + pad_r);
const int pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0);
wstart = max(wstart, 0);

@ -74,6 +74,18 @@ enum LuaType
LEGACY_TYPE_RECUR_FUNCTION = 7
};
// We use OpenCV's types to manage CV_ELEM_SIZE.
enum TorchType
{
TYPE_DOUBLE = CV_64F,
TYPE_FLOAT = CV_32F,
TYPE_BYTE = CV_8U,
TYPE_CHAR = CV_8S,
TYPE_SHORT = CV_16S,
TYPE_INT = CV_32S,
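// int64 has no dedicated OpenCV depth; a 2-channel CV_32S element gives the same 8-byte CV_ELEM_SIZE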
TYPE_LONG = CV_32SC2
};
template<typename T>
static String toString(const T &v)
{
@ -203,19 +215,19 @@ struct TorchImporter
String typeStr = str.substr(strlen(prefix), str.length() - strlen(prefix) - strlen(suffix));
if (typeStr == "Double")
return CV_64F;
return TYPE_DOUBLE;
else if (typeStr == "Float" || typeStr == "Cuda")
return CV_32F;
return TYPE_FLOAT;
else if (typeStr == "Byte")
return CV_8U;
return TYPE_BYTE;
else if (typeStr == "Char")
return CV_8S;
return TYPE_CHAR;
else if (typeStr == "Short")
return CV_16S;
return TYPE_SHORT;
else if (typeStr == "Int")
return CV_32S;
else if (typeStr == "Long") //Carefully! CV_64S type coded as CV_USRTYPE1
return CV_USRTYPE1;
return TYPE_INT;
else if (typeStr == "Long")
return TYPE_LONG;
else
CV_Error(Error::StsNotImplemented, "Unknown type \"" + typeStr + "\" of torch class \"" + str + "\"");
}
@ -236,36 +248,44 @@ struct TorchImporter
void readTorchStorage(int index, int type = -1)
{
long size = readLong();
Mat storageMat(1, size, (type != CV_USRTYPE1) ? type : CV_64F); //handle LongStorage as CV_64F Mat
Mat storageMat;
switch (type)
{
case CV_32F:
case TYPE_FLOAT:
storageMat.create(1, size, CV_32F);
THFile_readFloatRaw(file, (float*)storageMat.data, size);
break;
case CV_64F:
case TYPE_DOUBLE:
storageMat.create(1, size, CV_64F);
THFile_readDoubleRaw(file, (double*)storageMat.data, size);
break;
case CV_8S:
case CV_8U:
case TYPE_CHAR:
storageMat.create(1, size, CV_8S);
THFile_readByteRaw(file, (uchar*)storageMat.data, size);
break;
case CV_16S:
case CV_16U:
case TYPE_BYTE:
storageMat.create(1, size, CV_8U);
THFile_readByteRaw(file, (uchar*)storageMat.data, size);
break;
case TYPE_SHORT:
storageMat.create(1, size, CV_16S);
THFile_readShortRaw(file, (short*)storageMat.data, size);
break;
case CV_32S:
case TYPE_INT:
storageMat.create(1, size, CV_32S);
THFile_readIntRaw(file, (int*)storageMat.data, size);
break;
case CV_USRTYPE1:
case TYPE_LONG:
{
storageMat.create(1, size, CV_64F); //handle LongStorage as CV_64F Mat
double *buf = storageMat.ptr<double>();
THFile_readLongRaw(file, (int64*)buf, size);
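// in-place int64 -> double conversion: both are 8 bytes wide, so slot i can be rewritten directly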
for (size_t i = (size_t)size; i-- > 0; )
buf[i] = ((int64*)buf)[i];
}
break;
}
default:
CV_Error(Error::StsInternal, "");
break;

@ -307,7 +307,7 @@ TEST_P(Reproducibility_SqueezeNet_v1_1, Accuracy)
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(targetId);
Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false);
Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false, true);
ASSERT_TRUE(!input.empty());
Mat out;
@ -403,7 +403,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121)
const string model = findDataFile("dnn/DenseNet_121.caffemodel", false);
Mat inp = imread(_tf("dog416.png"));
inp = blobFromImage(inp, 1.0 / 255, Size(224, 224));
inp = blobFromImage(inp, 1.0 / 255, Size(224, 224), Scalar(), true, true);
Mat ref = blobFromNPY(_tf("densenet_121_output.npy"));
Net net = readNetFromCaffe(proto, model);

@ -346,6 +346,10 @@ TEST_P(Test_ONNX_nets, DenseNet121)
testONNXModels("densenet121", pb, l1, lInf);
}
TEST_P(Test_ONNX_nets, Inception_v1)
{
testONNXModels("inception_v1", pb);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());

@ -62,8 +62,7 @@ TEST(Test_TensorFlow, inception_accuracy)
Mat sample = imread(_tf("grace_hopper_227.png"));
ASSERT_TRUE(!sample.empty());
resize(sample, sample, Size(224, 224));
Mat inputBlob = blobFromImage(sample);
Mat inputBlob = blobFromImage(sample, 1.0, Size(224, 224), Scalar(), /*swapRB*/true);
net.setInput(inputBlob, "input");
Mat out = net.forward("softmax2");

@ -278,7 +278,7 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy)
sampleF32 /= 255;
resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
Mat inputBlob = blobFromImage(sampleF32);
Mat inputBlob = blobFromImage(sampleF32, 1.0, Size(), Scalar(), /*swapRB*/true);
net.setInput(inputBlob);
Mat out = net.forward();
@ -305,7 +305,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
net.setPreferableTarget(target);
Mat sample = imread(_tf("street.png", false));
Mat inputBlob = blobFromImage(sample, 1./255);
Mat inputBlob = blobFromImage(sample, 1./255, Size(), Scalar(), /*swapRB*/true);
net.setInput(inputBlob, "");
Mat out = net.forward();

@ -1987,10 +1987,10 @@ transform.
@param image 8-bit, single-channel binary source image. The image may be modified by the function.
@param lines Output vector of lines. Each line is represented by a 2 or 3 element vector
\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \votes)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
\f$(\rho, \theta)\f$ or \f$(\rho, \theta, \textrm{votes})\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
the image). \f$\theta\f$ is the line rotation angle in radians (
\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
\f$\votes\f$ is the value of accumulator.
\f$\textrm{votes}\f$ is the value of accumulator.
@param rho Distance resolution of the accumulator in pixels.
@param theta Angle resolution of the accumulator in radians.
@param threshold Accumulator threshold parameter. Only those lines are returned that get enough

@ -0,0 +1,182 @@
#!/usr/bin/env python
import os
import cv2 as cv
import numpy as np
from tests_common import NewOpenCVTests, unittest
def normAssert(test, a, b, lInf=1e-5):
test.assertLess(np.max(np.abs(a - b)), lInf)
def inter_area(box1, box2):
x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
y_min, y_max = max(box1[1], box2[1]), min(box1[3], box2[3])
return max(0, x_max - x_min) * max(0, y_max - y_min)  # 0 for disjoint boxes
def area(box):
return (box[2] - box[0]) * (box[3] - box[1])
def box2str(box):
left, top = box[0], box[1]
width, height = box[2] - left, box[3] - top
return '[%f x %f from (%f, %f)]' % (width, height, left, top)
def normAssertDetections(test, ref, out, confThreshold=0.0, scores_diff=1e-5, boxes_iou_diff=1e-4):
ref = np.array(ref, np.float32)
refClassIds, testClassIds = ref[:, 1], out[:, 1]
refScores, testScores = ref[:, 2], out[:, 2]
refBoxes, testBoxes = ref[:, 3:], out[:, 3:]
matchedRefBoxes = [False] * len(refBoxes)
errMsg = ''
for i in range(len(refBoxes)):
testScore = testScores[i]
if testScore < confThreshold:
continue
testClassId, testBox = testClassIds[i], testBoxes[i]
matched = False
for j in range(len(refBoxes)):
if (not matchedRefBoxes[j]) and testClassId == refClassIds[j] and \
abs(testScore - refScores[j]) < scores_diff:
interArea = inter_area(testBox, refBoxes[j])
iou = interArea / (area(testBox) + area(refBoxes[j]) - interArea)
if abs(iou - 1.0) < boxes_iou_diff:
matched = True
matchedRefBoxes[j] = True
if not matched:
errMsg += '\nUnmatched prediction: class %d score %f box %s' % (testClassId, testScore, box2str(testBox))
for i in range(len(refBoxes)):
if (not matchedRefBoxes[i]) and refScores[i] > confThreshold:
errMsg += '\nUnmatched reference: class %d score %f box %s' % (refClassIds[i], refScores[i], box2str(refBoxes[i]))
if errMsg:
test.fail(errMsg)
# Returns a simple one-layer network created from Caffe's format
def getSimpleNet():
prototxt = """
name: "simpleNet"
input: "data"
layer {
type: "Identity"
name: "testLayer"
top: "testLayer"
bottom: "data"
}
"""
return cv.dnn.readNetFromCaffe(bytearray(prototxt, 'utf8'))
def testBackendAndTarget(backend, target):
net = getSimpleNet()
net.setPreferableBackend(backend)
net.setPreferableTarget(target)
inp = np.random.standard_normal([1, 2, 3, 4]).astype(np.float32)
try:
net.setInput(inp)
net.forward()
except BaseException as e:
return False
return True
haveInfEngine = testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU)
dnnBackendsAndTargets = [
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
]
if haveInfEngine:
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU])
if testBackendAndTarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD])
if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
if haveInfEngine: # FIXIT Check Intel iGPU only
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
def printParams(backend, target):
backendNames = {
cv.dnn.DNN_BACKEND_OPENCV: 'OCV',
cv.dnn.DNN_BACKEND_INFERENCE_ENGINE: 'DLIE'
}
targetNames = {
cv.dnn.DNN_TARGET_CPU: 'CPU',
cv.dnn.DNN_TARGET_OPENCL: 'OCL',
cv.dnn.DNN_TARGET_OPENCL_FP16: 'OCL_FP16',
cv.dnn.DNN_TARGET_MYRIAD: 'MYRIAD'
}
print('%s/%s' % (backendNames[backend], targetNames[target]))
class dnn_test(NewOpenCVTests):
def find_dnn_file(self, filename, required=True):
return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd())], required=required)
def test_blobFromImage(self):
np.random.seed(324)
width = 6
height = 7
scale = 1.0/127.5
mean = (10, 20, 30)
# Test arguments names.
img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8)
blob = cv.dnn.blobFromImage(img, scale, (width, height), mean, True, False)
blob_args = cv.dnn.blobFromImage(img, scalefactor=scale, size=(width, height),
mean=mean, swapRB=True, crop=False)
normAssert(self, blob, blob_args)
# Test values.
target = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR)
target = target.astype(np.float32)
target = target[:,:,[2, 1, 0]] # BGR2RGB
target[:,:,0] -= mean[0]
target[:,:,1] -= mean[1]
target[:,:,2] -= mean[2]
target *= scale
target = target.transpose(2, 0, 1).reshape(1, 3, height, width) # to NCHW
normAssert(self, blob, target)
def test_face_detection(self):
testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt2', required=testdata_required)
model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=testdata_required)
if proto is None or model is None:
raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
img = self.get_sample('gpu/lbpcascade/er.png')
blob = cv.dnn.blobFromImage(img, mean=(104, 177, 123), swapRB=False, crop=False)
ref = [[0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631],
[0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168],
[0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290],
[0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477],
[0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494],
[0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801]]
print('\n')
for backend, target in dnnBackendsAndTargets:
printParams(backend, target)
net = cv.dnn.readNet(proto, model)
net.setPreferableBackend(backend)
net.setPreferableTarget(target)
net.setInput(blob)
out = net.forward().reshape(-1, 7)
scoresDiff = 4e-3 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-5
iouDiff = 2e-2 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-4
normAssertDetections(self, ref, out, 0.5, scoresDiff, iouDiff)
if __name__ == '__main__':
NewOpenCVTests.bootstrap()

@ -26,23 +26,25 @@ class NewOpenCVTests(unittest.TestCase):
# GitHub repository URL
repoUrl = 'https://raw.github.com/opencv/opencv/master'
def find_file(self, filename, searchPaths=[], required=True):
searchPaths = searchPaths if searchPaths else [self.repoPath, self.extraTestDataPath]
for path in searchPaths:
if path is not None:
candidate = path + '/' + filename
if os.path.isfile(candidate):
return candidate
if required:
self.fail('File ' + filename + ' not found')
return None
def get_sample(self, filename, iscolor = None):
if iscolor is None:
iscolor = cv.IMREAD_COLOR
if filename not in self.image_cache:
filedata = None
if NewOpenCVTests.repoPath is not None:
candidate = NewOpenCVTests.repoPath + '/' + filename
if os.path.isfile(candidate):
with open(candidate, 'rb') as f:
filedata = f.read()
if NewOpenCVTests.extraTestDataPath is not None:
candidate = NewOpenCVTests.extraTestDataPath + '/' + filename
if os.path.isfile(candidate):
with open(candidate, 'rb') as f:
filepath = self.find_file(filename)
with open(filepath, 'rb') as f:
filedata = f.read()
if filedata is None:
return None  # filedata = urlopen(NewOpenCVTests.repoUrl + '/' + filename).read()
self.image_cache[filename] = cv.imdecode(np.frombuffer(filedata, dtype=np.uint8), iscolor)  # frombuffer avoids the np.fromstring deprecation
return self.image_cache[filename]

@ -58,6 +58,10 @@
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#ifndef CV_UNUSED // Required for standalone compilation mode (OpenCV defines this in base.hpp)
#define CV_UNUSED(name) (void)name
#endif
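// Usage sketch: CV_UNUSED portably silences unused-variable/parameter warnings,
// e.g. in a callback that ignores one argument:
//   static void on_event(int code) { CV_UNUSED(code); }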
#ifdef __cplusplus
extern "C" {
#endif

@ -86,29 +86,13 @@ public class MainActivity extends AppCompatActivity implements CvCameraViewListe
// Forward image through network.
Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
new Size(IN_WIDTH, IN_HEIGHT),
new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), false);
new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false);
net.setInput(blob);
Mat detections = net.forward();
int cols = frame.cols();
int rows = frame.rows();
Size cropSize;
if ((float)cols / rows > WH_RATIO) {
cropSize = new Size(rows * WH_RATIO, rows);
} else {
cropSize = new Size(cols, cols / WH_RATIO);
}
int y1 = (int)(rows - cropSize.height) / 2;
int y2 = (int)(y1 + cropSize.height);
int x1 = (int)(cols - cropSize.width) / 2;
int x2 = (int)(x1 + cropSize.width);
Mat subFrame = frame.submat(y1, y2, x1, x2);
cols = subFrame.cols();
rows = subFrame.rows();
detections = detections.reshape(1, (int)detections.total() / 7);
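// After the reshape, each row of the Nx7 matrix is one detection for this
// SSD-style network: [batchId, classId, confidence, left, top, right, bottom],
// with box coordinates normalized to [0, 1] relative to the input image.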
for (int i = 0; i < detections.rows(); ++i) {
@ -116,26 +100,24 @@ public class MainActivity extends AppCompatActivity implements CvCameraViewListe
if (confidence > THRESHOLD) {
int classId = (int)detections.get(i, 1)[0];
int xLeftBottom = (int)(detections.get(i, 3)[0] * cols);
int yLeftBottom = (int)(detections.get(i, 4)[0] * rows);
int xRightTop = (int)(detections.get(i, 5)[0] * cols);
int yRightTop = (int)(detections.get(i, 6)[0] * rows);
int left = (int)(detections.get(i, 3)[0] * cols);
int top = (int)(detections.get(i, 4)[0] * rows);
int right = (int)(detections.get(i, 5)[0] * cols);
int bottom = (int)(detections.get(i, 6)[0] * rows);
// Draw rectangle around detected object.
Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom),
new Point(xRightTop, yRightTop),
Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom),
new Scalar(0, 255, 0));
String label = classNames[classId] + ": " + confidence;
int[] baseLine = new int[1];
Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);
// Draw background for label.
Imgproc.rectangle(subFrame, new Point(xLeftBottom, yLeftBottom - labelSize.height),
new Point(xLeftBottom + labelSize.width, yLeftBottom + baseLine[0]),
Imgproc.rectangle(frame, new Point(left, top - labelSize.height),
new Point(left + labelSize.width, top + baseLine[0]),
new Scalar(255, 255, 255), Imgproc.FILLED);
// Write class name and confidence.
Imgproc.putText(subFrame, label, new Point(xLeftBottom, yLeftBottom),
Imgproc.putText(frame, label, new Point(left, top),
Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
}
}

@ -0,0 +1,104 @@
/**
* @brief You will learn how to segment an anisotropic image with a single local orientation using the gradient structure tensor (GST)
* @author Karpushin Vladislav, karpushin@ngs.ru, https://github.com/VladKarpushin
*/
#include <iostream>
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
using namespace cv;
using namespace std;
void calcGST(const Mat& inputImg, Mat& imgCoherencyOut, Mat& imgOrientationOut, int w);
int main()
{
int W = 52; // window size is WxW
double C_Thr = 0.43; // threshold for coherency
int LowThr = 35; // lower orientation threshold, in degrees (valid range: 0..180)
int HighThr = 57; // upper orientation threshold, in degrees (valid range: 0..180)
Mat imgIn = imread("input.jpg", IMREAD_GRAYSCALE);
if (imgIn.empty()) // check whether the image was loaded
{
cout << "ERROR : Image cannot be loaded..!!" << endl;
return -1;
}
//! [main]
Mat imgCoherency, imgOrientation;
calcGST(imgIn, imgCoherency, imgOrientation, W);
//! [thresholding]
Mat imgCoherencyBin;
imgCoherencyBin = imgCoherency > C_Thr;
Mat imgOrientationBin;
inRange(imgOrientation, Scalar(LowThr), Scalar(HighThr), imgOrientationBin);
//! [thresholding]
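// Both thresholding results are 8-bit masks holding 255 where the condition is
// met: the Mat comparison "imgCoherency > C_Thr" and inRange() each produce
// one, so the two criteria can be combined with a per-pixel AND below.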
//! [combining]
Mat imgBin;
imgBin = imgCoherencyBin & imgOrientationBin;
//! [combining]
//! [main]
normalize(imgCoherency, imgCoherency, 0, 255, NORM_MINMAX);
normalize(imgOrientation, imgOrientation, 0, 255, NORM_MINMAX);
imwrite("result.jpg", 0.5*(imgIn + imgBin));
imwrite("Coherency.jpg", imgCoherency);
imwrite("Orientation.jpg", imgOrientation);
return 0;
}
//! [calcGST]
void calcGST(const Mat& inputImg, Mat& imgCoherencyOut, Mat& imgOrientationOut, int w)
{
Mat img;
inputImg.convertTo(img, CV_64F);
// GST components calculation (start)
// J = (J11 J12; J12 J22) - GST
Mat imgDiffX, imgDiffY, imgDiffXY;
Sobel(img, imgDiffX, CV_64F, 1, 0, 3);
Sobel(img, imgDiffY, CV_64F, 0, 1, 3);
multiply(imgDiffX, imgDiffY, imgDiffXY);
Mat imgDiffXX, imgDiffYY;
multiply(imgDiffX, imgDiffX, imgDiffXX);
multiply(imgDiffY, imgDiffY, imgDiffYY);
Mat J11, J22, J12; // J11, J22 and J12 are GST components
boxFilter(imgDiffXX, J11, CV_64F, Size(w, w));
boxFilter(imgDiffYY, J22, CV_64F, Size(w, w));
boxFilter(imgDiffXY, J12, CV_64F, Size(w, w));
// GST components calculation (stop)
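// Note: boxFilter() performs the local averaging over the w x w window, turning
// the per-pixel gradient products into windowed means. This smoothing step is
// what makes the structure tensor describe a neighborhood, not a single pixel.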
// eigenvalue calculation (start)
// lambda1 = 0.5*(J11 + J22 + sqrt((J11-J22)^2 + 4*J12^2))
// lambda2 = 0.5*(J11 + J22 - sqrt((J11-J22)^2 + 4*J12^2))
// The code below drops the common factor 0.5: only ratios of the eigenvalues are used.
Mat tmp1, tmp2, tmp3, tmp4;
tmp1 = J11 + J22;
tmp2 = J11 - J22;
multiply(tmp2, tmp2, tmp2);
multiply(J12, J12, tmp3);
sqrt(tmp2 + 4.0 * tmp3, tmp4);
Mat lambda1, lambda2;
lambda1 = tmp1 + tmp4; // largest eigenvalue (doubled, see above)
lambda2 = tmp1 - tmp4; // smallest eigenvalue (doubled, see above)
// eigenvalue calculation (stop)
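// Derivation sketch: for the symmetric 2x2 matrix J = (J11 J12; J12 J22),
// det(J - lambda*I) = 0 gives lambda^2 - (J11+J22)*lambda + (J11*J22 - J12^2) = 0,
// whose roots are lambda = 0.5*((J11+J22) +/- sqrt((J11-J22)^2 + 4*J12^2)).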
// Coherency calculation (start)
// Coherency = (lambda1 - lambda2)/(lambda1 + lambda2), a measure of anisotropy
// Coherency is the degree of anisotropy (consistency of the local orientation)
divide(lambda1 - lambda2, lambda1 + lambda2, imgCoherencyOut);
// Coherency calculation (stop)
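// Coherency lies in [0, 1]: 0 for an isotropic neighborhood (lambda1 == lambda2)
// and 1 for an ideally oriented one (lambda2 == 0). In a perfectly constant
// region lambda1 + lambda2 == 0 and the ratio is undefined.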
// orientation angle calculation (start)
// tan(2*Alpha) = 2*J12/(J22 - J11)
// Alpha = 0.5 * atan2(2*J12, J22 - J11)
phase(J22 - J11, 2.0*J12, imgOrientationOut, true);
imgOrientationOut = 0.5*imgOrientationOut;
// orientation angle calculation (stop)
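// Note: phase() with angleInDegrees=true returns angles in [0, 360), so after
// halving, the orientation lies in [0, 180), matching the LowThr/HighThr range.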
}
//! [calcGST]