Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/17646/head
Alexander Alekhin 5 years ago
commit c81d785ada
  1. 14
      cmake/OpenCVDetectCUDA.cmake
  2. 3
      doc/opencv.bib
  3. 2
      modules/dnn/src/darknet/darknet_io.cpp
  4. 5
      modules/dnn/src/layers/elementwise_layers.cpp

@ -82,13 +82,14 @@ if(CUDA_FOUND)
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing") set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere")
set(_arch_fermi "2.0") set(_arch_fermi "2.0")
set(_arch_kepler "3.0;3.5;3.7") set(_arch_kepler "3.0;3.5;3.7")
set(_arch_maxwell "5.0;5.2") set(_arch_maxwell "5.0;5.2")
set(_arch_pascal "6.0;6.1") set(_arch_pascal "6.0;6.1")
set(_arch_volta "7.0") set(_arch_volta "7.0")
set(_arch_turing "7.5") set(_arch_turing "7.5")
set(_arch_ampere "8.0")
if(NOT CMAKE_CROSSCOMPILING) if(NOT CMAKE_CROSSCOMPILING)
list(APPEND _generations "Auto") list(APPEND _generations "Auto")
endif() endif()
@ -163,6 +164,8 @@ if(CUDA_FOUND)
set(__cuda_arch_bin ${_arch_volta}) set(__cuda_arch_bin ${_arch_volta})
elseif(CUDA_GENERATION STREQUAL "Turing") elseif(CUDA_GENERATION STREQUAL "Turing")
set(__cuda_arch_bin ${_arch_turing}) set(__cuda_arch_bin ${_arch_turing})
elseif(CUDA_GENERATION STREQUAL "Ampere")
set(__cuda_arch_bin ${_arch_ampere})
elseif(CUDA_GENERATION STREQUAL "Auto") elseif(CUDA_GENERATION STREQUAL "Auto")
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
@ -180,7 +183,13 @@ if(CUDA_FOUND)
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
if(NOT _nvcc_res EQUAL 0) if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
set(__cuda_arch_bin "5.3 6.2 7.2") # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
ocv_filter_available_architecture(__cuda_arch_bin
5.3
6.2
7.2
7.0
)
else() else()
set(__cuda_arch_bin "${_nvcc_out}") set(__cuda_arch_bin "${_nvcc_out}")
endif() endif()
@ -193,6 +202,7 @@ if(CUDA_FOUND)
${_arch_pascal} ${_arch_pascal}
${_arch_volta} ${_arch_volta}
${_arch_turing} ${_arch_turing}
${_arch_ampere}
) )
endif() endif()
endif() endif()

@ -346,7 +346,8 @@
year = {2003}, year = {2003},
pages = {363--370}, pages = {363--370},
publisher = {Springer}, publisher = {Springer},
url = {https://arxiv.org/pdf/1808.01752} url = {https://doi.org/10.1007/3-540-45103-X_50},
doi = {10.1007/3-540-45103-X_50}
} }
@inproceedings{Farsiu03, @inproceedings{Farsiu03,
author = {Farsiu, Sina and Robinson, Dirk and Elad, Michael and Milanfar, Peyman}, author = {Farsiu, Sina and Robinson, Dirk and Elad, Michael and Milanfar, Peyman},

@ -797,7 +797,7 @@ namespace cv {
int classes = getParam<int>(layer_params, "classes", -1); int classes = getParam<int>(layer_params, "classes", -1);
int num_of_anchors = getParam<int>(layer_params, "num", -1); int num_of_anchors = getParam<int>(layer_params, "num", -1);
float thresh = getParam<float>(layer_params, "thresh", 0.2); float thresh = getParam<float>(layer_params, "thresh", 0.2);
float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.4); float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.0);
float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0); float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string()); std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());

@ -763,8 +763,11 @@ struct MishFunctor : public BaseFunctor
{ {
for( int i = 0; i < len; i++ ) for( int i = 0; i < len; i++ )
{ {
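// Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200:
// with n = e^x * (e^x + 2), tanh(log(1 + e^x)) equals n / (n + 2), so mish(x) = x * n / (n + 2).
// x is clamped to 20 before exp() so that n stays finite in float; for larger x the ratio n / (n + 2) is effectively 1.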
float x = srcptr[i]; float x = srcptr[i];
dstptr[i] = x * tanh(log(1.0f + exp(x))); float eX = exp(std::min(x, 20.f));
float n = (eX + 2) * eX;
dstptr[i] = (x * n) / (n + 2);
} }
} }
} }

Loading…
Cancel
Save