From e0f9eac521f960ddced80b969a18445ce02d0bcb Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 8 Jul 2020 07:28:40 +0000 Subject: [PATCH] cmake: backport CUDA scripts --- cmake/OpenCVDetectCUDA.cmake | 167 +++++++++++++++--- cmake/templates/cvconfig.h.in | 3 - .../include/opencv2/core/private.cuda.hpp | 3 - 3 files changed, 143 insertions(+), 30 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 2586a53f80..ef3e0184a0 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -1,13 +1,14 @@ -if(WIN32 AND NOT MSVC) +if((WIN32 AND NOT MSVC) AND NOT OPENCV_CMAKE_FORCE_CUDA) message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).") return() endif() -if(NOT UNIX AND CV_CLANG) +if((NOT UNIX AND CV_CLANG) AND NOT OPENCV_CMAKE_FORCE_CUDA) message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).") return() endif() +#set(OPENCV_CMAKE_CUDA_DEBUG 1) # uncomment to print the nvcc detection commands and their results if(((NOT CMAKE_VERSION VERSION_LESS "3.9.0") # requires https://gitlab.kitware.com/cmake/cmake/merge_requests/663 OR OPENCV_CUDA_FORCE_EXTERNAL_CMAKE_MODULE) @@ -43,7 +44,7 @@ if(CUDA_FOUND) endif() if(WITH_NVCUVID) - macro(SEARCH_NVCUVID_HEADER _filename _result) + macro(ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result) # place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR find_path(_header_result ${_filename} @@ -60,8 +61,8 @@ if(CUDA_FOUND) endif() unset(_header_result CACHE) endmacro() - SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER) - SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER) + ocv_cuda_SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER) + ocv_cuda_SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER) find_cuda_helper_libs(nvcuvid) if(WIN32) find_cuda_helper_libs(nvcuvenc) @@ -102,32 +103,89 @@ if(CUDA_FOUND) unset(CUDA_ARCH_PTX CACHE) endif() + if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin") + # 
already specified by user + elseif(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}") + LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}") + elseif(WIN32 AND CMAKE_LINKER) # Workaround for VS cl.exe not being in the env. path: pass the directory of CMAKE_LINKER to nvcc via -ccbin + get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY) + LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}") + else() + if(CUDA_HOST_COMPILER) + message(STATUS "CUDA: CUDA_HOST_COMPILER='${CUDA_HOST_COMPILER}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that") + endif() + endif() + macro(ocv_filter_available_architecture result_list) - if(DEFINED CUDA_SUPPORTED_CC) - set(${result_list} "${CUDA_SUPPORTED_CC}") + set(__cache_key_check "${ARGN} : ${CUDA_NVCC_EXECUTABLE} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}") + if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check) + set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}") else() set(CC_LIST ${ARGN}) foreach(target_arch ${CC_LIST}) string(REPLACE "." 
"" target_arch_short "${target_arch}") set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}") - execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" - WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" - RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + set(_cmd "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile) + execute_process( + COMMAND ${_cmd} + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" + RESULT_VARIABLE _nvcc_res + OUTPUT_VARIABLE _nvcc_out + ERROR_VARIABLE _nvcc_err + #ERROR_QUIET (left disabled: stderr is captured in _nvcc_err for the OPENCV_CMAKE_CUDA_DEBUG output) + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(OPENCV_CMAKE_CUDA_DEBUG) + message(WARNING "COMMAND: ${_cmd}") + message(STATUS "Result: ${_nvcc_res}") + message(STATUS "Out: ${_nvcc_out}") + message(STATUS "Err: ${_nvcc_err}") + endif() if(_nvcc_res EQUAL 0) - set(${result_list} "${${result_list}} ${target_arch}") + LIST(APPEND ${result_list} "${target_arch}") endif() endforeach() string(STRIP "${${result_list}}" ${result_list}) - set(CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "List of supported compute capability") + if(" ${${result_list}}" STREQUAL " ") + message(WARNING "CUDA: Autodetection arch list is empty. 
Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable") + endif() + + # cache detected values + set(OPENCV_CACHE_CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "") + set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "") endif() endmacro() macro(ocv_detect_native_cuda_arch status output) - execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run" - WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" - RESULT_VARIABLE ${status} OUTPUT_VARIABLE ${output} - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run") + set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}") + if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check) + set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}") + set(${status} 0) + else() + execute_process( + COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND} + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" + RESULT_VARIABLE ${status} + OUTPUT_VARIABLE _nvcc_out + ERROR_VARIABLE _nvcc_err + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(OPENCV_CMAKE_CUDA_DEBUG) + message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}") + message(STATUS "Result: ${${status}}") + message(STATUS "Out: ${_nvcc_out}") + message(STATUS "Err: ${_nvcc_err}") + endif() + string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any + + if(${status} EQUAL 0) + # cache detected values: the arch string held by the 'output' variable, keyed by the detection command + set(OPENCV_CACHE_CUDA_ACTIVE_CC "${${output}}" CACHE INTERNAL "") + set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "") + endif() + endif() endmacro() macro(ocv_wipeout_deprecated _arch_bin_list) @@ -156,6 +214,9 
@@ if(CUDA_FOUND) else() string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}") endif() + elseif(CUDA_ARCH_BIN) + message(STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN}") + set(__cuda_arch_bin ${CUDA_ARCH_BIN}) endif() if(NOT DEFINED __cuda_arch_bin) @@ -163,7 +224,11 @@ if(CUDA_FOUND) set(__cuda_arch_bin "3.2") set(__cuda_arch_ptx "") elseif(AARCH64) - ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) + if(NOT CMAKE_CROSSCOMPILING) + ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out) + else() + set(_nvcc_res -1) # emulate error, see below + endif() if(NOT _nvcc_res EQUAL 0) message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.") # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) @@ -197,11 +262,9 @@ if(CUDA_FOUND) string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") - # Ckeck if user specified 1.0 compute capability: we don't support it - string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}") - set(CUDA_ARCH_BIN_OR_PTX_10 0) - if(NOT ${HAS_ARCH_10} STREQUAL "") - set(CUDA_ARCH_BIN_OR_PTX_10 1) + # Check if user specified 1.0 compute capability: we don't support it, so stop configuration with an error + if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " 1.0") + message(SEND_ERROR "CUDA: 1.0 compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake") endif() # NVCC flags to be set @@ -312,6 +375,16 @@ if(CUDA_FOUND) if(UNIX OR APPLE) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC) + if( + ENABLE_CXX11 + AND NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_DEBUG} ${CUDA_NVCC_FLAGS}" MATCHES "-std=" + ) + if(CUDA_VERSION VERSION_LESS "11.0") + list(APPEND CUDA_NVCC_FLAGS "--std=c++11") + else() + list(APPEND CUDA_NVCC_FLAGS "--std=c++14") + endif() + endif() endif() if(APPLE) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only) @@ -379,7 +452,53 @@ if(HAVE_CUDA) 
if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION ) - message(WARNING "CUDA with MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.") + message(STATUS "CUDA: MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.") set(CMAKE_SUPPRESS_REGENERATION ON) endif() endif() + + +# ---------------------------------------------------------------------------- +# Add CUDA libraries (needed for apps/tools, samples) +# ---------------------------------------------------------------------------- +if(HAVE_CUDA) + # When CUDA_CUDA_LIBRARY is a stubs/libcuda.so, expose it as libcuda.so.1 (symlink, else copy) through -rpath-link; otherwise allow undefined shlib symbols. Details: https://github.com/NVIDIA/nvidia-docker/issues/775 + if(" ${CUDA_CUDA_LIBRARY}" MATCHES "/stubs/libcuda.so" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND) + set(CUDA_STUB_ENABLED_LINK_WORKAROUND 1) + if(EXISTS "${CUDA_CUDA_LIBRARY}" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK) + set(CUDA_STUB_TARGET_PATH "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/") + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1" + RESULT_VARIABLE CUDA_STUB_SYMLINK_RESULT) + if(NOT CUDA_STUB_SYMLINK_RESULT EQUAL 0) + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1" + RESULT_VARIABLE CUDA_STUB_COPY_RESULT) + if(NOT CUDA_STUB_COPY_RESULT EQUAL 0) + set(CUDA_STUB_ENABLED_LINK_WORKAROUND 0) + endif() + endif() + if(CUDA_STUB_ENABLED_LINK_WORKAROUND) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,\"${CUDA_STUB_TARGET_PATH}\"") + endif() + else() + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-shlib-undefined") + endif() + if(NOT CUDA_STUB_ENABLED_LINK_WORKAROUND) + message(WARNING "CUDA: workaround for stubs/libcuda.so.1 is not applied") + endif() + endif() + + 
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) + if(HAVE_CUBLAS) + set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cublas_LIBRARY}) + endif() + if(HAVE_CUFFT) + set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cufft_LIBRARY}) + endif() + foreach(p ${CUDA_LIBS_PATH}) + if(MSVC AND CMAKE_GENERATOR MATCHES "Ninja|JOM") + set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}"${p}") + else() + set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}${p}) + endif() + endforeach() +endif() diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 9a42bfa3d1..4b73c4ee0c 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -13,9 +13,6 @@ /* Compile for 'real' NVIDIA GPU architectures */ #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" -/* Create PTX or BIN for 1.0 compute capability */ -#cmakedefine CUDA_ARCH_BIN_OR_PTX_10 - /* NVIDIA GPU features are used */ #define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}" diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp index b9955018bb..36edd8ab31 100644 --- a/modules/core/include/opencv2/core/private.cuda.hpp +++ b/modules/core/include/opencv2/core/private.cuda.hpp @@ -82,9 +82,6 @@ # error "Insufficient Cuda Runtime library version, please update it." # endif -# if defined(CUDA_ARCH_BIN_OR_PTX_10) -# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0" -# endif #endif //! @cond IGNORED