From 156406b56ca38ff7f6440fb16219078b00130007 Mon Sep 17 00:00:00 2001
From: Tomoaki Teshima
Date: Tue, 2 Jun 2020 05:07:53 +0900
Subject: [PATCH] select the architecture based on nvcc result

* cache the result
* DRY
* brush up based on review
---
 cmake/OpenCVDetectCUDA.cmake | 89 +++++++++++++++++++++++++++++-----------
 1 file changed, 65 insertions(+), 24 deletions(-)

diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake
index 84319f5971..98a00fdd87 100644
--- a/cmake/OpenCVDetectCUDA.cmake
+++ b/cmake/OpenCVDetectCUDA.cmake
@@ -53,6 +53,12 @@ if(CUDA_FOUND)
   message(STATUS "CUDA detected: " ${CUDA_VERSION})
 
   set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "Turing")
+  set(_arch_fermi "2.0")
+  set(_arch_kepler "3.0;3.5;3.7")
+  set(_arch_maxwell "5.0;5.2")
+  set(_arch_pascal "6.0;6.1")
+  set(_arch_volta "7.0")
+  set(_arch_turing "7.5")
   if(NOT CMAKE_CROSSCOMPILING)
     list(APPEND _generations "Auto")
   endif()
@@ -70,29 +76,66 @@ if(CUDA_FOUND)
     unset(CUDA_ARCH_PTX CACHE)
   endif()
 
+  # Store in <result_list> the subset of the compute capabilities given in ARGN
+  # for which nvcc exits with 0 when run on cmake/checks/OpenCVDetectCudaArch.cu.
+  # The result is kept in the CUDA_SUPPORTED_CC cache entry and reused when defined.
+  macro(ocv_filter_available_architecture result_list)
+    if(DEFINED CUDA_SUPPORTED_CC)
+      set(${result_list} "${CUDA_SUPPORTED_CC}")
+    else()
+      set(CC_LIST ${ARGN})
+      foreach(target_arch ${CC_LIST})
+        string(REPLACE "." "" target_arch_short "${target_arch}")
+        set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
+        execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu"
+                         WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
+                         RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
+                         ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+        if(_nvcc_res EQUAL 0)
+          set(${result_list} "${${result_list}} ${target_arch}")
+        endif()
+      endforeach()
+      # Quoted so the call stays well-formed when no architecture was accepted.
+      string(STRIP "${${result_list}}" ${result_list})
+      set(CUDA_SUPPORTED_CC "${${result_list}}" CACHE INTERNAL "List of supported compute capability")
+    endif()
+  endmacro()
+
+  # Run nvcc with CUDA_NVCC_FLAGS and "--run" on cmake/checks/OpenCVDetectCudaArch.cu;
+  # the exit code is stored in <status> and the stripped standard output in <output>.
+  macro(ocv_detect_native_cuda_arch status output)
+    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
+                     WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
+                     RESULT_VARIABLE ${status} OUTPUT_VARIABLE ${output}
+                     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+  endmacro()
+
+  # Rewrite every "2.1" in the variable named by <_arch_bin_list> as "2.1(2.0)", in place.
+  # The input is quoted so that a ';'-separated list stays a single argument:
+  # string(REPLACE) concatenates multiple inputs, which would merge the entries.
+  macro(ocv_wipeout_deprecated _arch_bin_list)
+    string(REPLACE "2.1" "2.1(2.0)" ${_arch_bin_list} "${${_arch_bin_list}}")
+  endmacro()
+
   set(__cuda_arch_ptx "")
   if(CUDA_GENERATION STREQUAL "Fermi")
-    set(__cuda_arch_bin "2.0")
+    set(__cuda_arch_bin ${_arch_fermi})
   elseif(CUDA_GENERATION STREQUAL "Kepler")
-    set(__cuda_arch_bin "3.0 3.5 3.7")
+    set(__cuda_arch_bin ${_arch_kepler})
   elseif(CUDA_GENERATION STREQUAL "Maxwell")
-    set(__cuda_arch_bin "5.0 5.2")
+    set(__cuda_arch_bin ${_arch_maxwell})
   elseif(CUDA_GENERATION STREQUAL "Pascal")
-    set(__cuda_arch_bin "6.0 6.1")
+    set(__cuda_arch_bin ${_arch_pascal})
   elseif(CUDA_GENERATION STREQUAL "Volta")
-    set(__cuda_arch_bin "7.0")
+    set(__cuda_arch_bin ${_arch_volta})
   elseif(CUDA_GENERATION STREQUAL "Turing")
-    set(__cuda_arch_bin "7.5")
+    set(__cuda_arch_bin ${_arch_turing})
   elseif(CUDA_GENERATION STREQUAL "Auto")
-    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
-                     WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
-                     RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
-                     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+    ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
     if(NOT _nvcc_res EQUAL 0)
       message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
     else()
-      set(__cuda_arch_bin "${_nvcc_out}")
-      string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
+      string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
     endif()
   endif()
 
@@ -101,28 +144,26 @@ if(CUDA_FOUND)
       set(__cuda_arch_bin "3.2")
       set(__cuda_arch_ptx "")
     elseif(AARCH64)
-      execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
-                       WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
-                       RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
-                       ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+      ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
       if(NOT _nvcc_res EQUAL 0)
         message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
         set(__cuda_arch_bin "5.3 6.2 7.2")
       else()
         set(__cuda_arch_bin "${_nvcc_out}")
-        string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
       endif()
       set(__cuda_arch_ptx "")
     else()
-      if(CUDA_VERSION VERSION_LESS "9.0")
-        set(__cuda_arch_bin "2.0 3.0 3.5 3.7 5.0 5.2 6.0 6.1")
-      elseif(CUDA_VERSION VERSION_LESS "10.0")
-        set(__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0")
-      else()
-        set(__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5")
-      endif()
+      ocv_filter_available_architecture(__cuda_arch_bin
+          ${_arch_fermi}
+          ${_arch_kepler}
+          ${_arch_maxwell}
+          ${_arch_pascal}
+          ${_arch_volta}
+          ${_arch_turing}
+      )
     endif()
   endif()
+  ocv_wipeout_deprecated(__cuda_arch_bin)
 
   set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
   set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")