diff --git a/CMakeLists.txt b/CMakeLists.txt index df8e9a2165..aebdcd2967 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -711,44 +711,49 @@ if(WITH_CUDA) set(CUDA_ARCH_BIN "1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") - # These variables are used in config templates string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") - # Ckeck if user specified 1.0 compute capability + # Check if user specified 1.0 compute capability: we don't support it string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}") set(CUDA_ARCH_BIN_OR_PTX_10 0) if(NOT ${HAS_ARCH_10} STREQUAL "") set(CUDA_ARCH_BIN_OR_PTX_10 1) endif() - # Flags to be set + # NVCC flags to be set set(NVCC_FLAGS_EXTRA "") - # These variables are passed into the template + # These vars will be passed into the templates set(OPENCV_CUDA_ARCH_BIN "") set(OPENCV_CUDA_ARCH_PTX "") + set(OPENCV_CUDA_ARCH_FEATURES "") - # Tell nvcc to add binaries for the specified GPUs + # Tell NVCC to add binaries for the specified GPUs string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}") foreach(ARCH IN LISTS ARCH_LIST) if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified PTX for the concrete BIN set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}") + set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${CMAKE_MATCH_2}") else() + # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH}) set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}") + set(OPENCV_CUDA_ARCH_FEATURES 
"${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}") endif() endforeach() - # Tell nvcc to add PTX intermediate code for the specified architectures + # Tell NVCC to add PTX intermediate code for the specified architectures string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}") foreach(ARCH IN LISTS ARCH_LIST) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH}) set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}") - endforeach() + set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}") + endforeach() - # Wil; be processed in other scripts + # These vars will be processed in other scripts set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA}) set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}") diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index 9f2cc5d69d..d11b95c520 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -72,9 +72,9 @@ namespace CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature) { if (feature == NATIVE_DOUBLE) - return hasEqualOrGreater(1, 3); + return ::compareToSet(CUDA_ARCH_FEATURES, 13, std::greater_equal<int>()); if (feature == ATOMICS) - return hasEqualOrGreater(1, 1); + return ::compareToSet(CUDA_ARCH_FEATURES, 11, std::greater_equal<int>()); return true; }