From 13a6d0b92a2ff891acfd0563cb1a8e9003cbe7a0 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Thu, 27 Jan 2011 08:26:10 +0000 Subject: [PATCH] fixed parsing GPU archs in BIN(PTX) format --- CMakeLists.txt | 18 ++++++++++++++---- cvconfig.h.cmake | 8 ++++---- modules/gpu/src/initialization.cpp | 10 +++++----- modules/gpu/src/precomp.hpp | 2 +- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 586f8eb5ec..df8e9a2165 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -712,24 +712,32 @@ if(WITH_CUDA) set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") # These variables are used in config templates - string(REGEX REPLACE "\\." "" ARCH_GPU_NO_POINTS "${CUDA_ARCH_BIN}") + string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") # Ckeck if user specified 1.0 compute capability string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}") + set(CUDA_ARCH_BIN_OR_PTX_10 0) if(NOT ${HAS_ARCH_10} STREQUAL "") - set(OPENCV_ARCH_GPU_OR_PTX_10 1) + set(CUDA_ARCH_BIN_OR_PTX_10 1) endif() - set(NVCC_FLAGS_EXTRA "") + # Flags to be set + set(NVCC_FLAGS_EXTRA "") + + # These variables are passed into the template + set(OPENCV_CUDA_ARCH_BIN "") + set(OPENCV_CUDA_ARCH_PTX "") # Tell nvcc to add binaries for the specified GPUs - string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_GPU_NO_POINTS}") + string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}") foreach(ARCH IN LISTS ARCH_LIST) if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)") set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}") else() set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH}) + set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}") endif() 
endforeach() @@ -737,8 +745,10 @@ if(WITH_CUDA) string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}") foreach(ARCH IN LISTS ARCH_LIST) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH}) + set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}") endforeach() + # Will be processed in other scripts set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA}) set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}") diff --git a/cvconfig.h.cmake b/cvconfig.h.cmake index 5b35d247df..e04cc47da0 100644 --- a/cvconfig.h.cmake +++ b/cvconfig.h.cmake @@ -167,13 +167,13 @@ #cmakedefine HAVE_CUDA /* Compile for 'real' NVIDIA GPU architectures */ -#define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}" +#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" /* Compile for 'virtual' NVIDIA PTX architectures */ -#define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}" +#define CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX}" -/* Create PTX or CUBIN for 1.0 compute capability */ -#cmakedefine OPENCV_ARCH_GPU_OR_PTX_10 +/* Create PTX or BIN for 1.0 compute capability */ +#cmakedefine CUDA_ARCH_BIN_OR_PTX_10 /* VideoInput library */ #cmakedefine HAVE_VIDEOINPUT diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index 0104bd02a2..17bd150f0f 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -164,33 +164,33 @@ namespace CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, std::equal_to()); + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to()); } CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::less_equal()); } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, + return ::compare(CUDA_ARCH_PTX, major * 10 
+ minor, std::greater_equal()); } CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, std::equal_to()); + return ::compare(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to()); } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, + return ::compare(CUDA_ARCH_BIN, major * 10 + minor, std::greater_equal()); } diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index 624e67fce1..d5ad3cc727 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -85,7 +85,7 @@ #error "Insufficient NPP version, please update it." #endif -#if defined(OPENCV_ARCH_GPU_OR_PTX_10) +#if defined(CUDA_ARCH_BIN_OR_PTX_10) #error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0" #endif