// Patch summary (leading text before the first "diff --git" header; not part of any hunk).
//
// modules/dnn/src/cuda4dnn/init.hpp
//   * checkVersions(): removes the three exact-match warnings (CUDART runtime vs. CUDART_VERSION,
//     cuDNN runtime vs. CUDNN_VERSION, and cuDNN's CUDART vs. the CUDART runtime) and replaces them
//     with a single warning that fires when the runtime cuDNN major version differs from CUDNN_MAJOR
//     or the runtime cuDNN minor version is lower than CUDNN_MINOR.
//     The runtime major/minor are derived as cudnnGetVersion() / 1000 and cudnnGetVersion() % 1000 / 100.
//     NOTE(review): this presumes the version is encoded as major*1000 + minor*100 + patch --
//     confirm against the cudnn headers of every cuDNN release that must be supported.
//   * isDeviceCompatible() / doesDeviceSupportFP16(): drop the early
//     "getDeviceCount() <= 0 -> return false" guard; the following getDevice() < 0 check is kept.
//
// modules/dnn/src/dnn.cpp
//   * Backend list (HAVE_CUDA section): the condition is reduced to haveCUDA(), and both
//     DNN_TARGET_CUDA and DNN_TARGET_CUDA_FP16 are pushed without querying the device.
//   * Net::Impl CUDA setup: cuda4dnn::checkVersions() is now called first, guarded by !cudaInfo,
//     ahead of the getDeviceCount() check; the old call that followed the creation of cudaInfo is removed.
//   * When DNN_TARGET_CUDA_FP16 is requested and doesDeviceSupportFP16() is false, the code now logs a
//     warning and sets preferableTarget to DNN_TARGET_CUDA instead of raising CV_Error.
//
diff --git a/modules/dnn/src/cuda4dnn/init.hpp b/modules/dnn/src/cuda4dnn/init.hpp index e9d997311f..f5bb7714f8 100644 --- a/modules/dnn/src/cuda4dnn/init.hpp +++ b/modules/dnn/src/cuda4dnn/init.hpp @@ -17,28 +17,18 @@ namespace cv { namespace dnn { namespace cuda4dnn { void checkVersions() { - int cudart_version = 0; - CUDA4DNN_CHECK_CUDA(cudaRuntimeGetVersion(&cudart_version)); - if (cudart_version != CUDART_VERSION) + // https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model + // cuDNN API Compatibility + // Beginning in cuDNN 7, the binary compatibility of a patch and minor releases is maintained as follows: + // Any patch release x.y.z is forward or backward-compatible with applications built against another cuDNN patch release x.y.w (meaning, of the same major and minor version number, but having w!=z). + // cuDNN minor releases beginning with cuDNN 7 are binary backward-compatible with applications built against the same or earlier patch release (meaning, an application built against cuDNN 7.x is binary compatible with cuDNN library 7.y, where y>=x). + // Applications compiled with a cuDNN version 7.y are not guaranteed to work with 7.x release when y > x. 
+ auto cudnn_bversion = cudnnGetVersion(); + auto cudnn_major_bversion = cudnn_bversion / 1000, cudnn_minor_bversion = cudnn_bversion % 1000 / 100; + if (cudnn_major_bversion != CUDNN_MAJOR || cudnn_minor_bversion < CUDNN_MINOR) { std::ostringstream oss; - oss << "CUDART reports version " << cudart_version << " which does not match with the version " << CUDART_VERSION << " with which OpenCV was built"; - CV_LOG_WARNING(NULL, oss.str().c_str()); - } - - auto cudnn_version = cudnnGetVersion(); - if (cudnn_version != CUDNN_VERSION) - { - std::ostringstream oss; - oss << "cuDNN reports version " << cudnn_version << " which does not match with the version " << CUDNN_VERSION << " with which OpenCV was built"; - CV_LOG_WARNING(NULL, oss.str().c_str()); - } - - auto cudnn_cudart_version = cudnnGetCudartVersion(); - if (cudart_version != cudnn_cudart_version) - { - std::ostringstream oss; - oss << "CUDART version " << cudnn_cudart_version << " reported by cuDNN " << cudnn_version << " does not match with the version reported by CUDART " << cudart_version; + oss << "cuDNN reports version " << cudnn_major_bversion << "." << cudnn_minor_bversion << " which is not compatible with the version " << CUDNN_MAJOR << "." 
<< CUDNN_MINOR << " with which OpenCV was built"; CV_LOG_WARNING(NULL, oss.str().c_str()); } } @@ -57,9 +47,6 @@ namespace cv { namespace dnn { namespace cuda4dnn { bool isDeviceCompatible() { - if (getDeviceCount() <= 0) - return false; - int device_id = getDevice(); if (device_id < 0) return false; @@ -80,9 +67,6 @@ namespace cv { namespace dnn { namespace cuda4dnn { bool doesDeviceSupportFP16() { - if (getDeviceCount() <= 0) - return false; - int device_id = getDevice(); if (device_id < 0) return false; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 6a2bea595b..8b8de38d2a 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -239,11 +239,10 @@ private: #endif #ifdef HAVE_CUDA - if (haveCUDA() && cuda4dnn::isDeviceCompatible()) + if (haveCUDA()) { backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); - if (cuda4dnn::doesDeviceSupportFP16()) - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); } #endif } @@ -2363,6 +2362,9 @@ struct Net::Impl : public detail::NetImplBase CV_Assert(preferableBackend == DNN_BACKEND_CUDA); #ifdef HAVE_CUDA + if (!cudaInfo) /* we need to check only once */ + cuda4dnn::checkVersions(); + if (cuda4dnn::getDeviceCount() <= 0) CV_Error(Error::StsError, "No CUDA capable device found."); @@ -2373,7 +2375,10 @@ struct Net::Impl : public detail::NetImplBase CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. 
Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration."); if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) - CV_Error(Error::StsError, "The selected CUDA device does not support FP16 operations."); + { + CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target."); + preferableTarget = DNN_TARGET_CUDA; + } if (!cudaInfo) { @@ -2384,7 +2389,6 @@ struct Net::Impl : public detail::NetImplBase auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers cudaInfo = std::unique_ptr(new CudaInfo_t(std::move(context), std::move(d2h_stream))); - cuda4dnn::checkVersions(); } cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any