From d0fe6ad10967fd2b007a4cf83b00d6f8446deb42 Mon Sep 17 00:00:00 2001
From: YashasSamaga <yashas_2010@yahoo.com>
Date: Sat, 6 Mar 2021 19:03:03 +0530
Subject: [PATCH] fix checkVersions()

---
 modules/dnn/src/cuda4dnn/init.hpp | 36 +++++++++----------------------
 modules/dnn/src/dnn.cpp           | 14 +++++++-----
 2 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/modules/dnn/src/cuda4dnn/init.hpp b/modules/dnn/src/cuda4dnn/init.hpp
index e9d997311f..f5bb7714f8 100644
--- a/modules/dnn/src/cuda4dnn/init.hpp
+++ b/modules/dnn/src/cuda4dnn/init.hpp
@@ -17,28 +17,26 @@ namespace cv { namespace dnn { namespace cuda4dnn {
 
     void checkVersions()
     {
-        int cudart_version = 0;
-        CUDA4DNN_CHECK_CUDA(cudaRuntimeGetVersion(&cudart_version));
-        if (cudart_version != CUDART_VERSION)
+        // Warn when the cuDNN library found at runtime is not binary compatible with the cuDNN version OpenCV was built against.
+        //
+        // https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model
+        // cuDNN API Compatibility
+        // Beginning in cuDNN 7, the binary compatibility of a patch and minor releases is maintained as follows:
+        //     Any patch release x.y.z is forward or backward-compatible with applications built against another cuDNN patch release x.y.w (meaning, of the same major and minor version number, but having w!=z).
+        //     cuDNN minor releases beginning with cuDNN 7 are binary backward-compatible with applications built against the same or earlier patch release (meaning, an application built against cuDNN 7.x is binary compatible with cuDNN library 7.y, where y>=x).
+        //     Applications compiled with a cuDNN version 7.y are not guaranteed to work with 7.x release when y > x.
+        //
+        // cudnnGetVersion() reports CUDNN_VERSION of the loaded library. Up to cuDNN 8 it is encoded as
+        // MAJOR * 1000 + MINOR * 100 + PATCHLEVEL; cuDNN 9 raised the major multiplier to 10000, so the
+        // multiplier has to be selected from the reported value before major/minor can be extracted.
+        const int cudnn_bversion = static_cast<int>(cudnnGetVersion());
+        const int cudnn_major_scale = (cudnn_bversion >= 9 * 10000) ? 10000 : 1000;
+        const int cudnn_major_bversion = cudnn_bversion / cudnn_major_scale;
+        const int cudnn_minor_bversion = cudnn_bversion % cudnn_major_scale / 100;
+        if (cudnn_major_bversion != CUDNN_MAJOR || cudnn_minor_bversion < CUDNN_MINOR)
         {
             std::ostringstream oss;
-            oss << "CUDART reports version " << cudart_version << " which does not match with the version " << CUDART_VERSION << " with which OpenCV was built";
-            CV_LOG_WARNING(NULL, oss.str().c_str());
-        }
-
-        auto cudnn_version = cudnnGetVersion();
-        if (cudnn_version != CUDNN_VERSION)
-        {
-            std::ostringstream oss;
-            oss << "cuDNN reports version " << cudnn_version << " which does not match with the version " << CUDNN_VERSION << " with which OpenCV was built";
-            CV_LOG_WARNING(NULL, oss.str().c_str());
-        }
-
-        auto cudnn_cudart_version = cudnnGetCudartVersion();
-        if (cudart_version != cudnn_cudart_version)
-        {
-            std::ostringstream oss;
-            oss << "CUDART version " << cudnn_cudart_version << " reported by cuDNN " << cudnn_version << " does not match with the version reported by CUDART " << cudart_version;
+            oss << "cuDNN reports version " << cudnn_major_bversion << "." << cudnn_minor_bversion << " which is not compatible with the version " << CUDNN_MAJOR << "." << CUDNN_MINOR << " with which OpenCV was built";
             CV_LOG_WARNING(NULL, oss.str().c_str());
         }
     }
@@ -57,9 +47,6 @@ namespace cv { namespace dnn { namespace cuda4dnn {
 
     bool isDeviceCompatible()
     {
-        if (getDeviceCount() <= 0)
-            return false;
-
         int device_id = getDevice();
         if (device_id < 0)
             return false;
@@ -80,9 +67,6 @@ namespace cv { namespace dnn { namespace cuda4dnn {
 
     bool doesDeviceSupportFP16()
     {
-        if (getDeviceCount() <= 0)
-            return false;
-
         int device_id = getDevice();
         if (device_id < 0)
             return false;
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 6a2bea595b..8b8de38d2a 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -239,11 +239,10 @@ private:
 #endif
 
 #ifdef HAVE_CUDA
-        if (haveCUDA() && cuda4dnn::isDeviceCompatible())
+        if (haveCUDA()) // the device itself is no longer probed while listing the backends
         {
             backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
-            if (cuda4dnn::doesDeviceSupportFP16())
-                backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
+            backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); // listed without checking FP16 support of the device
         }
 #endif
     }
@@ -2363,6 +2362,9 @@ struct Net::Impl : public detail::NetImplBase
         CV_Assert(preferableBackend == DNN_BACKEND_CUDA);
 
 #ifdef HAVE_CUDA
+        if (!cudaInfo) /* versions need checking only once: skip the check when cudaInfo already exists */
+            cuda4dnn::checkVersions(); // runs ahead of the device queries below
+
         if (cuda4dnn::getDeviceCount() <= 0)
             CV_Error(Error::StsError, "No CUDA capable device found.");
 
@@ -2373,7 +2375,10 @@ struct Net::Impl : public detail::NetImplBase
             CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");
 
         if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
-            CV_Error(Error::StsError, "The selected CUDA device does not support FP16 operations.");
+        { // missing FP16 support is no longer an error: warn and continue with a different target
+            CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target.");
+            preferableTarget = DNN_TARGET_CUDA; // the target the warning above calls FP32
+        }
 
         if (!cudaInfo)
         {
@@ -2384,7 +2389,6 @@ struct Net::Impl : public detail::NetImplBase
 
             auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
             cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
-            cuda4dnn::checkVersions();
         }
 
         cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any