From c494a5db92083c2525615c61d87f6f7de38df6b3 Mon Sep 17 00:00:00 2001 From: Vishal Bhaskar Chiluka Date: Mon, 5 Aug 2019 14:37:11 +0530 Subject: [PATCH] Lazy loading nvcuda.dll --- modules/cudaoptflow/src/nvidiaOpticalFlow.cpp | 160 +++++++++++++----- 1 file changed, 120 insertions(+), 40 deletions(-) diff --git a/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp b/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp index 7b3aa0699..afe91e609 100644 --- a/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp +++ b/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp @@ -29,11 +29,14 @@ CV_Error(cv::Error::HeaderIsNull, "Nvidia Optical Flow headers not found. Make s //macro for dll loading #if defined(_WIN64) -#define MODULENAME TEXT("nvofapi64.dll") +#define OF_MODULENAME TEXT("nvofapi64.dll") +#define CUDA_MODULENAME TEXT("nvcuda.dll") #elif defined(_WIN32) -#define MODULENAME TEXT("nvofapi.dll") +#define OF_MODULENAME TEXT("nvofapi.dll") +#define CUDA_MODULENAME TEXT("nvcuda.dll") #else -#define MODULENAME "libnvidia-opticalflow.so.1" +#define OF_MODULENAME "libnvidia-opticalflow.so.1" +#define CUDA_MODULENAME "libcuda.so" #endif #define NVOF_API_CALL(nvOFAPI) \ @@ -112,6 +115,114 @@ using namespace cv::cuda; namespace { +class LoadNvidiaModules +{ +private: + typedef int(*PFNCudaCuCtxGetCurrent)(CUcontext*); + typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda) + (uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf); + + PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx; + PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda; + HMODULE m_hOFModule; + HMODULE m_hCudaModule; + bool m_isFailed; + + LoadNvidiaModules() : + m_cudaDriverAPIGetCurrentCtx(NULL), + m_NvOFAPICreateInstanceCuda(NULL), + m_isFailed(false) + { +//Loading Cuda Library +#if defined(_WIN32) || defined(_WIN64) + HMODULE hCudaModule = LoadLibrary(CUDA_MODULENAME); +#else + void *hCudaModule = dlopen(CUDA_MODULENAME, RTLD_LAZY); +#endif + + if (hCudaModule == NULL) + { + m_isFailed = true; + CV_Error(Error::StsBadFunc, "Cannot find Cuda library."); + } + m_hCudaModule = hCudaModule; + +#if defined(_WIN32) + m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)GetProcAddress(m_hCudaModule, "cuCtxGetCurrent"); +#else + m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)dlsym(m_hCudaModule, "cuCtxGetCurrent"); +#endif + if (!m_cudaDriverAPIGetCurrentCtx) + { + m_isFailed = true; + CV_Error(Error::StsBadFunc, + "Cannot find Cuda Driver API : cuCtxGetCurrent() entry in Cuda library"); + } + +//Loading Optical Flow Library +#if defined(_WIN32) || defined(_WIN64) + HMODULE hOFModule = LoadLibrary(OF_MODULENAME); +#else + void *hOFModule = dlopen(OF_MODULENAME, RTLD_LAZY); +#endif + + if (hOFModule == NULL) + { + m_isFailed = true; + CV_Error(Error::StsBadFunc, "Cannot find NvOF library."); + } + m_hOFModule = hOFModule; + +#if defined(_WIN32) + m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hOFModule, "NvOFAPICreateInstanceCuda"); +#else + m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)dlsym(m_hOFModule, "NvOFAPICreateInstanceCuda"); +#endif + if (!m_NvOFAPICreateInstanceCuda) + { + m_isFailed = true; + CV_Error(Error::StsBadFunc, + "Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library"); + } + }; + + ~LoadNvidiaModules() + { + if (NULL != m_hCudaModule) + { +#if defined(_WIN32) || defined(_WIN64) + FreeLibrary(m_hCudaModule); +#else + dlclose(m_hCudaModule); +#endif + } + if (NULL != m_hOFModule) + { +#if defined(_WIN32) || defined(_WIN64) + FreeLibrary(m_hOFModule); +#else + dlclose(m_hOFModule); +#endif + } + m_hCudaModule = NULL; + m_hOFModule = NULL; + m_cudaDriverAPIGetCurrentCtx = NULL; + m_NvOFAPICreateInstanceCuda = NULL; + } + +public: + static LoadNvidiaModules& Init() + { + static LoadNvidiaModules LoadLibraryObj; + if (LoadLibraryObj.m_isFailed) + CV_Error(Error::StsError, "Can't initialize LoadNvidiaModules Class Object"); + return LoadLibraryObj; + } + + PFNCudaCuCtxGetCurrent GetCudaLibraryFunctionPtr() { return m_cudaDriverAPIGetCurrentCtx; } + PFNNvOFAPICreateInstanceCuda GetOFLibraryFunctionPtr() { return m_NvOFAPICreateInstanceCuda; } +}; + class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0 { private: @@ -169,7 +280,6 @@ private: NvOFHandle GetHandle() { return m_hOF; } protected: - HMODULE m_hModule; //module handle to load nvof dll std::mutex m_lock; public: @@ -198,6 +308,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl( m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8), m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4) { + LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init(); + int nGpu = 0; cuSafeCall(cudaGetDeviceCount(&nGpu)); @@ -208,7 +320,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl( cuSafeCall(cudaSetDevice(m_gpuId)); cuSafeCall(cudaFree(m_cuContext)); - cuSafeCall(cuCtxGetCurrent(&m_cuContext)); + + cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext)); if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4) { @@ -253,38 +366,9 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl( m_costBufElementSize = sizeof(uint32_t); } -#if defined(_WIN32) || defined(_WIN64) -HMODULE hModule = LoadLibrary(MODULENAME); -#else -void *hModule = dlopen(MODULENAME, RTLD_LAZY); -#endif - - if (hModule == NULL) - { - CV_Error(Error::StsBadFunc, - "Cannot find NvOF library."); - } - m_hModule = hModule; - - typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda) - (uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf); - -#if defined(_WIN32) -PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda - = (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hModule, "NvOFAPICreateInstanceCuda"); -#else -PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda - = (PFNNvOFAPICreateInstanceCuda)dlsym(m_hModule, "NvOFAPICreateInstanceCuda"); -#endif - if (!NvOFAPICreateInstanceCuda) - { - CV_Error(Error::StsBadFunc, - "Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library"); - } - m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST()); - NVOF_API_CALL(NvOFAPICreateInstanceCuda(NV_OF_API_VERSION, m_ofAPI.get())); + NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get())); NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF)); memset(&m_initParams, 0, sizeof(m_initParams)); @@ -416,9 +500,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu } } - cuSafeCall(cuCtxPushCurrent(m_cuContext)); inputStream.waitForCompletion(); - cuSafeCall(cuCtxPopCurrent(&m_cuContext)); //Execute Call NV_OF_EXECUTE_INPUT_PARAMS exeInParams; @@ -436,9 +518,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu m_hCostBuffer : nullptr;; NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams)); - cuSafeCall(cuCtxPushCurrent(m_cuContext)); outputStream.waitForCompletion(); - cuSafeCall(cuCtxPopCurrent(&m_cuContext)); if (_flow.isMat()) flowXYGpuMat.download(_flow); @@ -460,7 +540,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu else CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat"); } - cuSafeCall(cuCtxSynchronize()); + cuSafeCall(cudaDeviceSynchronize()); } void NvidiaOpticalFlowImpl::collectGarbage()