From 843df494c7eec6492d1e7c46b6612c0cdb321e9a Mon Sep 17 00:00:00 2001 From: Kevin Christensen Date: Tue, 9 Aug 2022 20:21:39 -0700 Subject: [PATCH 1/2] fix cuda mem leak and move cuda malloc out of critical path --- modules/cudaimgproc/src/cuda/gftt.cu | 13 +++++-------- modules/cudaimgproc/src/gftt.cpp | 14 +++++++++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/modules/cudaimgproc/src/cuda/gftt.cu b/modules/cudaimgproc/src/cuda/gftt.cu index 66bd6e0db..b56556110 100644 --- a/modules/cudaimgproc/src/cuda/gftt.cu +++ b/modules/cudaimgproc/src/cuda/gftt.cu @@ -87,25 +87,22 @@ namespace cv { namespace cuda { namespace device } } - int findCorners_gpu(const cudaTextureObject_t &eigTex, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream) + int findCorners_gpu(const cudaTextureObject_t &eigTex, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, int* counterPtr, cudaStream_t stream) { - int* counter_ptr; - cudaSafeCall( cudaMalloc(&counter_ptr, sizeof(int)) ); - - cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) ); + cudaSafeCall( cudaMemsetAsync(counterPtr, 0, sizeof(int), stream) ); dim3 block(16, 16); dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); if (mask.data) - findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, rows, cols, eigTex, counter_ptr); + findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, rows, cols, eigTex, counterPtr); else - findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, rows, cols, eigTex, counter_ptr); + findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, rows, cols, eigTex, counterPtr); cudaSafeCall( cudaGetLastError() ); int count; - cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) ); + cudaSafeCall( cudaMemcpyAsync(&count, counterPtr, sizeof(int), cudaMemcpyDeviceToHost, stream) ); if 
(stream) cudaSafeCall(cudaStreamSynchronize(stream)); else diff --git a/modules/cudaimgproc/src/gftt.cpp b/modules/cudaimgproc/src/gftt.cpp index f25158a68..544cd8834 100644 --- a/modules/cudaimgproc/src/gftt.cpp +++ b/modules/cudaimgproc/src/gftt.cpp @@ -55,7 +55,7 @@ namespace cv { namespace cuda { namespace device { namespace gfft { - int findCorners_gpu(const cudaTextureObject_t &eigTex_, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream); + int findCorners_gpu(const cudaTextureObject_t &eigTex_, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, int* counterPtr, cudaStream_t stream); void sortCorners_gpu(const cudaTextureObject_t &eigTex_, float2* corners, int count, cudaStream_t stream); } }}} @@ -67,7 +67,7 @@ namespace public: GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance, int blockSize, bool useHarrisDetector, double harrisK); - + ~GoodFeaturesToTrackDetector(); void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream); private: @@ -82,6 +82,8 @@ namespace GpuMat buf_; GpuMat eig_; GpuMat tmpCorners_; + + int* counterPtr_; }; GoodFeaturesToTrackDetector::GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance, @@ -93,6 +95,12 @@ namespace cornerCriteria_ = useHarrisDetector ? 
cuda::createHarrisCorner(srcType, blockSize, 3, harrisK) : cuda::createMinEigenValCorner(srcType, blockSize, 3); + cudaSafeCall(cudaMalloc(&counterPtr_, sizeof(int))); + } + + GoodFeaturesToTrackDetector::~GoodFeaturesToTrackDetector() + { + cudaSafeCall(cudaFree(counterPtr_)); } void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream) @@ -125,7 +133,7 @@ namespace PtrStepSzf eig = eig_; cv::cuda::device::createTextureObjectPitch2D(&eigTex_, eig, texDesc); - int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, stream_); + int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, counterPtr_, stream_); if (total == 0) From 1d1dbe37f49704811af1528b9e51e1277cf61aa8 Mon Sep 17 00:00:00 2001 From: Kevin Christensen Date: Wed, 10 Aug 2022 07:17:37 -0700 Subject: [PATCH 2/2] destroy texture object --- modules/cudaimgproc/src/gftt.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/cudaimgproc/src/gftt.cpp b/modules/cudaimgproc/src/gftt.cpp index 544cd8834..ae19087aa 100644 --- a/modules/cudaimgproc/src/gftt.cpp +++ b/modules/cudaimgproc/src/gftt.cpp @@ -135,15 +135,17 @@ namespace int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, counterPtr_, stream_); - if (total == 0) { _corners.release(); + cudaSafeCall( cudaDestroyTextureObject(eigTex_) ); return; } sortCorners_gpu(eigTex_, tmpCorners_.ptr<float2>(), total, stream_); + cudaSafeCall( cudaDestroyTextureObject(eigTex_) ); + if (minDistance_ < 1) { tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners, stream);