From 6a769c92b3b6bcfb6c312fe4df9dd908fd354ae2 Mon Sep 17 00:00:00 2001
From: Ernest Galbrun <ernest.galbrun@univ-lorraine.fr>
Date: Wed, 23 Jul 2014 14:16:53 +0200
Subject: [PATCH 1/2] modified default stream initialization to allow
 concurrent calls; modified cuda surf.cuda.cpp to allow concurrent calls

---
 modules/core/src/cuda_buffer_pool.cpp | 31 +++++++++++++++++++++------
 modules/core/src/cuda_stream.cpp      | 16 ++++++++++++--
 modules/nonfree/src/surf.cuda.cpp     |  8 ++++++-
 3 files changed, 45 insertions(+), 10 deletions(-)
diff --git a/modules/core/src/cuda_buffer_pool.cpp b/modules/core/src/cuda_buffer_pool.cpp
index ea060a7c20..e5caf6ef25 100644
--- a/modules/core/src/cuda_buffer_pool.cpp
+++ b/modules/core/src/cuda_buffer_pool.cpp
@@ -207,7 +207,6 @@ namespace
     MemoryStack* MemoryPool::getFreeMemStack()
     {
         AutoLock lock(mtx_);
-
         if (!initialized_)
             initilizeImpl();
 
@@ -256,22 +255,31 @@ namespace
 
 namespace
 {
+    Mutex mtx_; // locked by memPool() around the one-time manager.Init() call
+    bool memory_pool_manager_initialized; // namespace-scope bool, zero-initialized to false
+
     class MemoryPoolManager
     {
     public:
         MemoryPoolManager();
         ~MemoryPoolManager();
+        void Init(); // sizes pools_ from getCudaEnabledDeviceCount()
 
         MemoryPool* getPool(int deviceId);
 
     private:
         std::vector<MemoryPool> pools_;
-    };
+    } manager; // the single file-level instance used by memPool()
+
+    // The device query lives in Init() rather than the constructor, so it runs on first memPool() use.
 
     MemoryPoolManager::MemoryPoolManager()
     {
-        int deviceCount = getCudaEnabledDeviceCount();
+    } // intentionally empty; pools_ is filled by Init()
 
+    void MemoryPoolManager::Init()
+    {
+        int deviceCount = getCudaEnabledDeviceCount();
         if (deviceCount > 0)
             pools_.resize(deviceCount);
     }
@@ -280,7 +288,7 @@ namespace
     {
         for (size_t i = 0; i < pools_.size(); ++i)
         {
-            cudaSetDevice(i);
+            cudaSetDevice(static_cast<int>(i));
             pools_[i].release();
         }
     }
@@ -293,7 +301,14 @@ namespace
 
     MemoryPool* memPool(int deviceId)
     {
-        static MemoryPoolManager manager;
+        {
+            AutoLock lock(mtx_); // guards the one-time Init() of the file-level manager
+            if (!memory_pool_manager_initialized)
+            {
+                manager.Init();
+                memory_pool_manager_initialized = true; // set only after Init() returned, so a throw is retried on the next call
+            }
+        }
         return manager.getPool(deviceId);
     }
 }
@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream),
     if (enableMemoryPool)
     {
         const int deviceId = getDevice();
+        // No outer lock: memPool() locks mtx_ itself, so holding mtx_ here would re-enter
+        // it on the same thread; getFreeMemStack() already locks inside the pool.
         memStack_ = memPool(deviceId)->getFreeMemStack();
 
         DeviceInfo devInfo(deviceId);
         alignment_ = devInfo.textureAlignment();
     }
diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp
index 9f190c3fab..1f73a8e5a5 100644
--- a/modules/core/src/cuda_stream.cpp
+++ b/modules/core/src/cuda_stream.cpp
@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
 #endif
 }
 
+namespace 
+{
+    bool default_stream_is_initialized;
+    Mutex mtx;
+    Ptr<Stream> default_stream;
+}
+
 Stream& cv::cuda::Stream::Null()
 {
-    static Stream s(Ptr<Impl>(new Impl(0)));
-    return s;
+    AutoLock lock(mtx); // taken on every call, so first-use construction is serialized
+    if (!default_stream_is_initialized)
+    {
+        default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0)))); // NOTE(review): Impl(0) presumably wraps the CUDA default stream - confirm
+        default_stream_is_initialized = true; // flag flips only after the object exists
+    }
+    return *default_stream; // every caller gets a reference to the same file-level Stream
 }
 
 cv::cuda::Stream::operator bool_type() const
diff --git a/modules/nonfree/src/surf.cuda.cpp b/modules/nonfree/src/surf.cuda.cpp
index 4089b506b3..461ba0f7ef 100644
--- a/modules/nonfree/src/surf.cuda.cpp
+++ b/modules/nonfree/src/surf.cuda.cpp
@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf;
 
 namespace
 {
+    Mutex mtx; // file-level lock acquired by SURF_CUDA::operator() overloads in this file
+
     int calcSize(int octave, int layer)
     {
         /* Wavelet size at first layer of first octave. */
@@ -166,7 +168,6 @@ namespace
             {
                 const int layer_rows = img_rows >> octave;
                 const int layer_cols = img_cols >> octave;
-
                 loadOctaveConstants(octave, layer_rows, layer_cols);
 
                 icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers);
@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std:
 
 void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
 {
+    AutoLock lock(mtx);
     if (!img.empty())
     {
         SURF_CUDA_Invoker surf(*this, img, mask);
@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
 void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
                                    bool useProvidedKeypoints)
 {
+    AutoLock lock(mtx);
     if (!img.empty())
     {
         SURF_CUDA_Invoker surf(*this, img, mask);
@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
 
 void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
 {
+    // Not locked here: the GpuMat overload called below acquires mtx itself (avoids re-entering mtx).
     GpuMat keypointsGPU;
 
     (*this)(img, mask, keypointsGPU);
@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
 void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
     GpuMat& descriptors, bool useProvidedKeypoints)
 {
+    // Not locked here: the GpuMat overload called below acquires mtx itself (avoids re-entering mtx).
     GpuMat keypointsGPU;
 
     if (useProvidedKeypoints)
@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
 void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
     std::vector<float>& descriptors, bool useProvidedKeypoints)
 {
+    // Not locked here: mtx is acquired further down the call chain, in the GpuMat overload.
     GpuMat descriptorsGPU;
 
     (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);

From 551ab83cf21c727a67151a9316cbfa2fdeccca44 Mon Sep 17 00:00:00 2001
From: Ernest Galbrun <ernest.galbrun@univ-lorraine.fr>
Date: Wed, 23 Jul 2014 14:34:22 +0200
Subject: [PATCH 2/2] remove trailing whitespace

---
 modules/core/src/cuda_stream.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp
index 1f73a8e5a5..98a29df19b 100644
--- a/modules/core/src/cuda_stream.cpp
+++ b/modules/core/src/cuda_stream.cpp
@@ -190,7 +190,7 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
 #endif
 }
 
-namespace 
+namespace
 {
     bool default_stream_is_initialized;
     Mutex mtx;