Merge pull request #2998 from ernest-galbrun:cuda_concurrency

pull/2981/merge
Vadim Pisarevsky 10 years ago
commit 0c749fd7e5
  1. 31
      modules/core/src/cuda_buffer_pool.cpp
  2. 16
      modules/core/src/cuda_stream.cpp
  3. 8
      modules/nonfree/src/surf.cuda.cpp

@ -207,7 +207,6 @@ namespace
MemoryStack* MemoryPool::getFreeMemStack()
{
AutoLock lock(mtx_);
if (!initialized_)
initilizeImpl();
@ -256,22 +255,31 @@ namespace
namespace
{
Mutex mtx_;
bool memory_pool_manager_initialized;
// Holds a vector of MemoryPool objects (pools_) and declares the single
// file-scope instance `manager` as part of the class definition.
//
// The constructor does not size pools_; Init() resizes it to the number of
// CUDA-enabled devices when that number is positive.
class MemoryPoolManager
{
public:
    MemoryPoolManager();
    ~MemoryPoolManager();

    // Resizes pools_ to the CUDA-enabled device count (no-op if it is <= 0).
    void Init();

    // Looks up a pool by device id (definition is outside this view).
    MemoryPool* getPool(int deviceId);

private:
    std::vector<MemoryPool> pools_;
} manager;
//MemoryPoolManager ;
// Intentionally empty: pools_ is sized by Init(), not by the constructor.
// The previous body queried the CUDA device count into a local that was never
// used, which only added a CUDA call to object construction.
MemoryPoolManager::MemoryPoolManager()
{
}
// Sizes pools_ to the number of CUDA-enabled devices.
// Leaves pools_ untouched when the reported count is zero or negative.
void MemoryPoolManager::Init()
{
    const int numDevices = getCudaEnabledDeviceCount();
    if (numDevices <= 0)
        return;

    pools_.resize(numDevices);
}
@ -280,7 +288,7 @@ namespace
{
for (size_t i = 0; i < pools_.size(); ++i)
{
cudaSetDevice(i);
cudaSetDevice(static_cast<int>(i));
pools_[i].release();
}
}
@ -293,7 +301,14 @@ namespace
// Returns the pool for `deviceId` from the file-scope `manager`, running
// manager.Init() first if it has not been run yet.
//
// The check-and-initialize sequence runs under mtx_. The lock is released
// before the pool lookup (inner scope ends before getPool is called).
MemoryPool* memPool(int deviceId)
{
    {
        AutoLock lock(mtx_);
        if (!memory_pool_manager_initialized)
        {
            manager.Init();
            // Set the flag only after Init() returns, so that an exception
            // thrown by Init() does not leave the manager marked as
            // initialized; the next call will retry.
            memory_pool_manager_initialized = true;
        }
    }
    return manager.getPool(deviceId);
}
}
@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream),
if (enableMemoryPool)
{
const int deviceId = getDevice();
memStack_ = memPool(deviceId)->getFreeMemStack();
{
AutoLock lock(mtx_);
memStack_ = memPool(deviceId)->getFreeMemStack();
}
DeviceInfo devInfo(deviceId);
alignment_ = devInfo.textureAlignment();
}

@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
#endif
}
// File-local state used by Stream::Null() to create the default stream lazily.
namespace
{
// Set to true by Stream::Null() right after default_stream is assigned.
bool default_stream_is_initialized;
// Locked by Stream::Null() around the check-and-create sequence.
Mutex mtx;
// Assigned in Stream::Null() to a Stream built from Impl(0).
Ptr<Stream> default_stream;
}
// Returns a reference to the shared default Stream (built from Impl(0)).
//
// The stream object is created on the first call. The creation check and the
// assignment are performed while holding `mtx`, and the
// `default_stream_is_initialized` flag is set only after `default_stream`
// has been assigned.
Stream& cv::cuda::Stream::Null()
{
    AutoLock lock(mtx);
    if (!default_stream_is_initialized)
    {
        default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0))));
        default_stream_is_initialized = true;
    }
    return *default_stream;
}
cv::cuda::Stream::operator bool_type() const

@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf;
namespace
{
Mutex mtx;
int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
@ -166,7 +168,6 @@ namespace
{
const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave;
loadOctaveConstants(octave, layer_rows, layer_cols);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers);
@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{
AutoLock lock(mtx);
if (!img.empty())
{
SURF_CUDA_Invoker surf(*this, img, mask);
@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints)
{
AutoLock lock(mtx);
if (!img.empty())
{
SURF_CUDA_Invoker surf(*this, img, mask);
@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
AutoLock lock(mtx);
GpuMat keypointsGPU;
(*this)(img, mask, keypointsGPU);
@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
GpuMat& descriptors, bool useProvidedKeypoints)
{
AutoLock lock(mtx);
GpuMat keypointsGPU;
if (useProvidedKeypoints)
@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors, bool useProvidedKeypoints)
{
AutoLock lock(mtx);
GpuMat descriptorsGPU;
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);

Loading…
Cancel
Save