From 5ea8085220ae32a6b368f8a00146b73e39af3fb2 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 7 Oct 2013 18:25:55 +0400 Subject: [PATCH] added BufferPool class --- modules/core/include/opencv2/core/cuda.hpp | 1 + .../include/opencv2/core/private.cuda.hpp | 24 ++++++++++---- modules/core/src/cuda_buffer_pool.cpp | 31 ++++++++++--------- modules/core/src/cuda_stream.cpp | 10 ++++++ modules/cuda/perf/perf_buffer_pool.cpp | 10 +++--- modules/cuda/test/test_buffer_pool.cpp | 18 +++++------ 6 files changed, 60 insertions(+), 34 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 90c21cf64c..1911959d67 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -398,6 +398,7 @@ private: Stream(const Ptr& impl); friend struct StreamAccessor; + friend class BufferPool; }; class CV_EXPORTS Event diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp index 3760c18880..20888849dc 100644 --- a/modules/core/include/opencv2/core/private.cuda.hpp +++ b/modules/core/include/opencv2/core/private.cuda.hpp @@ -92,24 +92,36 @@ namespace cv { namespace cuda { class MemoryStack; - class CV_EXPORTS BufferAllocator : public GpuMat::Allocator + class CV_EXPORTS StackAllocator : public GpuMat::Allocator { public: - explicit BufferAllocator(Stream& stream); - ~BufferAllocator(); + explicit StackAllocator(cudaStream_t stream); + ~StackAllocator(); bool allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize); void free(uchar* devPtr, int* refcount); private: - BufferAllocator(const BufferAllocator&); - BufferAllocator& operator =(const BufferAllocator&); + StackAllocator(const StackAllocator&); + StackAllocator& operator =(const StackAllocator&); + cudaStream_t stream_; MemoryStack* memStack_; - Stream stream_; size_t alignment_; }; + class CV_EXPORTS BufferPool + { + public: + explicit BufferPool(Stream& stream); + + GpuMat getBuffer(int rows, int cols, int type); + GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); } + + private: + GpuMat::Allocator* allocator_; + }; + CV_EXPORTS void setBufferAllocatorUsage(bool on); CV_EXPORTS void allocateMemoryPool(int deviceId, size_t stackSize, int stackCount); diff --git a/modules/core/src/cuda_buffer_pool.cpp b/modules/core/src/cuda_buffer_pool.cpp index 162811eba0..dd24f4f2d6 100644 --- a/modules/core/src/cuda_buffer_pool.cpp +++ b/modules/core/src/cuda_buffer_pool.cpp @@ -299,14 +299,14 @@ namespace } ///////////////////////////////////////////////////////////// -/// BufferAllocator +/// StackAllocator namespace { bool enableMemoryPool = true; } -cv::cuda::BufferAllocator::BufferAllocator(Stream& stream) : memStack_(0), stream_(stream) +cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0) { if (enableMemoryPool) { @@ -318,19 +318,12 @@ cv::cuda::BufferAllocator::BufferAllocator(Stream& stream) : memStack_(0), strea } } -namespace +cv::cuda::StackAllocator::~StackAllocator() { - void CUDART_CB returnMemStackCallback(cudaStream_t, cudaError_t, void* userData) - { - MemoryStack* memStack = static_cast(userData); - memStack->pool->returnMemStack(memStack); - } -} + cudaStreamSynchronize(stream_); -cv::cuda::BufferAllocator::~BufferAllocator() -{ if (memStack_ != 0) - CV_CUDEV_SAFE_CALL( cudaStreamAddCallback(StreamAccessor::getStream(stream_), returnMemStackCallback, memStack_, 0) ); + memStack_->pool->returnMemStack(memStack_); } namespace @@ -344,7 +337,7 @@ namespace } } -bool cv::cuda::BufferAllocator::allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize) +bool cv::cuda::StackAllocator::allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize) { if (memStack_ == 0) return false; @@ -376,7 +369,7 @@ bool cv::cuda::BufferAllocator::allocate(uchar** devPtr, size_t* step, int** ref return true; } -void cv::cuda::BufferAllocator::free(uchar* devPtr, int* refcount) +void cv::cuda::StackAllocator::free(uchar* devPtr, int* refcount) { if (memStack_ == 0) return; @@ -413,4 +406,14 @@ void cv::cuda::allocateMemoryPool(int deviceId, size_t stackSize, int stackCount setDevice(currentDevice); } +///////////////////////////////////////////////////////////// +/// BufferPool + +GpuMat cv::cuda::BufferPool::getBuffer(int rows, int cols, int type) +{ + GpuMat buf(allocator_); + buf.create(rows, cols, type); + return buf; +} + #endif diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp index be298919b7..9f190c3fab 100644 --- a/modules/core/src/cuda_stream.cpp +++ b/modules/core/src/cuda_stream.cpp @@ -66,6 +66,7 @@ class cv::cuda::Stream::Impl { public: cudaStream_t stream; + Ptr stackAllocator_; Impl(); Impl(cudaStream_t stream); @@ -73,17 +74,26 @@ public: ~Impl(); }; +cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get()) +{ +} + cv::cuda::Stream::Impl::Impl() : stream(0) { cudaSafeCall( cudaStreamCreate(&stream) ); + + stackAllocator_ = makePtr(stream); } cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_) { + stackAllocator_ = makePtr(stream); } cv::cuda::Stream::Impl::~Impl() { + stackAllocator_.release(); + if (stream) cudaStreamDestroy(stream); } diff --git a/modules/cuda/perf/perf_buffer_pool.cpp b/modules/cuda/perf/perf_buffer_pool.cpp index 9bab24bbc8..7a0dca3894 100644 --- a/modules/cuda/perf/perf_buffer_pool.cpp +++ b/modules/cuda/perf/perf_buffer_pool.cpp @@ -56,9 +56,9 @@ namespace { void func1(const GpuMat& src, GpuMat& dst, Stream& stream) { - BufferAllocator bufAlloc(stream); + BufferPool pool(stream); - GpuMat buf(&bufAlloc); + GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels())); src.convertTo(buf, CV_32F, 1.0 / 255.0, stream); @@ -67,13 +67,13 @@ namespace void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) { - BufferAllocator bufAlloc(stream); + BufferPool pool(stream); - GpuMat buf1(&bufAlloc); + GpuMat buf1 = pool.getBuffer(src1.size(), CV_32FC(src1.channels())); func1(src1, buf1, stream); - GpuMat buf2(&bufAlloc); + GpuMat buf2 = pool.getBuffer(src2.size(), CV_32FC(src2.channels())); func1(src2, buf2, stream); diff --git a/modules/cuda/test/test_buffer_pool.cpp b/modules/cuda/test/test_buffer_pool.cpp index ea3ca80e49..2526358d95 100644 --- a/modules/cuda/test/test_buffer_pool.cpp +++ b/modules/cuda/test/test_buffer_pool.cpp @@ -52,7 +52,7 @@ using namespace testing; using namespace cv; using namespace cv::cuda; -struct BufferPool : TestWithParam +struct BufferPoolTest : TestWithParam { }; @@ -60,9 +60,9 @@ namespace { void func1(const GpuMat& src, GpuMat& dst, Stream& stream) { - BufferAllocator bufAlloc(stream); + BufferPool pool(stream); - GpuMat buf(&bufAlloc); + GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels())); src.convertTo(buf, CV_32F, 1.0 / 255.0, stream); @@ -71,17 +71,17 @@ namespace void func2(const GpuMat& src, GpuMat& dst, Stream& stream) { - BufferAllocator bufAlloc(stream); + BufferPool pool(stream); - GpuMat buf1(&bufAlloc); + GpuMat buf1 = pool.getBuffer(saturate_cast(src.rows * 0.5), saturate_cast(src.cols * 0.5), src.type()); cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream); - GpuMat buf2(&bufAlloc); + GpuMat buf2 = pool.getBuffer(buf1.size(), CV_32FC(buf1.channels())); func1(buf1, buf2, stream); - GpuMat buf3(&bufAlloc); + GpuMat buf3 = pool.getBuffer(src.size(), buf2.type()); cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream); @@ -89,7 +89,7 @@ namespace } } -CUDA_TEST_P(BufferPool, Test) +CUDA_TEST_P(BufferPoolTest, SimpleUsage) { DeviceInfo devInfo = GetParam(); setDevice(devInfo.deviceID()); @@ -115,6 +115,6 @@ CUDA_TEST_P(BufferPool, Test) ASSERT_MAT_NEAR(dst_gold, dst, 0); } -INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPool, ALL_DEVICES); +INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPoolTest, ALL_DEVICES); #endif // HAVE_CUDA