diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp
index 6bca5413f9..bc6dd31438 100644
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -61,16 +61,30 @@ namespace cv { namespace cuda {
 class CV_EXPORTS GpuMat
 {
 public:
+    class CV_EXPORTS Allocator
+    {
+    public:
+        virtual ~Allocator() {}
+
+        // allocator must fill data, step and refcount fields
+        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
+        virtual void free(GpuMat* mat) = 0;
+    };
+
+    //! default allocator
+    static Allocator* defaultAllocator();
+    static void setDefaultAllocator(Allocator* allocator);
+
     //! default constructor
-    GpuMat();
+    explicit GpuMat(Allocator* allocator = defaultAllocator());

     //! constructs GpuMat of the specified size and type
-    GpuMat(int rows, int cols, int type);
-    GpuMat(Size size, int type);
+    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

     //! constructs GpuMat and fills it with the specified value _s
-    GpuMat(int rows, int cols, int type, Scalar s);
-    GpuMat(Size size, int type, Scalar s);
+    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

     //! copy constructor
     GpuMat(const GpuMat& m);
@@ -84,7 +98,7 @@ public:
     GpuMat(const GpuMat& m, Rect roi);

     //! builds GpuMat from host memory (Blocking call)
-    explicit GpuMat(InputArray arr);
+    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

     //! destructor - calls release()
     ~GpuMat();
@@ -249,6 +263,9 @@ public:
     //! helper fields used in locateROI and adjustROI
     uchar* datastart;
     uchar* dataend;
+
+    //! allocator
+    Allocator* allocator;
 };

 //! creates continuous matrix
@@ -260,6 +277,10 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

 CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);

+//! BufferPool management (must be called before Stream creation)
+CV_EXPORTS void setBufferPoolUsage(bool on);
+CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
 //////////////////////////////// CudaMem ////////////////////////////////

 // CudaMem is limited cv::Mat with page locked memory allocation.
@@ -382,6 +403,7 @@ private:
     Stream(const Ptr<Impl>& impl);

     friend struct StreamAccessor;
+    friend class BufferPool;
 };

 class CV_EXPORTS Event
diff --git a/modules/core/include/opencv2/core/cuda.inl.hpp b/modules/core/include/opencv2/core/cuda.inl.hpp
index 7410074b5e..170d0affb3 100644
--- a/modules/core/include/opencv2/core/cuda.inl.hpp
+++ b/modules/core/include/opencv2/core/cuda.inl.hpp
@@ -51,29 +51,29 @@ namespace cv { namespace cuda {
 //////////////////////////////// GpuMat ///////////////////////////////

 inline
-GpuMat::GpuMat()
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {}

 inline
-GpuMat::GpuMat(int rows_, int cols_, int type_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (rows_ > 0 && cols_ > 0)
         create(rows_, cols_, type_);
 }

 inline
-GpuMat::GpuMat(Size size_, int type_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (size_.height > 0 && size_.width > 0)
         create(size_.height, size_.width, type_);
 }

 inline
-GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (rows_ > 0 && cols_ > 0)
     {
@@ -83,8 +83,8 @@ GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
 }

 inline
-GpuMat::GpuMat(Size size_, int type_, Scalar s_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (size_.height > 0 && size_.width > 0)
     {
@@ -95,15 +95,15 @@ GpuMat::GpuMat(Size size_, int type_, Scalar s_)

 inline
 GpuMat::GpuMat(const GpuMat& m)
-    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
 {
     if (refcount)
         CV_XADD(refcount, 1);
 }

 inline
-GpuMat::GpuMat(InputArray arr) :
-    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
+    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     upload(arr);
 }
diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp
index 3c45231832..894220ef56 100644
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -90,6 +90,38 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The library is compiled without CUDA support"); }

 namespace cv { namespace cuda
 {
+    class MemoryStack;
+
+    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
+    {
+    public:
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
+
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+
+    private:
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
+
+        cudaStream_t stream_;
+        MemoryStack* memStack_;
+        size_t alignment_;
+    };
+
+    class CV_EXPORTS BufferPool
+    {
+    public:
+        explicit BufferPool(Stream& stream);
+
+        GpuMat getBuffer(int rows, int cols, int type);
+        GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    private:
+        GpuMat::Allocator* allocator_;
+    };
+
     static inline void checkNppError(int code, const char* file, const int line, const char* func)
     {
         if (code < 0)
diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu
index af0a4f5d0d..f4c9bbdca4 100644
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -55,6 +55,54 @@ using namespace cv;
 using namespace cv::cuda;
 using namespace cv::cudev;

+namespace
+{
+    class DefaultAllocator : public GpuMat::Allocator
+    {
+    public:
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+    };
+
+    bool DefaultAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
+    {
+        if (rows > 1 && cols > 1)
+        {
+            CV_CUDEV_SAFE_CALL( cudaMallocPitch(&mat->data, &mat->step, elemSize * cols, rows) );
+        }
+        else
+        {
+            // Single row or single column must be continuous
+            CV_CUDEV_SAFE_CALL( cudaMalloc(&mat->data, elemSize * cols * rows) );
+            mat->step = elemSize * cols;
+        }
+
+        mat->refcount = (int*) fastMalloc(sizeof(int));
+
+        return true;
+    }
+
+    void DefaultAllocator::free(GpuMat* mat)
+    {
+        cudaFree(mat->datastart);
+        fastFree(mat->refcount);
+    }
+
+    DefaultAllocator cudaDefaultAllocator;
+    GpuMat::Allocator* g_defaultAllocator = &cudaDefaultAllocator;
+}
+
+GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
+{
+    return g_defaultAllocator;
+}
+
+void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
+{
+    CV_Assert( allocator != 0 );
+    g_defaultAllocator = allocator;
+}
+
 /////////////////////////////////////////////////////
 /// create

@@ -76,19 +124,16 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
         rows = _rows;
         cols = _cols;

-        size_t esz = elemSize();
+        const size_t esz = elemSize();

-        void* devPtr;
+        bool allocSuccess = allocator->allocate(this, rows, cols, esz);

-        if (rows > 1 && cols > 1)
+        if (!allocSuccess)
         {
-            CV_CUDEV_SAFE_CALL( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
-        }
-        else
-        {
-            // Single row or single column must be continuous
-            CV_CUDEV_SAFE_CALL( cudaMalloc(&devPtr, esz * cols * rows) );
-            step = esz * cols;
+            // the custom allocator failed, fall back to the default allocator
+            allocator = defaultAllocator();
+            allocSuccess = allocator->allocate(this, rows, cols, esz);
+            CV_Assert( allocSuccess );
         }

         if (esz * cols == step)
@@ -97,11 +142,11 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
         int64 _nettosize = static_cast<int64>(step) * rows;
         size_t nettosize = static_cast<size_t>(_nettosize);

-        datastart = data = static_cast<uchar*>(devPtr);
+        datastart = data;
         dataend = data + nettosize;

-        refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
-        *refcount = 1;
+        if (refcount)
+            *refcount = 1;
     }
 }

@@ -110,11 +155,10 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)

 void cv::cuda::GpuMat::release()
 {
+    CV_DbgAssert( allocator != 0 );
+
     if (refcount && CV_XADD(refcount, -1) == 1)
-    {
-        cudaFree(datastart);
-        fastFree(refcount);
-    }
+        allocator->free(this);

     data = datastart = dataend = 0;
     step = rows = cols = 0;
diff --git a/modules/core/src/cuda_buffer_pool.cpp b/modules/core/src/cuda_buffer_pool.cpp
new file mode 100644
index 0000000000..ea060a7c20
--- /dev/null
+++ b/modules/core/src/cuda_buffer_pool.cpp
@@ -0,0 +1,418 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+#ifdef HAVE_CUDA
+
+#include "opencv2/cudev/common.hpp"
+
+/////////////////////////////////////////////////////////////
+/// MemoryStack
+
+namespace
+{
+    class MemoryPool;
+}
+
+class cv::cuda::MemoryStack
+{
+public:
+    uchar* requestMemory(size_t size);
+    void returnMemory(uchar* ptr);
+
+    uchar* datastart;
+    uchar* dataend;
+    uchar* tip;
+
+    bool isFree;
+    MemoryPool* pool;
+
+#if defined(DEBUG) || defined(_DEBUG)
+    std::vector<size_t> allocations;
+#endif
+};
+
+uchar* cv::cuda::MemoryStack::requestMemory(size_t size)
+{
+    const size_t freeMem = dataend - tip;
+
+    if (size > freeMem)
+        return 0;
+
+    uchar* ptr = tip;
+
+    tip += size;
+
+#if defined(DEBUG) || defined(_DEBUG)
+    allocations.push_back(size);
+#endif
+
+    return ptr;
+}
+
+void cv::cuda::MemoryStack::returnMemory(uchar* ptr)
+{
+    CV_DbgAssert( ptr >= datastart && ptr < dataend );
+
+#if defined(DEBUG) || defined(_DEBUG)
+    const size_t allocSize = tip - ptr;
+    CV_Assert( allocSize == allocations.back() );
+    allocations.pop_back();
+#endif
+
+    tip = ptr;
+}
+
+/////////////////////////////////////////////////////////////
+/// MemoryPool
+
+namespace
+{
+    class MemoryPool
+    {
+    public:
+        MemoryPool();
+
+        void initialize(size_t stackSize, int stackCount);
+        void release();
+
+        MemoryStack* getFreeMemStack();
+        void returnMemStack(MemoryStack* memStack);
+
+    private:
+        void initializeImpl();
+
+        Mutex mtx_;
+
+        bool initialized_;
+        size_t stackSize_;
+        int stackCount_;
+
+        uchar* mem_;
+
+        std::vector<MemoryStack> stacks_;
+    };
+
+    MemoryPool::MemoryPool() : initialized_(false), mem_(0)
+    {
+        // default : 10 Mb, 5 stacks
+        stackSize_ = 10 * 1024 * 1024;
+        stackCount_ = 5;
+    }
+
+    void MemoryPool::initialize(size_t stackSize, int stackCount)
+    {
+        AutoLock lock(mtx_);
+
+        release();
+
+        stackSize_ = stackSize;
+        stackCount_ = stackCount;
+
+        initializeImpl();
+    }
+
+    void MemoryPool::initializeImpl()
+    {
+        const size_t totalSize = stackSize_ * stackCount_;
+
+        if (totalSize > 0)
+        {
+            cudaError_t err = cudaMalloc(&mem_, totalSize);
+            if (err != cudaSuccess)
+                return;
+
+            stacks_.resize(stackCount_);
+
+            uchar* ptr = mem_;
+
+            for (int i = 0; i < stackCount_; ++i)
+            {
+                stacks_[i].datastart = ptr;
+                stacks_[i].dataend = ptr + stackSize_;
+                stacks_[i].tip = ptr;
+                stacks_[i].isFree = true;
+                stacks_[i].pool = this;
+
+                ptr += stackSize_;
+            }
+
+            initialized_ = true;
+        }
+    }
+
+    void MemoryPool::release()
+    {
+        if (mem_)
+        {
+#if defined(DEBUG) || defined(_DEBUG)
+            for (int i = 0; i < stackCount_; ++i)
+            {
+                CV_DbgAssert( stacks_[i].isFree );
+                CV_DbgAssert( stacks_[i].tip == stacks_[i].datastart );
+            }
+#endif
+
+            cudaFree( mem_ );
+
+            mem_ = 0;
+            initialized_ = false;
+        }
+    }
+
+    MemoryStack* MemoryPool::getFreeMemStack()
+    {
+        AutoLock lock(mtx_);
+
+        if (!initialized_)
+            initializeImpl();
+
+        if (!mem_)
+            return 0;
+
+        for (int i = 0; i < stackCount_; ++i)
+        {
+            if (stacks_[i].isFree)
+            {
+                stacks_[i].isFree = false;
+                return &stacks_[i];
+            }
+        }
+
+        return 0;
+    }
+
+    void MemoryPool::returnMemStack(MemoryStack* memStack)
+    {
+        AutoLock lock(mtx_);
+
+        CV_DbgAssert( !memStack->isFree );
+
+#if defined(DEBUG) || defined(_DEBUG)
+        bool found = false;
+        for (int i = 0; i < stackCount_; ++i)
+        {
+            if (memStack == &stacks_[i])
+            {
+                found = true;
+                break;
+            }
+        }
+        CV_DbgAssert( found );
+#endif
+
+        CV_DbgAssert( memStack->tip == memStack->datastart );
+
+        memStack->isFree = true;
+    }
+}
+
+/////////////////////////////////////////////////////////////
+/// MemoryPoolManager
+
+namespace
+{
+    class MemoryPoolManager
+    {
+    public:
+        MemoryPoolManager();
+        ~MemoryPoolManager();
+
+        MemoryPool* getPool(int deviceId);
+
+    private:
+        std::vector<MemoryPool> pools_;
+    };
+
+    MemoryPoolManager::MemoryPoolManager()
+    {
+        int deviceCount = getCudaEnabledDeviceCount();
+
+        if (deviceCount > 0)
+            pools_.resize(deviceCount);
+    }
+
+    MemoryPoolManager::~MemoryPoolManager()
+    {
+        for (size_t i = 0; i < pools_.size(); ++i)
+        {
+            cudaSetDevice(static_cast<int>(i));
+            pools_[i].release();
+        }
+    }
+
+    MemoryPool* MemoryPoolManager::getPool(int deviceId)
+    {
+        CV_DbgAssert( deviceId >= 0 && deviceId < static_cast<int>(pools_.size()) );
+        return &pools_[deviceId];
+    }
+
+    MemoryPool* memPool(int deviceId)
+    {
+        static MemoryPoolManager manager;
+        return manager.getPool(deviceId);
+    }
+}
+
+/////////////////////////////////////////////////////////////
+/// StackAllocator
+
+namespace
+{
+    bool enableMemoryPool = true;
+}
+
+cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
+{
+    if (enableMemoryPool)
+    {
+        const int deviceId = getDevice();
+        memStack_ = memPool(deviceId)->getFreeMemStack();
+
+        DeviceInfo devInfo(deviceId);
+        alignment_ = devInfo.textureAlignment();
+    }
+}
+
+cv::cuda::StackAllocator::~StackAllocator()
+{
+    cudaStreamSynchronize(stream_);
+
+    if (memStack_ != 0)
+        memStack_->pool->returnMemStack(memStack_);
+}
+
+namespace
+{
+    size_t alignUp(size_t what, size_t alignment)
+    {
+        size_t alignMask = alignment-1;
+        size_t inverseAlignMask = ~alignMask;
+        size_t res = (what + alignMask) & inverseAlignMask;
+        return res;
+    }
+}
+
+bool cv::cuda::StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
+{
+    if (memStack_ == 0)
+        return false;
+
+    size_t pitch, memSize;
+
+    if (rows > 1 && cols > 1)
+    {
+        pitch = alignUp(cols * elemSize, alignment_);
+        memSize = pitch * rows;
+    }
+    else
+    {
+        // Single row or single column must be continuous
+        pitch = elemSize * cols;
+        memSize = alignUp(elemSize * cols * rows, 64);
+    }
+
+    uchar* ptr = memStack_->requestMemory(memSize);
+
+    if (ptr == 0)
+        return false;
+
+    mat->data = ptr;
+    mat->step = pitch;
+    mat->refcount = (int*) fastMalloc(sizeof(int));
+
+    return true;
+}
+
+void cv::cuda::StackAllocator::free(GpuMat* mat)
+{
+    if (memStack_ == 0)
+        return;
+
+    memStack_->returnMemory(mat->datastart);
+    fastFree(mat->refcount);
+}
+
+void cv::cuda::setBufferPoolUsage(bool on)
+{
+    enableMemoryPool = on;
+}
+
+void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount)
+{
+    const int currentDevice = getDevice();
+
+    if (deviceId >= 0)
+    {
+        setDevice(deviceId);
+        memPool(deviceId)->initialize(stackSize, stackCount);
+    }
+    else
+    {
+        const int deviceCount = getCudaEnabledDeviceCount();
+
+        for (deviceId = 0; deviceId < deviceCount; ++deviceId)
+        {
+            setDevice(deviceId);
+            memPool(deviceId)->initialize(stackSize, stackCount);
+        }
+    }
+
+    setDevice(currentDevice);
+}
+
+/////////////////////////////////////////////////////////////
+/// BufferPool
+
+GpuMat cv::cuda::BufferPool::getBuffer(int rows, int cols, int type)
+{
+    GpuMat buf(allocator_);
+    buf.create(rows, cols, type);
+    return buf;
+}
+
+#endif
diff --git a/modules/core/src/cuda_gpu_mat.cpp b/modules/core/src/cuda_gpu_mat.cpp
index 2303f11ee8..80a7462cb4 100644
--- a/modules/core/src/cuda_gpu_mat.cpp
+++ b/modules/core/src/cuda_gpu_mat.cpp
@@ -49,7 +49,8 @@ using namespace cv::cuda;

 cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
     step(step_), data((uchar*)data_), refcount(0),
-    datastart((uchar*)data_), dataend((uchar*)data_)
+    datastart((uchar*)data_), dataend((uchar*)data_),
+    allocator(defaultAllocator())
 {
     size_t minstep = cols * elemSize();

@@ -74,7 +75,8 @@ cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :

 cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
     step(step_), data((uchar*)data_), refcount(0),
-    datastart((uchar*)data_), dataend((uchar*)data_)
+    datastart((uchar*)data_), dataend((uchar*)data_),
+    allocator(defaultAllocator())
 {
     size_t minstep = cols * elemSize();

@@ -92,6 +94,7 @@ cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
         flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
     }
+
     dataend += step * (rows - 1) + minstep;
 }

@@ -100,6 +103,7 @@ cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
     flags = m.flags;
     step = m.step; refcount = m.refcount;
     data = m.data; datastart = m.datastart; dataend = m.dataend;
+    allocator = m.allocator;

     if (rowRange_ == Range::all())
     {
@@ -139,7 +143,8 @@ cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)

 cv::cuda::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
     flags(m.flags), rows(roi.height), cols(roi.width),
     step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
-    datastart(m.datastart), dataend(m.dataend)
+    datastart(m.datastart), dataend(m.dataend),
+    allocator(m.allocator)
 {
     flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
     data += roi.x * elemSize();
@@ -347,6 +352,17 @@ GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)

 #ifndef HAVE_CUDA

+GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
+{
+    return 0;
+}
+
+void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
+{
+    (void) allocator;
+    throw_no_cuda();
+}
+
 void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
 {
     (void) _rows;
diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp
index 3fdc83867b..9f190c3fab 100644
--- a/modules/core/src/cuda_stream.cpp
+++ b/modules/core/src/cuda_stream.cpp
@@ -66,6 +66,7 @@ class cv::cuda::Stream::Impl
 {
 public:
     cudaStream_t stream;
+    Ptr<StackAllocator> stackAllocator_;

     Impl();
     Impl(cudaStream_t stream);
@@ -73,17 +74,26 @@ public:
     ~Impl();
 };

+cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get())
+{
+}
+
 cv::cuda::Stream::Impl::Impl() : stream(0)
 {
     cudaSafeCall( cudaStreamCreate(&stream) );
+
+    stackAllocator_ = makePtr<StackAllocator>(stream);
 }

 cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
 {
+    stackAllocator_ = makePtr<StackAllocator>(stream);
 }

 cv::cuda::Stream::Impl::~Impl()
 {
+    stackAllocator_.release();
+
     if (stream)
         cudaStreamDestroy(stream);
 }
@@ -197,7 +207,7 @@ cv::cuda::Stream::operator bool_type() const

 ////////////////////////////////////////////////////////////////
-// Stream
+// Event

 #ifndef HAVE_CUDA
diff --git a/modules/cuda/perf/perf_buffer_pool.cpp b/modules/cuda/perf/perf_buffer_pool.cpp
new file mode 100644
index 0000000000..72bd47a070
--- /dev/null
+++ b/modules/cuda/perf/perf_buffer_pool.cpp
@@ -0,0 +1,114 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/core/private.cuda.hpp"
+
+using namespace testing;
+using namespace perf;
+using namespace cv;
+using namespace cv::cuda;
+
+namespace
+{
+    void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
+    {
+        BufferPool pool(stream);
+
+        GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
+
+        src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
+
+        cuda::exp(buf, dst, stream);
+    }
+
+    void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
+    {
+        BufferPool pool(stream);
+
+        GpuMat buf1 = pool.getBuffer(src1.size(), CV_32FC(src1.channels()));
+
+        func1(src1, buf1, stream);
+
+        GpuMat buf2 = pool.getBuffer(src2.size(), CV_32FC(src2.channels()));
+
+        func1(src2, buf2, stream);
+
+        cuda::add(buf1, buf2, dst, noArray(), -1, stream);
+    }
+}
+
+PERF_TEST_P(Sz, BufferPool, CUDA_TYPICAL_MAT_SIZES)
+{
+    static bool first = true;
+
+    const Size size = GetParam();
+
+    const bool useBufferPool = PERF_RUN_CUDA();
+
+    Mat host_src(size, CV_8UC1);
+    declare.in(host_src, WARMUP_RNG);
+
+    GpuMat src1(host_src), src2(host_src);
+    GpuMat dst;
+
+    setBufferPoolUsage(useBufferPool);
+    if (useBufferPool && first)
+    {
+        setBufferPoolConfig(-1, 25 * 1024 * 1024, 2);
+        first = false;
+    }
+
+    TEST_CYCLE()
+    {
+        func2(src1, src2, dst, Stream::Null());
+    }
+
+    Mat h_dst(dst);
+    SANITY_CHECK(h_dst);
+}
+
+#endif
diff --git a/modules/cuda/test/test_buffer_pool.cpp b/modules/cuda/test/test_buffer_pool.cpp
new file mode 100644
index 0000000000..2526358d95
--- /dev/null
+++ b/modules/cuda/test/test_buffer_pool.cpp
@@ -0,0 +1,120 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudawarping.hpp"
+#include "opencv2/core/private.cuda.hpp"
+
+using namespace testing;
+using namespace cv;
+using namespace cv::cuda;
+
+struct BufferPoolTest : TestWithParam<DeviceInfo>
+{
+};
+
+namespace
+{
+    void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
+    {
+        BufferPool pool(stream);
+
+        GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
+
+        src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
+
+        cuda::exp(buf, dst, stream);
+    }
+
+    void func2(const GpuMat& src, GpuMat& dst, Stream& stream)
+    {
+        BufferPool pool(stream);
+
+        GpuMat buf1 = pool.getBuffer(saturate_cast<int>(src.rows * 0.5), saturate_cast<int>(src.cols * 0.5), src.type());
+
+        cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream);
+
+        GpuMat buf2 = pool.getBuffer(buf1.size(), CV_32FC(buf1.channels()));
+
+        func1(buf1, buf2, stream);
+
+        GpuMat buf3 = pool.getBuffer(src.size(), buf2.type());
+
+        cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream);
+
+        buf3.convertTo(dst, CV_8U, stream);
+    }
+}
+
+CUDA_TEST_P(BufferPoolTest, SimpleUsage)
+{
+    DeviceInfo devInfo = GetParam();
+    setDevice(devInfo.deviceID());
+
+    GpuMat src(200, 200, CV_8UC1);
+    GpuMat dst;
+
+    Stream stream;
+
+    func2(src, dst, stream);
+
+    stream.waitForCompletion();
+
+    GpuMat buf, buf1, buf2, buf3;
+    GpuMat dst_gold;
+
+    cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST);
+    buf1.convertTo(buf, CV_32F, 1.0 / 255.0);
+    cuda::exp(buf, buf2);
+    cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST);
+    buf3.convertTo(dst_gold, CV_8U);
+
+    ASSERT_MAT_NEAR(dst_gold, dst, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPoolTest, ALL_DEVICES);
+
+#endif // HAVE_CUDA
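
Usage note (not part of the patch): the pool is configured process-wide and every Stream::Impl grabs a MemoryStack from the per-device pool in its constructor, which is why setBufferPoolUsage()/setBufferPoolConfig() must run before the first Stream is created. A minimal sketch of the intended pattern, distilled from the tests above; it assumes the cudaarithm module, and note that BufferPool currently lives in private.cuda.hpp, so this only compiles inside the OpenCV build tree:

```cpp
#include "opencv2/core/cuda.hpp"
#include "opencv2/core/private.cuda.hpp"   // BufferPool lives here for now
#include "opencv2/cudaarithm.hpp"

using namespace cv;
using namespace cv::cuda;

void bufferPoolExample()
{
    // Configure before any Stream exists: each Stream acquires a
    // MemoryStack from the per-device pool at construction time.
    setBufferPoolUsage(true);
    setBufferPoolConfig(-1, 10 * 1024 * 1024, 5);   // all devices: 5 stacks of 10 MB

    Stream stream;
    BufferPool pool(stream);

    GpuMat src(1024, 1024, CV_8UC1);
    GpuMat dst;   // uses the default allocator, safe to outlive the stream

    // Temporary comes from the stream's stack, not from a fresh cudaMalloc.
    GpuMat buf = pool.getBuffer(src.size(), CV_32FC1);

    src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
    cuda::exp(buf, dst, stream);

    stream.waitForCompletion();
}   // buf released here; MemoryStack::returnMemory asserts LIFO order in debug builds
```

Because the allocations are stack-shaped, buffers obtained from one BufferPool should be released in reverse order of acquisition, and StackAllocator's destructor synchronizes the stream before handing its MemoryStack back to the pool.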
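The GpuMat::Allocator interface itself is deliberately small, and the fallback path in GpuMat::create() means a custom allocator may simply return false to decline a request. For illustration only, a hypothetical LoggingAllocator (not part of the patch) written against the contract stated in the header: allocate() fills data, step and refcount, while create() takes care of datastart/dataend and sets *refcount itself:

```cpp
#include <cstdio>
#include <cuda_runtime.h>
#include "opencv2/core/cuda.hpp"

// Hypothetical example: same cudaMallocPitch strategy as DefaultAllocator,
// plus a log line per allocation.
class LoggingAllocator : public cv::cuda::GpuMat::Allocator
{
public:
    bool allocate(cv::cuda::GpuMat* mat, int rows, int cols, size_t elemSize)
    {
        void* devPtr = 0;
        size_t step = 0;
        if (cudaMallocPitch(&devPtr, &step, elemSize * cols, rows) != cudaSuccess)
            return false;   // GpuMat::create() falls back to defaultAllocator()

        mat->data = static_cast<uchar*>(devPtr);
        mat->step = step;
        mat->refcount = static_cast<int*>(cv::fastMalloc(sizeof(int)));

        std::printf("GPU alloc: %dx%d, pitch %lu\n", rows, cols, (unsigned long) step);
        return true;
    }

    void free(cv::cuda::GpuMat* mat)
    {
        cudaFree(mat->datastart);
        cv::fastFree(mat->refcount);
    }
};

// Either per-matrix or process-wide:
//   LoggingAllocator logAlloc;
//   cv::cuda::GpuMat m(480, 640, CV_8UC3, &logAlloc);
//   cv::cuda::GpuMat::setDefaultAllocator(&logAlloc);
```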