@ -329,11 +329,120 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr
/** @brief BufferPool for use with CUDA streams
/** @brief BufferPool for use with CUDA streams
* BufferPool utilizes cuda : : Stream ' s allocator to create new buffers . It is
BufferPool utilizes Stream ' s allocator to create new buffers for GpuMat ' s . It is
* particularly useful when BufferPoolUsage is set to true , or a custom
only useful when enabled with # setBufferPoolUsage .
* allocator is specified for the cuda : : Stream , and you want to implement your
* own stream based functions utilizing the same underlying GPU memory
@ code
* management .
setBufferPoolUsage ( true ) ;
@ endcode
@ note # setBufferPoolUsage must be called \ em before any Stream declaration .
Users may specify a custom allocator for Stream and may implement their own stream based
functions utilizing the same underlying GPU memory management .
If a custom allocator is not specified , BufferPool utilizes StackAllocator by
default . StackAllocator allocates a chunk of GPU device memory beforehand ,
and when GpuMat is declared later on , it is given the pre - allocated memory .
This kind of strategy reduces the number of calls for memory allocating APIs
such as cudaMalloc or cudaMallocPitch .
Below is an example that utilizes BufferPool with StackAllocator :
@ code
# include <opencv2/opencv.hpp>
using namespace cv ;
using namespace cv : : cuda ;
int main ( )
{
setBufferPoolUsage ( true ) ; // Tell OpenCV that we are going to utilize BufferPool
setBufferPoolConfig ( getDevice ( ) , 1024 * 1024 * 64 , 2 ) ; // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
Stream stream1 , stream2 ; // Each stream uses 1 stack
BufferPool pool1 ( stream1 ) , pool2 ( stream2 ) ;
GpuMat d_src1 = pool1 . getBuffer ( 4096 , 4096 , CV_8UC1 ) ; // 16MB
GpuMat d_dst1 = pool1 . getBuffer ( 4096 , 4096 , CV_8UC3 ) ; // 48MB, pool1 is now full
GpuMat d_src2 = pool2 . getBuffer ( 1024 , 1024 , CV_8UC1 ) ; // 1MB
GpuMat d_dst2 = pool2 . getBuffer ( 1024 , 1024 , CV_8UC3 ) ; // 3MB
cvtColor ( d_src1 , d_dst1 , CV_GRAY2BGR , 0 , stream1 ) ;
cvtColor ( d_src2 , d_dst2 , CV_GRAY2BGR , 0 , stream2 ) ;
}
@ endcode
If we allocate another GpuMat on pool1 in the above example , it will be carried out by
the DefaultAllocator since the stack for pool1 is full .
@ code
GpuMat d_add1 = pool1 . getBuffer ( 1024 , 1024 , CV_8UC1 ) ; // Stack for pool1 is full, memory is allocated with DefaultAllocator
@ endcode
If a third stream is declared in the above example , allocating with # getBuffer
within that stream will also be carried out by the DefaultAllocator because we ' ve run out of
stacks .
@ code
Stream stream3 ; // Only 2 stacks were allocated, we've run out of stacks
BufferPool pool3 ( stream3 ) ;
GpuMat d_src3 = pool3 . getBuffer ( 1024 , 1024 , CV_8UC1 ) ; // Memory is allocated with DefaultAllocator
@ endcode
@ warning When utilizing StackAllocator , deallocation order is important .
Just like a stack , deallocation must be done in LIFO order . Below is an example of
erroneous usage that violates the LIFO rule . If OpenCV is compiled in Debug mode , this
sample code will emit a CV_Assert error .
@ code
int main ( )
{
setBufferPoolUsage ( true ) ; // Tell OpenCV that we are going to utilize BufferPool
Stream stream ; // A default size (10 MB) stack is allocated to this stream
BufferPool pool ( stream ) ;
GpuMat mat1 = pool . getBuffer ( 1024 , 1024 , CV_8UC1 ) ; // Allocate mat1 (1MB)
GpuMat mat2 = pool . getBuffer ( 1024 , 1024 , CV_8UC1 ) ; // Allocate mat2 (1MB)
mat1 . release ( ) ; // erroneous usage : mat2 must be deallocated before mat1
}
@ endcode
Since C + + local variables are destroyed in the reverse order of construction ,
the code sample below satisfies the LIFO rule . Local GpuMat ' s are deallocated
and the corresponding memory is automatically returned to the pool for later usage .
@ code
int main ( )
{
setBufferPoolUsage ( true ) ; // Tell OpenCV that we are going to utilize BufferPool
setBufferPoolConfig ( getDevice ( ) , 1024 * 1024 * 64 , 2 ) ; // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
Stream stream1 , stream2 ; // Each stream uses 1 stack
BufferPool pool1 ( stream1 ) , pool2 ( stream2 ) ;
for ( int i = 0 ; i < 10 ; i + + )
{
GpuMat d_src1 = pool1 . getBuffer ( 4096 , 4096 , CV_8UC1 ) ; // 16MB
GpuMat d_dst1 = pool1 . getBuffer ( 4096 , 4096 , CV_8UC3 ) ; // 48MB, pool1 is now full
GpuMat d_src2 = pool2 . getBuffer ( 1024 , 1024 , CV_8UC1 ) ; // 1MB
GpuMat d_dst2 = pool2 . getBuffer ( 1024 , 1024 , CV_8UC3 ) ; // 3MB
d_src1 . setTo ( Scalar ( i ) , stream1 ) ;
d_src2 . setTo ( Scalar ( i ) , stream2 ) ;
cvtColor ( d_src1 , d_dst1 , CV_GRAY2BGR , 0 , stream1 ) ;
cvtColor ( d_src2 , d_dst2 , CV_GRAY2BGR , 0 , stream2 ) ;
// The order of destruction of the local variables is:
// d_dst2 => d_src2 => d_dst1 => d_src1
// LIFO rule is satisfied, this code runs without error
}
}
@ endcode
*/
*/
class CV_EXPORTS BufferPool
class CV_EXPORTS BufferPool
{
{