added BufferAllocator

pull/1575/head
Vladislav Vinogradov 11 years ago
parent e5188c7e94
commit 988ab79acb
  1. 23
      modules/core/include/opencv2/core/private.cuda.hpp
  2. 416
      modules/core/src/cuda_buffer_pool.cpp
  3. 2
      modules/core/src/cuda_stream.cpp
  4. 114
      modules/cuda/perf/perf_buffer_pool.cpp
  5. 120
      modules/cuda/test/test_buffer_pool.cpp

@ -90,6 +90,29 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
namespace cv { namespace cuda
{
class MemoryStack;
class CV_EXPORTS BufferAllocator : public GpuMat::Allocator
{
public:
explicit BufferAllocator(Stream& stream);
~BufferAllocator();
bool allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize);
void free(uchar* devPtr, int* refcount);
private:
BufferAllocator(const BufferAllocator&);
BufferAllocator& operator =(const BufferAllocator&);
MemoryStack* memStack_;
Stream stream_;
size_t alignment_;
};
CV_EXPORTS void setBufferAllocatorUsage(bool on);
CV_EXPORTS void allocateMemoryPool(int deviceId, size_t stackSize, int stackCount);
static inline void checkNppError(int code, const char* file, const int line, const char* func)
{
if (code < 0)

@ -0,0 +1,416 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#ifdef HAVE_CUDA
#include "opencv2/cudev/common.hpp"
/////////////////////////////////////////////////////////////
/// MemoryStack
namespace
{
class MemoryPool;
}
class cv::cuda::MemoryStack
{
public:
uchar* requestMemory(size_t size);
void returnMemory(uchar* ptr);
uchar* datastart;
uchar* dataend;
uchar* tip;
bool isFree;
MemoryPool* pool;
#if defined(DEBUG) || defined(_DEBUG)
std::vector<size_t> allocations;
#endif
};
uchar* cv::cuda::MemoryStack::requestMemory(size_t size)
{
const size_t freeMem = dataend - tip;
if (size > freeMem)
return 0;
uchar* ptr = tip;
tip += size;
#if defined(DEBUG) || defined(_DEBUG)
allocations.push_back(size);
#endif
return ptr;
}
void cv::cuda::MemoryStack::returnMemory(uchar* ptr)
{
CV_DbgAssert( ptr >= datastart && ptr < dataend );
#if defined(DEBUG) || defined(_DEBUG)
const size_t allocSize = tip - ptr;
CV_Assert( allocSize == allocations.back() );
allocations.pop_back();
#endif
tip = ptr;
}
/////////////////////////////////////////////////////////////
/// MemoryPool
namespace
{
class MemoryPool
{
public:
MemoryPool();
void initialize(size_t stackSize, int stackCount);
void release();
MemoryStack* getFreeMemStack();
void returnMemStack(MemoryStack* memStack);
private:
void initilizeImpl();
Mutex mtx_;
bool initialized_;
size_t stackSize_;
int stackCount_;
uchar* mem_;
std::vector<MemoryStack> stacks_;
};
MemoryPool::MemoryPool() : initialized_(false), mem_(0)
{
// default : 10 Mb, 5 stacks
stackSize_ = 10 * 1024 * 1024;
stackCount_ = 5;
}
void MemoryPool::initialize(size_t stackSize, int stackCount)
{
AutoLock lock(mtx_);
release();
stackSize_ = stackSize;
stackCount_ = stackCount;
initilizeImpl();
}
void MemoryPool::initilizeImpl()
{
const size_t totalSize = stackSize_ * stackCount_;
if (totalSize > 0)
{
cudaError_t err = cudaMalloc(&mem_, totalSize);
if (err != cudaSuccess)
return;
stacks_.resize(stackCount_);
uchar* ptr = mem_;
for (int i = 0; i < stackCount_; ++i)
{
stacks_[i].datastart = ptr;
stacks_[i].dataend = ptr + stackSize_;
stacks_[i].tip = ptr;
stacks_[i].isFree = true;
stacks_[i].pool = this;
ptr += stackSize_;
}
initialized_ = true;
}
}
void MemoryPool::release()
{
if (mem_)
{
#if defined(DEBUG) || defined(_DEBUG)
for (int i = 0; i < stackCount_; ++i)
{
CV_DbgAssert( stacks_[i].isFree );
CV_DbgAssert( stacks_[i].tip == stacks_[i].datastart );
}
#endif
cudaFree( mem_ );
mem_ = 0;
initialized_ = false;
}
}
MemoryStack* MemoryPool::getFreeMemStack()
{
AutoLock lock(mtx_);
if (!initialized_)
initilizeImpl();
if (!mem_)
return 0;
for (int i = 0; i < stackCount_; ++i)
{
if (stacks_[i].isFree)
{
stacks_[i].isFree = false;
return &stacks_[i];
}
}
return 0;
}
void MemoryPool::returnMemStack(MemoryStack* memStack)
{
AutoLock lock(mtx_);
CV_DbgAssert( !memStack->isFree );
#if defined(DEBUG) || defined(_DEBUG)
bool found = false;
for (int i = 0; i < stackCount_; ++i)
{
if (memStack == &stacks_[i])
{
found = true;
break;
}
}
CV_DbgAssert( found );
#endif
CV_DbgAssert( memStack->tip == memStack->datastart );
memStack->isFree = true;
}
}
/////////////////////////////////////////////////////////////
/// MemoryPoolManager
namespace
{
class MemoryPoolManager
{
public:
MemoryPoolManager();
~MemoryPoolManager();
MemoryPool* getPool(int deviceId);
private:
std::vector<MemoryPool> pools_;
};
MemoryPoolManager::MemoryPoolManager()
{
int deviceCount = getCudaEnabledDeviceCount();
if (deviceCount > 0)
pools_.resize(deviceCount);
}
MemoryPoolManager::~MemoryPoolManager()
{
for (size_t i = 0; i < pools_.size(); ++i)
{
cudaSetDevice(i);
pools_[i].release();
}
}
MemoryPool* MemoryPoolManager::getPool(int deviceId)
{
CV_DbgAssert( deviceId >= 0 && deviceId < static_cast<int>(pools_.size()) );
return &pools_[deviceId];
}
MemoryPool* memPool(int deviceId)
{
static MemoryPoolManager manager;
return manager.getPool(deviceId);
}
}
/////////////////////////////////////////////////////////////
/// BufferAllocator
namespace
{
bool enableMemoryPool = true;
}
cv::cuda::BufferAllocator::BufferAllocator(Stream& stream) : memStack_(0), stream_(stream)
{
if (enableMemoryPool)
{
const int deviceId = getDevice();
memStack_ = memPool(deviceId)->getFreeMemStack();
DeviceInfo devInfo(deviceId);
alignment_ = devInfo.textureAlignment();
}
}
namespace
{
void CUDART_CB returnMemStackCallback(cudaStream_t, cudaError_t, void* userData)
{
MemoryStack* memStack = static_cast<MemoryStack*>(userData);
memStack->pool->returnMemStack(memStack);
}
}
cv::cuda::BufferAllocator::~BufferAllocator()
{
if (memStack_ != 0)
CV_CUDEV_SAFE_CALL( cudaStreamAddCallback(StreamAccessor::getStream(stream_), returnMemStackCallback, memStack_, 0) );
}
namespace
{
size_t alignUp(size_t what, size_t alignment)
{
size_t alignMask = alignment-1;
size_t inverseAlignMask = ~alignMask;
size_t res = (what + alignMask) & inverseAlignMask;
return res;
}
}
bool cv::cuda::BufferAllocator::allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize)
{
if (memStack_ == 0)
return false;
size_t pitch, memSize;
if (rows > 1 && cols > 1)
{
pitch = alignUp(cols * elemSize, alignment_);
memSize = pitch * rows;
}
else
{
// Single row or single column must be continuous
pitch = elemSize * cols;
memSize = alignUp(elemSize * cols * rows, 64);
}
uchar* ptr = memStack_->requestMemory(memSize);
if (ptr == 0)
return false;
*devPtr = ptr;
*step = pitch;
*refcount = static_cast<int*>(fastMalloc(sizeof(int)));
return true;
}
void cv::cuda::BufferAllocator::free(uchar* devPtr, int* refcount)
{
if (memStack_ == 0)
return;
memStack_->returnMemory(devPtr);
fastFree(refcount);
}
void cv::cuda::setBufferAllocatorUsage(bool on)
{
enableMemoryPool = on;
}
void cv::cuda::allocateMemoryPool(int deviceId, size_t stackSize, int stackCount)
{
const int currentDevice = getDevice();
if (deviceId >= 0)
{
setDevice(deviceId);
memPool(deviceId)->initialize(stackSize, stackCount);
}
else
{
const int deviceCount = getCudaEnabledDeviceCount();
for (deviceId = 0; deviceId < deviceCount; ++deviceId)
{
setDevice(deviceId);
memPool(deviceId)->initialize(stackSize, stackCount);
}
}
setDevice(currentDevice);
}
#endif

@ -197,7 +197,7 @@ cv::cuda::Stream::operator bool_type() const
////////////////////////////////////////////////////////////////
// Stream
// Event
#ifndef HAVE_CUDA

@ -0,0 +1,114 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
#include "opencv2/cudaarithm.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace testing;
using namespace perf;
using namespace cv;
using namespace cv::cuda;
namespace
{
void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
{
BufferAllocator bufAlloc(stream);
GpuMat buf(&bufAlloc);
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
cuda::exp(buf, dst, stream);
}
void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
BufferAllocator bufAlloc(stream);
GpuMat buf1(&bufAlloc);
func1(src1, buf1, stream);
GpuMat buf2(&bufAlloc);
func1(src2, buf2, stream);
cuda::add(buf1, buf2, dst, noArray(), -1, stream);
}
}
PERF_TEST_P(Sz, BufferPool, CUDA_TYPICAL_MAT_SIZES)
{
static bool first = true;
const Size size = GetParam();
const bool useBufferPool = PERF_RUN_CUDA();
Mat host_src(size, CV_8UC1);
declare.in(host_src, WARMUP_RNG);
GpuMat src1(host_src), src2(host_src);
GpuMat dst;
setBufferAllocatorUsage(useBufferPool);
if (useBufferPool && first)
{
allocateMemoryPool(-1, 25 * 1024 * 1024, 2);
first = false;
}
TEST_CYCLE()
{
func2(src1, src2, dst, Stream::Null());
}
Mat h_dst(dst);
SANITY_CHECK(h_dst);
}
#endif

@ -0,0 +1,120 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace testing;
using namespace cv;
using namespace cv::cuda;
struct BufferPool : TestWithParam<DeviceInfo>
{
};
namespace
{
void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
{
BufferAllocator bufAlloc(stream);
GpuMat buf(&bufAlloc);
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
cuda::exp(buf, dst, stream);
}
void func2(const GpuMat& src, GpuMat& dst, Stream& stream)
{
BufferAllocator bufAlloc(stream);
GpuMat buf1(&bufAlloc);
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream);
GpuMat buf2(&bufAlloc);
func1(buf1, buf2, stream);
GpuMat buf3(&bufAlloc);
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream);
buf3.convertTo(dst, CV_8U, stream);
}
}
CUDA_TEST_P(BufferPool, Test)
{
DeviceInfo devInfo = GetParam();
setDevice(devInfo.deviceID());
GpuMat src(200, 200, CV_8UC1);
GpuMat dst;
Stream stream;
func2(src, dst, stream);
stream.waitForCompletion();
GpuMat buf, buf1, buf2, buf3;
GpuMat dst_gold;
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST);
buf1.convertTo(buf, CV_32F, 1.0 / 255.0);
cuda::exp(buf, buf2);
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST);
buf3.convertTo(dst_gold, CV_8U);
ASSERT_MAT_NEAR(dst_gold, dst, 0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPool, ALL_DEVICES);
#endif // HAVE_CUDA
Loading…
Cancel
Save