mirror of https://github.com/opencv/opencv.git
parent e5188c7e94
commit 988ab79acb
5 changed files with 674 additions and 1 deletion
@@ -0,0 +1,416 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::cuda; |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
#include "opencv2/cudev/common.hpp" |
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
/// MemoryStack
|
||||
|
||||
namespace |
||||
{ |
||||
class MemoryPool; |
||||
} |
||||
|
||||
class cv::cuda::MemoryStack |
||||
{ |
||||
public: |
||||
uchar* requestMemory(size_t size); |
||||
void returnMemory(uchar* ptr); |
||||
|
||||
uchar* datastart; |
||||
uchar* dataend; |
||||
uchar* tip; |
||||
|
||||
bool isFree; |
||||
MemoryPool* pool; |
||||
|
||||
#if defined(DEBUG) || defined(_DEBUG) |
||||
std::vector<size_t> allocations; |
||||
#endif |
||||
}; |
||||
|
||||
uchar* cv::cuda::MemoryStack::requestMemory(size_t size) |
||||
{ |
||||
const size_t freeMem = dataend - tip; |
||||
|
||||
if (size > freeMem) |
||||
return 0; |
||||
|
||||
uchar* ptr = tip; |
||||
|
||||
tip += size; |
||||
|
||||
#if defined(DEBUG) || defined(_DEBUG) |
||||
allocations.push_back(size); |
||||
#endif |
||||
|
||||
return ptr; |
||||
} |
||||
|
||||
void cv::cuda::MemoryStack::returnMemory(uchar* ptr) |
||||
{ |
||||
CV_DbgAssert( ptr >= datastart && ptr < dataend ); |
||||
|
||||
#if defined(DEBUG) || defined(_DEBUG) |
||||
const size_t allocSize = tip - ptr; |
||||
CV_Assert( allocSize == allocations.back() ); |
||||
allocations.pop_back(); |
||||
#endif |
||||
|
||||
tip = ptr; |
||||
} |
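
MemoryStack is a plain bump allocator: requestMemory() advances `tip` and returnMemory() rewinds it, so blocks must be released in the reverse order of allocation (in debug builds the `allocations` vector asserts exactly that). A minimal illustration of this LIFO contract (my sketch, not part of the commit; `stack` stands for any MemoryStack handed out by a pool):

// Illustration only: the stack hands out memory by bumping `tip`.
uchar* a = stack->requestMemory(256);  // tip advances by 256 bytes
uchar* b = stack->requestMemory(512);  // tip advances by another 512 bytes
stack->returnMemory(b);                // ok: b was the most recent allocation
stack->returnMemory(a);                // ok: a is now the most recent one
// Returning a before b would trip the CV_Assert in returnMemory() in debug
// builds, and in release builds would make b's memory reusable while b is
// still live.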

/////////////////////////////////////////////////////////////
/// MemoryPool

namespace
{
    class MemoryPool
    {
    public:
        MemoryPool();

        void initialize(size_t stackSize, int stackCount);
        void release();

        MemoryStack* getFreeMemStack();
        void returnMemStack(MemoryStack* memStack);

    private:
        void initilizeImpl();

        Mutex mtx_;

        bool initialized_;
        size_t stackSize_;
        int stackCount_;

        uchar* mem_;

        std::vector<MemoryStack> stacks_;
    };

    MemoryPool::MemoryPool() : initialized_(false), mem_(0)
    {
        // default : 10 Mb, 5 stacks
        stackSize_ = 10 * 1024 * 1024;
        stackCount_ = 5;
    }

    void MemoryPool::initialize(size_t stackSize, int stackCount)
    {
        AutoLock lock(mtx_);

        release();

        stackSize_ = stackSize;
        stackCount_ = stackCount;

        initilizeImpl();
    }

    void MemoryPool::initilizeImpl()
    {
        const size_t totalSize = stackSize_ * stackCount_;

        if (totalSize > 0)
        {
            cudaError_t err = cudaMalloc(&mem_, totalSize);
            if (err != cudaSuccess)
                return;

            stacks_.resize(stackCount_);

            uchar* ptr = mem_;

            for (int i = 0; i < stackCount_; ++i)
            {
                stacks_[i].datastart = ptr;
                stacks_[i].dataend = ptr + stackSize_;
                stacks_[i].tip = ptr;
                stacks_[i].isFree = true;
                stacks_[i].pool = this;

                ptr += stackSize_;
            }

            initialized_ = true;
        }
    }

    void MemoryPool::release()
    {
        if (mem_)
        {
#if defined(DEBUG) || defined(_DEBUG)
            for (int i = 0; i < stackCount_; ++i)
            {
                CV_DbgAssert( stacks_[i].isFree );
                CV_DbgAssert( stacks_[i].tip == stacks_[i].datastart );
            }
#endif

            cudaFree( mem_ );

            mem_ = 0;
            initialized_ = false;
        }
    }

    MemoryStack* MemoryPool::getFreeMemStack()
    {
        AutoLock lock(mtx_);

        if (!initialized_)
            initilizeImpl();

        if (!mem_)
            return 0;

        for (int i = 0; i < stackCount_; ++i)
        {
            if (stacks_[i].isFree)
            {
                stacks_[i].isFree = false;
                return &stacks_[i];
            }
        }

        return 0;
    }

    void MemoryPool::returnMemStack(MemoryStack* memStack)
    {
        AutoLock lock(mtx_);

        CV_DbgAssert( !memStack->isFree );

#if defined(DEBUG) || defined(_DEBUG)
        bool found = false;
        for (int i = 0; i < stackCount_; ++i)
        {
            if (memStack == &stacks_[i])
            {
                found = true;
                break;
            }
        }
        CV_DbgAssert( found );
#endif

        CV_DbgAssert( memStack->tip == memStack->datastart );

        memStack->isFree = true;
    }
}

/////////////////////////////////////////////////////////////
/// MemoryPoolManager

namespace
{
    class MemoryPoolManager
    {
    public:
        MemoryPoolManager();
        ~MemoryPoolManager();

        MemoryPool* getPool(int deviceId);

    private:
        std::vector<MemoryPool> pools_;
    };

    MemoryPoolManager::MemoryPoolManager()
    {
        int deviceCount = getCudaEnabledDeviceCount();

        if (deviceCount > 0)
            pools_.resize(deviceCount);
    }

    MemoryPoolManager::~MemoryPoolManager()
    {
        for (size_t i = 0; i < pools_.size(); ++i)
        {
            cudaSetDevice(i);
            pools_[i].release();
        }
    }

    MemoryPool* MemoryPoolManager::getPool(int deviceId)
    {
        CV_DbgAssert( deviceId >= 0 && deviceId < static_cast<int>(pools_.size()) );
        return &pools_[deviceId];
    }

    MemoryPool* memPool(int deviceId)
    {
        static MemoryPoolManager manager;
        return manager.getPool(deviceId);
    }
}

/////////////////////////////////////////////////////////////
/// BufferAllocator

namespace
{
    bool enableMemoryPool = true;
}

cv::cuda::BufferAllocator::BufferAllocator(Stream& stream) : memStack_(0), stream_(stream)
{
    if (enableMemoryPool)
    {
        const int deviceId = getDevice();
        memStack_ = memPool(deviceId)->getFreeMemStack();

        DeviceInfo devInfo(deviceId);
        alignment_ = devInfo.textureAlignment();
    }
}

namespace
{
    void CUDART_CB returnMemStackCallback(cudaStream_t, cudaError_t, void* userData)
    {
        MemoryStack* memStack = static_cast<MemoryStack*>(userData);
        memStack->pool->returnMemStack(memStack);
    }
}

cv::cuda::BufferAllocator::~BufferAllocator()
{
    if (memStack_ != 0)
        CV_CUDEV_SAFE_CALL( cudaStreamAddCallback(StreamAccessor::getStream(stream_), returnMemStackCallback, memStack_, 0) );
}

namespace
{
    size_t alignUp(size_t what, size_t alignment)
    {
        size_t alignMask = alignment - 1;
        size_t inverseAlignMask = ~alignMask;
        size_t res = (what + alignMask) & inverseAlignMask;
        return res;
    }
}
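
alignUp() rounds `what` up to the next multiple of `alignment`, relying on `alignment` being a power of two so the mask trick works. A few worked values (my illustration, not from the commit), using 512 bytes as an example alignment:

// Illustration only, for alignment == 512 (a typical texture alignment):
//   alignUp(300, 512) == (300 + 511) & ~511 == 512
//   alignUp(512, 512) == (512 + 511) & ~511 == 512    // already aligned
//   alignUp(513, 512) == (513 + 511) & ~511 == 1024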

bool cv::cuda::BufferAllocator::allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize)
{
    if (memStack_ == 0)
        return false;

    size_t pitch, memSize;

    if (rows > 1 && cols > 1)
    {
        pitch = alignUp(cols * elemSize, alignment_);
        memSize = pitch * rows;
    }
    else
    {
        // Single row or single column must be continuous
        pitch = elemSize * cols;
        memSize = alignUp(elemSize * cols * rows, 64);
    }

    uchar* ptr = memStack_->requestMemory(memSize);

    if (ptr == 0)
        return false;

    *devPtr = ptr;
    *step = pitch;

    *refcount = static_cast<int*>(fastMalloc(sizeof(int)));

    return true;
}
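
allocate() pads each row of a 2D matrix up to the device's texture alignment so every row starts on an aligned address, while single rows or columns stay continuous. A small worked example (my numbers, not from the commit), again taking alignment_ == 512:

// Hypothetical request for a 100 x 100 CV_8UC3 GpuMat (elemSize == 3):
//   pitch   = alignUp(100 * 3, 512) = 512          // 300-byte rows padded to 512
//   memSize = 512 * 100             = 51200 bytes  // bumped off the MemoryStack
// A single-row 1 x 100 CV_8UC3 request instead gets pitch = 300 and
// memSize = alignUp(300, 64) = 320 bytes.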

void cv::cuda::BufferAllocator::free(uchar* devPtr, int* refcount)
{
    if (memStack_ == 0)
        return;

    memStack_->returnMemory(devPtr);
    fastFree(refcount);
}

void cv::cuda::setBufferAllocatorUsage(bool on)
{
    enableMemoryPool = on;
}

void cv::cuda::allocateMemoryPool(int deviceId, size_t stackSize, int stackCount)
{
    const int currentDevice = getDevice();

    if (deviceId >= 0)
    {
        setDevice(deviceId);
        memPool(deviceId)->initialize(stackSize, stackCount);
    }
    else
    {
        const int deviceCount = getCudaEnabledDeviceCount();

        for (deviceId = 0; deviceId < deviceCount; ++deviceId)
        {
            setDevice(deviceId);
            memPool(deviceId)->initialize(stackSize, stackCount);
        }
    }

    setDevice(currentDevice);
}

#endif
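
Taken together: a caller opts in with setBufferAllocatorUsage(), optionally sizes the per-device pools with allocateMemoryPool() (deviceId -1 means every CUDA device), and then scopes a BufferAllocator to a stream so that intermediate GpuMats are carved out of a pre-allocated stack instead of hitting cudaMalloc. A minimal usage sketch (mine, mirroring the tests added below; not part of the commit):

// Sketch only; all names come from the API added in this file and from GpuMat.
cv::cuda::setBufferAllocatorUsage(true);
cv::cuda::allocateMemoryPool(-1, 10 * 1024 * 1024, 5);  // every device: 5 stacks of 10 MB

cv::cuda::Stream stream;
{
    cv::cuda::BufferAllocator bufAlloc(stream);  // grabs one free MemoryStack for this stream
    cv::cuda::GpuMat tmp(&bufAlloc);             // intermediate buffer served from that stack
    // ... enqueue asynchronous work on `stream` that uses tmp ...
}   // GpuMat returns its block, then ~BufferAllocator queues a callback that
    // marks the stack free once the work already enqueued on `stream` has run.
stream.waitForCompletion();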
@@ -0,0 +1,114 @@
/* [standard OpenCV license header, as in the first file above] */

#include "perf_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
#include "opencv2/cudaarithm.hpp" |
||||
#include "opencv2/core/private.cuda.hpp" |
||||
|
||||
using namespace testing; |
||||
using namespace perf; |
||||
using namespace cv; |
||||
using namespace cv::cuda; |
||||
|
||||
namespace |
||||
{ |
||||
void func1(const GpuMat& src, GpuMat& dst, Stream& stream) |
||||
{ |
||||
BufferAllocator bufAlloc(stream); |
||||
|
||||
GpuMat buf(&bufAlloc); |
||||
|
||||
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream); |
||||
|
||||
cuda::exp(buf, dst, stream); |
||||
} |
||||
|
||||
void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) |
||||
{ |
||||
BufferAllocator bufAlloc(stream); |
||||
|
||||
GpuMat buf1(&bufAlloc); |
||||
|
||||
func1(src1, buf1, stream); |
||||
|
||||
GpuMat buf2(&bufAlloc); |
||||
|
||||
func1(src2, buf2, stream); |
||||
|
||||
cuda::add(buf1, buf2, dst, noArray(), -1, stream); |
||||
} |
||||
} |
||||
|
||||
PERF_TEST_P(Sz, BufferPool, CUDA_TYPICAL_MAT_SIZES) |
||||
{ |
||||
static bool first = true; |
||||
|
||||
const Size size = GetParam(); |
||||
|
||||
const bool useBufferPool = PERF_RUN_CUDA(); |
||||
|
||||
Mat host_src(size, CV_8UC1); |
||||
declare.in(host_src, WARMUP_RNG); |
||||
|
||||
GpuMat src1(host_src), src2(host_src); |
||||
GpuMat dst; |
||||
|
||||
setBufferAllocatorUsage(useBufferPool); |
||||
if (useBufferPool && first) |
||||
{ |
||||
allocateMemoryPool(-1, 25 * 1024 * 1024, 2); |
||||
first = false; |
||||
} |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
func2(src1, src2, dst, Stream::Null()); |
||||
} |
||||
|
||||
Mat h_dst(dst); |
||||
SANITY_CHECK(h_dst); |
||||
} |
||||
|
||||
#endif |
@@ -0,0 +1,120 @@
/* [standard OpenCV license header, as in the first file above] */

#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
#include "opencv2/cudaarithm.hpp" |
||||
#include "opencv2/cudawarping.hpp" |
||||
#include "opencv2/core/private.cuda.hpp" |
||||
|
||||
using namespace testing; |
||||
using namespace cv; |
||||
using namespace cv::cuda; |
||||
|
||||
struct BufferPool : TestWithParam<DeviceInfo> |
||||
{ |
||||
}; |
||||
|
||||
namespace |
||||
{ |
||||
void func1(const GpuMat& src, GpuMat& dst, Stream& stream) |
||||
{ |
||||
BufferAllocator bufAlloc(stream); |
||||
|
||||
GpuMat buf(&bufAlloc); |
||||
|
||||
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream); |
||||
|
||||
cuda::exp(buf, dst, stream); |
||||
} |
||||
|
||||
void func2(const GpuMat& src, GpuMat& dst, Stream& stream) |
||||
{ |
||||
BufferAllocator bufAlloc(stream); |
||||
|
||||
GpuMat buf1(&bufAlloc); |
||||
|
||||
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream); |
||||
|
||||
GpuMat buf2(&bufAlloc); |
||||
|
||||
func1(buf1, buf2, stream); |
||||
|
||||
GpuMat buf3(&bufAlloc); |
||||
|
||||
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream); |
||||
|
||||
buf3.convertTo(dst, CV_8U, stream); |
||||
} |
||||
} |
||||
|
||||
CUDA_TEST_P(BufferPool, Test) |
||||
{ |
||||
DeviceInfo devInfo = GetParam(); |
||||
setDevice(devInfo.deviceID()); |
||||
|
||||
GpuMat src(200, 200, CV_8UC1); |
||||
GpuMat dst; |
||||
|
||||
Stream stream; |
||||
|
||||
func2(src, dst, stream); |
||||
|
||||
stream.waitForCompletion(); |
||||
|
||||
GpuMat buf, buf1, buf2, buf3; |
||||
GpuMat dst_gold; |
||||
|
||||
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST); |
||||
buf1.convertTo(buf, CV_32F, 1.0 / 255.0); |
||||
cuda::exp(buf, buf2); |
||||
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST); |
||||
buf3.convertTo(dst_gold, CV_8U); |
||||
|
||||
ASSERT_MAT_NEAR(dst_gold, dst, 0); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPool, ALL_DEVICES); |
||||
|
||||
#endif // HAVE_CUDA