Merge pull request #6137 from dtmoodie:thrust_allocator

pull/6249/head
Vadim Pisarevsky 9 years ago
commit c9db05de22
  1. 9
      modules/core/include/opencv2/core/cuda/utility.hpp
  2. 41
      modules/core/src/cuda/gpu_mat.cu

@ -54,6 +54,15 @@
namespace cv { namespace cuda { namespace device
{
struct CV_EXPORTS ThrustAllocator
{
typedef uchar value_type;
virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
static ThrustAllocator& getAllocator();
static void setAllocator(ThrustAllocator* allocator);
};
// Base-2 logarithm of the warp size; OPENCV_CUDA_WARP_SIZE is derived from it.
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
// Warp size expressed as a power of two: 1 << 5 == 32.
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
// Base-2 logarithm of the bank count, selected at compile time from
// __CUDA_ARCH__: 5 when the architecture value is >= 200, otherwise 4.
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla

@ -50,11 +50,52 @@
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/cuda/utility.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
    /**
     * Fallback ThrustAllocator used when no custom allocator is installed.
     *
     * In the host compilation pass (__CUDA_ARCH__ undefined) it forwards to
     * cudaMalloc / cudaFree, each wrapped in CV_CUDEV_SAFE_CALL. In the
     * device compilation pass the bodies are stubs: allocate() returns NULL
     * and deallocate() does nothing.
     */
    class DefaultThrustAllocator: public cv::cuda::device::ThrustAllocator
    {
    public:
        /**
         * Allocates numBytes bytes with cudaMalloc (host pass only).
         * @param numBytes size of the request, in bytes.
         * @return the pointer filled in by cudaMalloc; NULL in the device pass.
         */
        __device__ __host__ uchar* allocate(size_t numBytes)
        {
#ifndef __CUDA_ARCH__
            // Start from a defined value so the result is never an
            // indeterminate pointer, whatever cudaMalloc writes back.
            uchar* ptr = NULL;
            CV_CUDEV_SAFE_CALL(cudaMalloc(&ptr, numBytes));
            return ptr;
#else
            (void)numBytes; // not used by the device-pass stub
            return NULL;
#endif
        }

        /**
         * Releases ptr with cudaFree (host pass only).
         * @param ptr      pointer to release.
         * @param numBytes unused; present to satisfy the interface.
         */
        __device__ __host__ void deallocate(uchar* ptr, size_t numBytes)
        {
            (void)numBytes; // cudaFree does not take a size
#ifndef __CUDA_ARCH__
            CV_CUDEV_SAFE_CALL(cudaFree(ptr));
#else
            (void)ptr; // nothing to release in the device-pass stub
#endif
        }
    };

    // The single default instance, and the pointer to whichever allocator is
    // currently active (initially the default instance).
    DefaultThrustAllocator defaultThrustAllocator;
    cv::cuda::device::ThrustAllocator* g_thrustAllocator = &defaultThrustAllocator;
}
/**
 * Returns the allocator that g_thrustAllocator currently points to.
 * The pointer starts at the default instance and setAllocator() replaces a
 * NULL argument with that default, so the dereference targets a valid object
 * as long as those are its only writers.
 */
cv::cuda::device::ThrustAllocator& cv::cuda::device::ThrustAllocator::getAllocator()
{
    cv::cuda::device::ThrustAllocator& active = *g_thrustAllocator;
    return active;
}
/**
 * Makes `allocator` the instance returned by getAllocator().
 * Passing NULL restores the built-in default allocator.
 * Only the pointer is stored; this function takes no ownership of it.
 */
void cv::cuda::device::ThrustAllocator::setAllocator(cv::cuda::device::ThrustAllocator* allocator)
{
    // Start from the caller's choice and substitute the default for NULL.
    cv::cuda::device::ThrustAllocator* selected = allocator;
    if (selected == NULL)
    {
        selected = &defaultThrustAllocator;
    }
    g_thrustAllocator = selected;
}
namespace
{
class DefaultAllocator : public GpuMat::Allocator

Loading…
Cancel
Save