Merge pull request #6137 from dtmoodie:thrust_allocator

9 years ago · c9db05de22
parent bad09e5f58 95608b1b84
commit c9db05de22
2 changed files with 50 additions and 0 deletions
--- a/modules/core/include/opencv2/core/cuda/utility.hpp
+++ b/modules/core/include/opencv2/core/cuda/utility.hpp
@ -54,6 +54,15 @@

 namespace cv { namespace cuda { namespace device
 {
+    /** @brief Abstract byte-allocator interface with a process-wide "current" instance.
+
+    Implementations provide allocate() / deallocate(); the active instance is read with
+    getAllocator() and replaced with setAllocator().
+    NOTE(review): judging by the name this is meant to back Thrust temporary storage - confirm
+    against the callers.
+     */
+    struct CV_EXPORTS ThrustAllocator
+    {
+        typedef uchar value_type;
+
+        // Polymorphic base: a virtual destructor keeps destruction through a
+        // ThrustAllocator pointer well defined for user-supplied implementations.
+        virtual ~ThrustAllocator() {}
+
+        /** @brief Returns a buffer of at least numBytes bytes (contract defined by the implementation). */
+        virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
+        /** @brief Releases a buffer previously obtained from allocate(); numBytes is the size that was requested. */
+        virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
+        /** @brief Returns the allocator currently installed. */
+        static ThrustAllocator& getAllocator();
+        /** @brief Installs a new allocator; the pointer is stored, not copied. */
+        static void setAllocator(ThrustAllocator* allocator);
+    };
    #define OPENCV_CUDA_LOG_WARP_SIZE        (5)
    #define OPENCV_CUDA_WARP_SIZE            (1 << OPENCV_CUDA_LOG_WARP_SIZE)
    #define OPENCV_CUDA_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@ -50,11 +50,52 @@

 #include "opencv2/core/cuda.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/cuda/utility.hpp"

 using namespace cv;
 using namespace cv::cuda;
 using namespace cv::cudev;

+namespace
+{
+    // Default ThrustAllocator implementation. Host-side calls are forwarded to
+    // cudaMalloc / cudaFree; when compiled for the device (__CUDA_ARCH__ defined)
+    // allocate() returns NULL and deallocate() does nothing.
+    class DefaultThrustAllocator: public cv::cuda::device::ThrustAllocator
+    {
+    public:
+        // Allocates numBytes with cudaMalloc on the host path; returns NULL on the device path.
+        __device__ __host__ uchar* allocate(size_t numBytes)
+        {
+#ifndef __CUDA_ARCH__
+            // Start from NULL so the returned value is well defined even if
+            // cudaMalloc leaves the output pointer untouched.
+            uchar* ptr = NULL;
+            CV_CUDEV_SAFE_CALL(cudaMalloc(&ptr, numBytes));
+            return ptr;
+#else
+            (void)numBytes; // unused in the device compilation pass
+            return NULL;
+#endif
+        }
+        // Releases ptr with cudaFree on the host path; no-op on the device path.
+        __device__ __host__ void deallocate(uchar* ptr, size_t numBytes)
+        {
+            (void)numBytes; // cudaFree does not need the size
+#ifndef __CUDA_ARCH__
+            CV_CUDEV_SAFE_CALL(cudaFree(ptr));
+#else
+            (void)ptr; // unused in the device compilation pass
+#endif
+        }
+    };
+    // Single default instance and the pointer to the allocator currently in use.
+    DefaultThrustAllocator defaultThrustAllocator;
+    cv::cuda::device::ThrustAllocator* g_thrustAllocator = &defaultThrustAllocator;
+}
+
+
+// Returns a reference to the allocator that g_thrustAllocator currently points at.
+cv::cuda::device::ThrustAllocator& cv::cuda::device::ThrustAllocator::getAllocator()
+{
+    cv::cuda::device::ThrustAllocator& current = *g_thrustAllocator;
+    return current;
+}
+
+// Makes `allocator` the current allocator; passing NULL restores the built-in default.
+void cv::cuda::device::ThrustAllocator::setAllocator(cv::cuda::device::ThrustAllocator* allocator)
+{
+    cv::cuda::device::ThrustAllocator* chosen = allocator;
+    if (chosen == NULL)
+    {
+        // No allocator supplied: fall back to the file-local default instance.
+        chosen = &defaultThrustAllocator;
+    }
+    g_thrustAllocator = chosen;
+}
+
 namespace
 {
    class DefaultAllocator : public GpuMat::Allocator