added: implement MatPL with zerocopy and write-combined

pull/13383/head
Andrey Morozov 14 years ago
parent 024283ceae
commit 35ebeb21bd
  1. modules/gpu/include/opencv2/gpu/gpu.hpp (76 changed lines)
  2. modules/gpu/include/opencv2/gpu/matrix_operations.hpp (17 changed lines)
  3. modules/gpu/src/matrix_operations.cpp (49 changed lines)
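
Before the diff itself, an illustrative sketch of how the new type_alloc parameter is meant to be used. The MatPL/GpuMat names, the ALLOC_* constants and createMatHeader() come from the changes below; the image size and the explicit-upload pattern are only placeholders, not part of the patch.

    #include <opencv2/gpu/gpu.hpp>

    using namespace cv;
    using namespace cv::gpu;

    void upload_example()
    {
        // Page-locked host buffer (the default), copied to the GPU explicitly.
        MatPL host(480, 640, CV_8UC1);                    // same as ALLOC_PAGE_LOCKED
        GpuMat dev;
        dev.upload(host.createMatHeader());               // pinned memory speeds up the transfer

        // Write-combined host buffer: fast to fill from the CPU, intended to be read by the GPU.
        MatPL wc(480, 640, CV_8UC1, MatPL::ALLOC_WRITE_COMBINED);
        dev.upload(wc.createMatHeader());
    }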

@@ -233,17 +233,18 @@ namespace cv
{
public:
//Not supported. Now behaviour is like ALLOC_DEFAULT.
//enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
//Supported allocation types. ALLOC_PAGE_LOCKED keeps the former ALLOC_DEFAULT behaviour.
enum { ALLOC_PAGE_LOCKED = 0, ALLOC_ZEROCOPY = 1, ALLOC_WRITE_COMBINED = 4 };
MatPL();
MatPL(const MatPL& m);
MatPL(int _rows, int _cols, int _type);
MatPL(Size _size, int _type);
MatPL(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
MatPL(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
//! creates from cv::Mat with copying data
explicit MatPL(const Mat& m);
explicit MatPL(const Mat& m, int type_alloc = ALLOC_PAGE_LOCKED);
~MatPL();
@@ -253,8 +254,8 @@ namespace cv
MatPL clone() const;
//! allocates new matrix data unless the matrix already has specified size and type.
void create(int _rows, int _cols, int _type);
void create(Size _size, int _type);
void create(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
void create(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
//! decrements reference counter and released memory if needed.
void release();
@@ -263,6 +264,11 @@ namespace cv
Mat createMatHeader() const;
operator Mat() const;
operator GpuMat() const;
static bool can_device_map_to_host();
// Please see cv::Mat for descriptions
bool isContinuous() const;
size_t elemSize() const;
@@ -274,16 +280,20 @@ namespace cv
Size size() const;
bool empty() const;
// Please see cv::Mat for descriptions
int flags;
int rows, cols;
size_t step;
int alloc_type;
uchar* data;
int* refcount;
uchar* datastart;
uchar* dataend;
};
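
A complementary sketch of the zero-copy path exposed by the declarations above. can_device_map_to_host() and the GpuMat conversion operator are part of this patch; the assumption that the runtime allows host mapping (on older setups via cudaSetDeviceFlags(cudaDeviceMapHost) before the first CUDA call) is mine.

    #include <opencv2/gpu/gpu.hpp>

    using namespace cv::gpu;

    void zero_copy_example()
    {
        if (!MatPL::can_device_map_to_host())
            return;                                   // this device cannot map pinned host memory

        // Host buffer that the GPU can address directly, so no explicit upload is needed.
        MatPL frame(480, 640, CV_8UC1, MatPL::ALLOC_ZEROCOPY);

        // The conversion operator wraps the mapped device pointer in a GpuMat header.
        GpuMat d_frame = frame;
    }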
//////////////////////////////// CudaStream ////////////////////////////////
@@ -332,7 +342,7 @@ namespace cv
CV_EXPORTS void remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst);
CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
//////////////////////////////// StereoBM_GPU ////////////////////////////////
@@ -374,9 +384,9 @@ namespace cv
private:
GpuMat minSSD, leBuf, riBuf;
};
////////////////////////// StereoBeliefPropagation ///////////////////////////
class CV_EXPORTS StereoBeliefPropagation
{
public:
@@ -385,15 +395,15 @@ namespace cv
enum { DEFAULT_LEVELS = 5 };
//! the default constructor
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int msg_type = CV_32F);
//! the full constructor taking the number of disparities, number of BP iterations on each level,
//! number of levels, truncation of data cost, data weight,
//! truncation of discontinuity cost and discontinuity single jump
StereoBeliefPropagation(int ndisp, int iters, int levels,
float max_data_term, float data_weight,
float max_disc_term, float disc_single_jump,
int msg_type = CV_32F);
@@ -401,29 +411,29 @@ namespace cv
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);
//! Async version
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);
int ndisp;
int iters;
int levels;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int msg_type;
private:
GpuMat u, d, l, r, u2, d2, l2, r2;
std::vector<GpuMat> datas;
GpuMat out;
};
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
class CV_EXPORTS StereoConstantSpaceBP
{
public:
@@ -434,13 +444,13 @@ namespace cv
//! the default constructor
explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int nr_plane = DEFAULT_NR_PLANE,
int msg_type = CV_32F);
//! the full constructor taking the number of disparities, number of BP iterations on each level,
//! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
//! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
@@ -450,20 +460,20 @@ namespace cv
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);
//! Async version
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);
int ndisp;
int iters;
int levels;
int nr_plane;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int min_disp_th;
@@ -483,7 +493,7 @@ namespace cv
}
//! Speckle filtering - filters small connected components on disparity image.
//! It sets pixel (x,y) to newVal if it corresponds to a small CC with size < maxSpeckleSize.
//! Threshold for border between CC is diffThreshold;
void filterSpeckles( Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);

@@ -343,29 +343,28 @@ static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }
///////////////////////////////////////////////////////////////////////
inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
inline MatPL::MatPL(int _rows, int _cols, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type );
create( _rows, _cols, _type , type_alloc);
}
inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
inline MatPL::MatPL(Size _size, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type );
create( _size.height, _size.width, _type, type_alloc );
}
inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(0), dataend(0)
{
if( refcount )
CV_XADD(refcount, 1);
}
inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
inline MatPL::MatPL(const Mat& m, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if( m.rows > 0 && m.cols > 0 )
create( m.size(), m.type() );
create( m.size(), m.type() , type_alloc);
Mat tmp = createMatHeader();
m.copyTo(tmp);
@@ -375,6 +374,7 @@ inline MatPL::~MatPL()
{
release();
}
inline MatPL& MatPL::operator = (const MatPL& m)
{
if( this != &m )
@@ -388,6 +388,7 @@ inline MatPL& MatPL::operator = (const MatPL& m)
datastart = m.datastart;
dataend = m.dataend;
refcount = m.refcount;
alloc_type = m.alloc_type;
}
return *this;
}
@@ -401,7 +402,7 @@ inline MatPL MatPL::clone() const
return m;
}
inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
inline void MatPL::create(Size _size, int _type, int type_alloc) { create(_size.height, _size.width, _type, type_alloc); }
//CPP void MatPL::create(int _rows, int _cols, int _type);
//CPP void MatPL::release();

@@ -67,7 +67,8 @@ namespace cv
void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
void GpuMat::release() { throw_nogpu(); }
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
bool MatPL::can_device_map_to_host() { throw_nogpu(); return false; }
void MatPL::release() { throw_nogpu(); }
}
@@ -164,7 +165,7 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
else
impl::set_to_with_mask( *this, depth(), s.val, mask, channels());
return *this;
}
@@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
return hdr;
}
bool cv::gpu::MatPL::can_device_map_to_host()
{
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
return prop.canMapHostMemory != 0;
}
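
One review note on the helper above: it always queries device 0 rather than whichever device the caller has selected. A possible variant, sketched with standard CUDA runtime calls; the function name is hypothetical and not part of this patch.

    #include <cuda_runtime.h>

    // Hypothetical variant of can_device_map_to_host() that inspects the device
    // currently bound to the calling thread instead of always device 0.
    static bool can_current_device_map_to_host()
    {
        int dev = 0;
        cudaGetDevice(&dev);                      // device chosen via cudaSetDevice (0 by default)

        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, dev);

        return prop.canMapHostMemory != 0;
    }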
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
_type &= TYPE_MASK;
@@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release()
//////////////////////////////// MatPL ////////////////////////////////
///////////////////////////////////////////////////////////////////////
void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
{
alloc_type = type_alloc;
_type &= TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data )
return;
@@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
//datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
void *ptr;
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
switch (type_alloc)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY:
if (can_device_map_to_host())
{
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) );
}
else
cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default:
cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
}
datastart = data = (uchar*)ptr;
dataend = data + nettosize;
@@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
}
}
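
For reference, the three cudaHostAlloc flags used in the switch above differ mainly in how the host may touch the memory afterwards. A small standalone sketch of the write-combined case (plain CUDA runtime, no OpenCV; error handling omitted), where host writes are cheap but host reads bypass the cache and are slow:

    #include <cuda_runtime.h>
    #include <cstring>

    void write_combined_demo()
    {
        const size_t bytes = 4 << 20;

        void* wc = 0;
        cudaHostAlloc(&wc, bytes, cudaHostAllocWriteCombined);

        std::memset(wc, 0, bytes);   // fine: streaming writes from the host
        // Avoid reading 'wc' back on the host in hot loops - such reads are uncached.

        cudaFreeHost(wc);
    }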
inline MatPL::operator GpuMat() const
{
if (alloc_type == ALLOC_ZEROCOPY)
{
void* pdev = 0;
cudaSafeCall( cudaHostGetDevicePointer( &pdev, this->data, 0 ) );
return GpuMat(this->rows, this->cols, this->type(), pdev, this->step);
}
cv::gpu::error("GpuMat conversion is only available for ALLOC_ZEROCOPY matrices", __FILE__, __LINE__);
return GpuMat();
}
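
To make the mapping used by operator GpuMat() concrete, here is a self-contained CUDA-runtime round trip of the same two calls: cudaHostAlloc with cudaHostAllocMapped followed by cudaHostGetDevicePointer. The explicit cudaSetDeviceFlags call is an assumption for older devices without unified addressing; error handling is trimmed.

    #include <cuda_runtime.h>
    #include <cstdio>

    int main()
    {
        // Host mapping must be enabled before the CUDA context is created
        // (devices with unified addressing do this implicitly).
        cudaSetDeviceFlags(cudaDeviceMapHost);

        void* host = 0;
        cudaHostAlloc(&host, 1024, cudaHostAllocMapped);   // pinned and device-mappable

        void* dev = 0;
        cudaHostGetDevicePointer(&dev, host, 0);           // device-side alias of the same buffer

        std::printf("host %p is visible on the device as %p\n", host, dev);

        cudaFreeHost(host);
        return 0;
    }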
void cv::gpu::MatPL::release()
{
if( refcount && CV_XADD(refcount, -1) == 1 )
