From 35ebeb21bd0d79d9a2c0a9ba5651753251f16397 Mon Sep 17 00:00:00 2001 From: Andrey Morozov Date: Fri, 13 Aug 2010 14:52:50 +0000 Subject: [PATCH] added MatPL implementation with zero-copy and write-combined allocation --- modules/gpu/include/opencv2/gpu/gpu.hpp | 76 +++++++++++-------- .../include/opencv2/gpu/matrix_operations.hpp | 17 +++-- modules/gpu/src/matrix_operations.cpp | 49 +++++++++++- 3 files changed, 97 insertions(+), 45 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 9fa9087895..f74deacb6e 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -233,17 +233,18 @@ namespace cv { public: - //Not supported. Now behaviour is like ALLOC_DEFAULT. - //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 } + //Supported. Now behaviour is like ALLOC_DEFAULT. + enum { ALLOC_PAGE_LOCKED = 0, ALLOC_ZEROCOPY = 1, ALLOC_WRITE_COMBINED = 4 }; MatPL(); MatPL(const MatPL& m); - MatPL(int _rows, int _cols, int _type); - MatPL(Size _size, int _type); + MatPL(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED); + MatPL(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED); + //! creates from cv::Mat with coping data - explicit MatPL(const Mat& m); + explicit MatPL(const Mat& m, int type_alloc = ALLOC_PAGE_LOCKED); ~MatPL(); @@ -253,8 +254,8 @@ namespace cv MatPL clone() const; //! allocates new matrix data unless the matrix already has specified size and type. - void create(int _rows, int _cols, int _type); - void create(Size _size, int _type); + void create(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED); + void create(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED); //! decrements reference counter and released memory if needed.
void release(); @@ -263,6 +264,11 @@ namespace cv Mat createMatHeader() const; operator Mat() const; + operator GpuMat() const; + + static bool can_device_map_to_host(); + + // Please see cv::Mat for descriptions bool isContinuous() const; size_t elemSize() const; @@ -274,16 +280,20 @@ namespace cv Size size() const; bool empty() const; + // Please see cv::Mat for descriptions int flags; int rows, cols; size_t step; + int alloc_type; + uchar* data; int* refcount; uchar* datastart; uchar* dataend; + }; //////////////////////////////// CudaStream //////////////////////////////// @@ -332,7 +342,7 @@ namespace cv CV_EXPORTS void remap(const GpuMat& src, const GpuMat& xmap, const GpuMat& ymap, GpuMat& dst); - + CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //////////////////////////////// StereoBM_GPU //////////////////////////////// @@ -374,9 +384,9 @@ namespace cv private: GpuMat minSSD, leBuf, riBuf; }; - + ////////////////////////// StereoBeliefPropagation /////////////////////////// - + class CV_EXPORTS StereoBeliefPropagation { public: @@ -385,15 +395,15 @@ namespace cv enum { DEFAULT_LEVELS = 5 }; //! the default constructor - explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, + explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_32F); //! the full constructor taking the number of disparities, number of BP iterations on each level, - //! number of levels, truncation of data cost, data weight, + //! number of levels, truncation of data cost, data weight, //! 
truncation of discontinuity cost and discontinuity single jump - StereoBeliefPropagation(int ndisp, int iters, int levels, + StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F); @@ -401,29 +411,29 @@ namespace cv //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair, //! if disparity is empty output type will be CV_16S else output type will be disparity.type(). void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity); - + //! Acync version void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream); - + int ndisp; int iters; int levels; - - float max_data_term; + + float max_data_term; float data_weight; - float max_disc_term; + float max_disc_term; float disc_single_jump; - + int msg_type; private: GpuMat u, d, l, r, u2, d2, l2, r2; - std::vector datas; + std::vector datas; GpuMat out; }; - + /////////////////////////// StereoConstantSpaceBP /////////////////////////// - + class CV_EXPORTS StereoConstantSpaceBP { public: @@ -434,13 +444,13 @@ namespace cv //! the default constructor explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, - int levels = DEFAULT_LEVELS, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F); //! the full constructor taking the number of disparities, number of BP iterations on each level, - //! number of levels, number of active disparity on the first level, truncation of data cost, data weight, + //! number of levels, number of active disparity on the first level, truncation of data cost, data weight, //! 
truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, @@ -450,20 +460,20 @@ namespace cv //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair, //! if disparity is empty output type will be CV_16S else output type will be disparity.type(). void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity); - + //! Acync version void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream); - + int ndisp; int iters; int levels; - + int nr_plane; - - float max_data_term; + + float max_data_term; float data_weight; - float max_disc_term; + float max_disc_term; float disc_single_jump; int min_disp_th; @@ -483,7 +493,7 @@ namespace cv } //! Speckle filtering - filters small connected components on diparity image. - //! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize. + //! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize. //! 
Threshold for border between CC is diffThreshold; void filterSpeckles( Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf); diff --git a/modules/gpu/include/opencv2/gpu/matrix_operations.hpp b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp index f3ee64a9b6..36a078d7bd 100644 --- a/modules/gpu/include/opencv2/gpu/matrix_operations.hpp +++ b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp @@ -343,29 +343,28 @@ static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); } /////////////////////////////////////////////////////////////////////// inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {} -inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +inline MatPL::MatPL(int _rows, int _cols, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { if( _rows > 0 && _cols > 0 ) - create( _rows, _cols, _type ); + create( _rows, _cols, _type , type_alloc); } -inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +inline MatPL::MatPL(Size _size, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { if( _size.height > 0 && _size.width > 0 ) - create( _size.height, _size.width, _type ); + create( _size.height, _size.width, _type, type_alloc ); } inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(0), dataend(0) { if( refcount ) CV_XADD(refcount, 1); - } -inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +inline MatPL::MatPL(const Mat& m, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { if( m.rows > 
0 && m.cols > 0 ) - create( m.size(), m.type() ); + create( m.size(), m.type() , type_alloc); Mat tmp = createMatHeader(); m.copyTo(tmp); @@ -375,6 +374,7 @@ inline MatPL::~MatPL() { release(); } + inline MatPL& MatPL::operator = (const MatPL& m) { if( this != &m ) @@ -388,6 +388,7 @@ inline MatPL& MatPL::operator = (const MatPL& m) datastart = m.datastart; dataend = m.dataend; refcount = m.refcount; + alloc_type = m.alloc_type; } return *this; } @@ -401,7 +402,7 @@ inline MatPL MatPL::clone() const return m; } -inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); } +inline void MatPL::create(Size _size, int _type, int type_alloc) { create(_size.height, _size.width, _type, type_alloc); } //CCP void MatPL::create(int _rows, int _cols, int _type); //CPP void MatPL::release(); diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index cdab363d83..03e4fc6ad6 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -67,7 +67,8 @@ namespace cv void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } void GpuMat::release() { throw_nogpu(); } - void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } + void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); } + void MatPL::get_property_device() { throw_nogpu(); } void MatPL::release() { throw_nogpu(); } } @@ -164,7 +165,7 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) else impl::set_to_with_mask( *this, depth(), s.val, mask, channels()); - return *this; + return *this; } @@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const return hdr; } +bool cv::gpu::MatPL::can_device_map_to_host() +{ + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, 0); + + return (prop.canMapHostMemory != 0) ? 
true : false; +} + + void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) { _type &= TYPE_MASK; @@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release() //////////////////////////////// MatPL //////////////////////////////// /////////////////////////////////////////////////////////////////////// -void cv::gpu::MatPL::create(int _rows, int _cols, int _type) +void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc) { + alloc_type = type_alloc; _type &= TYPE_MASK; if( rows == _rows && cols == _cols && type() == _type && data ) return; @@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type) //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount)); void *ptr; - cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); + + switch (type_alloc) + { + case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break; + case ALLOC_ZEROCOPY: + if (can_device_map_to_host() == true) + { + cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); + } + else + cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__); + break; + + case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break; + + default: + cv::gpu::error("Invalid alloc type", __FILE__, __LINE__); + } datastart = data = (uchar*)ptr; dataend = data + nettosize; @@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type) } } +inline MatPL::operator GpuMat() const +{ + if (alloc_type == ALLOC_ZEROCOPY) + { + void ** pdev; + cudaHostGetDevicePointer( pdev, this->data, 0 ); + GpuMat m(this->rows, this->cols, this->type(), *pdev, this->step); + return m; + } + else + cv::gpu::error("", __FILE__, __LINE__); +} + void cv::gpu::MatPL::release() { if( refcount && CV_XADD(refcount, -1) == 1 )