diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake
index 6f3ce4e903..7f258dc25f 100644
--- a/cmake/OpenCVDetectPython.cmake
+++ b/cmake/OpenCVDetectPython.cmake
@@ -12,7 +12,10 @@ if(WIN32 AND NOT PYTHON_EXECUTABLE)
     )
   endforeach()
 endif()
+find_host_package(PythonInterp 2.7)
+if(NOT PYTHONINTERP_FOUND)
 find_host_package(PythonInterp "${MIN_VER_PYTHON}")
+endif()
 
 unset(HAVE_SPHINX CACHE)
diff --git a/modules/core/doc/operations_on_arrays.rst b/modules/core/doc/operations_on_arrays.rst
index d38f57ac33..a894d07685 100644
--- a/modules/core/doc/operations_on_arrays.rst
+++ b/modules/core/doc/operations_on_arrays.rst
@@ -378,7 +378,7 @@ Calculates the covariance matrix of a set of vectors.
 
 .. ocv:function:: void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, Mat& mean, int flags, int ctype=CV_64F)
 
-.. ocv:function:: void calcCovarMatrix( InputArray samples, OutputArray covar, OutputArray mean, int flags, int ctype=CV_64F)
+.. ocv:function:: void calcCovarMatrix( InputArray samples, OutputArray covar, InputOutputArray mean, int flags, int ctype=CV_64F)
 
 .. ocv:pyfunction:: cv2.calcCovarMatrix(samples, flags[, covar[, mean[, ctype]]]) -> covar, mean
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index c7f07ed459..ee91c5e391 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -158,6 +158,9 @@ enum { REDUCE_SUM = 0,
 //! swaps two matrices
 CV_EXPORTS void swap(Mat& a, Mat& b);
 
+//! swaps two umatrices
+CV_EXPORTS void swap( UMat& a, UMat& b );
+
 //! 1D interpolation function: returns coordinate of the "donor" pixel for the specified location p.
 CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType);
 
@@ -439,7 +442,7 @@ CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, Mat& mean,
 //! computes covariation matrix of a set of samples
 CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
-                                   OutputArray mean, int flags, int ctype = CV_64F);
+                                   InputOutputArray mean, int flags, int ctype = CV_64F);
 
 CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
                              OutputArray eigenvectors, int maxComponents = 0);
diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index ce0518015e..61b8df7479 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -472,6 +472,9 @@ class CV_EXPORTS RNG;
 class CV_EXPORTS Mat;
 class CV_EXPORTS MatExpr;
 
+class CV_EXPORTS UMat;
+class CV_EXPORTS UMatExpr;
+
 class CV_EXPORTS SparseMat;
 typedef Mat MatND;
diff --git a/modules/core/include/opencv2/core/cuda.inl.hpp b/modules/core/include/opencv2/core/cuda.inl.hpp
index 170d0affb3..d497f20ccf 100644
--- a/modules/core/include/opencv2/core/cuda.inl.hpp
+++ b/modules/core/include/opencv2/core/cuda.inl.hpp
@@ -595,7 +595,7 @@ namespace cv {
 
 inline
 Mat::Mat(const cuda::GpuMat& m)
-    : flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
+    : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
 {
     m.download(*this);
 }
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index 8cb27d5de8..4df2432aeb 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -55,6 +55,9 @@
 namespace cv
 {
 
+enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
+    ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
+
 //////////////////////// Input/Output Array Arguments /////////////////////////////////
 
 /*!
@@ -67,7 +70,7 @@ public:
         KIND_SHIFT = 16,
         FIXED_TYPE = 0x8000 << KIND_SHIFT,
         FIXED_SIZE = 0x4000 << KIND_SHIFT,
-        KIND_MASK = ~(FIXED_TYPE|FIXED_SIZE) - (1 << KIND_SHIFT) + 1,
+        KIND_MASK = 31 << KIND_SHIFT,
 
         NONE              = 0 << KIND_SHIFT,
         MAT               = 1 << KIND_SHIFT,
@@ -79,10 +82,14 @@ public:
         OPENGL_BUFFER     = 7 << KIND_SHIFT,
         CUDA_MEM          = 8 << KIND_SHIFT,
         GPU_MAT           = 9 << KIND_SHIFT,
-        OCL_MAT           =10 << KIND_SHIFT
+        OCL_MAT           =10 << KIND_SHIFT,
+        UMAT              =11 << KIND_SHIFT,
+        STD_VECTOR_UMAT   =12 << KIND_SHIFT,
+        UEXPR             =13 << KIND_SHIFT
     };
 
     _InputArray();
+    _InputArray(int _flags, void* _obj);
     _InputArray(const Mat& m);
     _InputArray(const MatExpr& expr);
     _InputArray(const std::vector<Mat>& vec);
@@ -97,11 +104,16 @@ public:
     _InputArray(const ogl::Buffer& buf);
     _InputArray(const cuda::CudaMem& cuda_mem);
     template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
+    _InputArray(const UMat& um);
+    _InputArray(const std::vector<UMat>& umv);
+    _InputArray(const UMatExpr& uexpr);
 
-    virtual Mat getMat(int i=-1) const;
+    virtual Mat getMat(int idx=-1) const;
+    virtual UMat getUMat(int idx=-1) const;
     virtual void getMatVector(std::vector<Mat>& mv) const;
     virtual cuda::GpuMat getGpuMat() const;
     virtual ogl::Buffer getOGlBuffer() const;
+    void* getObj() const;
 
     virtual int kind() const;
     virtual Size size(int i=-1) const;
@@ -113,9 +125,13 @@ public:
 
     virtual ~_InputArray();
 
+protected:
     int flags;
     void* obj;
     Size sz;
+
+    void init(int _flags, const void* _obj);
+    void init(int _flags, const void* _obj, Size _sz);
 };
@@ -140,6 +156,7 @@ public:
     };
 
     _OutputArray();
+    _OutputArray(int _flags, void* _obj);
     _OutputArray(Mat& m);
     _OutputArray(std::vector<Mat>& vec);
     _OutputArray(cuda::GpuMat& d_mat);
@@ -152,6 +169,8 @@ public:
     template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
     template<typename _Tp> _OutputArray(_Tp* vec, int n);
     template<typename _Tp, int m, int n> _OutputArray(Matx<_Tp, m, n>& matx);
+    _OutputArray(UMat& m);
+    _OutputArray(std::vector<UMat>& vec);
 
     _OutputArray(const Mat& m);
     _OutputArray(const std::vector<Mat>& vec);
@@ -165,6 +184,8 @@ public:
     template<typename _Tp> _OutputArray(const Mat_<_Tp>& m);
     template<typename _Tp> _OutputArray(const _Tp* vec, int n);
     template<typename _Tp, int m, int n> _OutputArray(const Matx<_Tp, m, n>& matx);
+    _OutputArray(const UMat& m);
+    _OutputArray(const std::vector<UMat>& vec);
 
     virtual bool fixedSize() const;
     virtual bool fixedType() const;
@@ -178,23 +199,58 @@ public:
     virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
     virtual void release() const;
     virtual void clear() const;
+};
+
 
-    virtual ~_OutputArray();
+class CV_EXPORTS _InputOutputArray : public _OutputArray
+{
+public:
+    _InputOutputArray();
+    _InputOutputArray(int _flags, void* _obj);
+    _InputOutputArray(Mat& m);
+    _InputOutputArray(std::vector<Mat>& vec);
+    _InputOutputArray(cuda::GpuMat& d_mat);
+    _InputOutputArray(ogl::Buffer& buf);
+    _InputOutputArray(cuda::CudaMem& cuda_mem);
+    template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
+    template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(_Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputOutputArray(Matx<_Tp, m, n>& matx);
+    _InputOutputArray(UMat& m);
+    _InputOutputArray(std::vector<UMat>& vec);
+
+    _InputOutputArray(const Mat& m);
+    _InputOutputArray(const std::vector<Mat>& vec);
+    _InputOutputArray(const cuda::GpuMat& d_mat);
+    _InputOutputArray(const ogl::Buffer& buf);
+    _InputOutputArray(const cuda::CudaMem& cuda_mem);
+    template<typename _Tp> _InputOutputArray(const cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(const std::vector<_Tp>& vec);
+    template<typename _Tp> _InputOutputArray(const std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputOutputArray(const Matx<_Tp, m, n>& matx);
+    _InputOutputArray(const UMat& m);
+    _InputOutputArray(const std::vector<UMat>& vec);
 };
 
 typedef const _InputArray& InputArray;
 typedef InputArray InputArrayOfArrays;
 typedef const _OutputArray& OutputArray;
 typedef OutputArray OutputArrayOfArrays;
-typedef OutputArray InputOutputArray;
-typedef OutputArray InputOutputArrayOfArrays;
-
-CV_EXPORTS OutputArray noArray();
-
+typedef const _InputOutputArray& InputOutputArray;
+typedef InputOutputArray InputOutputArrayOfArrays;
 
+CV_EXPORTS InputOutputArray noArray();
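// What the typedef change means in practice, sketched with a hypothetical helper
// (not part of the patch): InputOutputArray is now backed by its own
// _InputOutputArray proxy instead of aliasing OutputArray, so a single
// read-modify-write signature binds Mat and UMat alike.
//
//     void halveInPlace(cv::InputOutputArray a)
//     {
//         cv::Mat m = a.getMat();  // for a UMat this maps the data to host memory
//         m *= 0.5;                // reads and writes through the same proxy
//     }
//
//     cv::Mat  img1(3, 3, CV_32F, cv::Scalar::all(1)); halveInPlace(img1);
//     cv::UMat img2(3, 3, CV_32F, cv::Scalar::all(1)); halveInPlace(img2);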
 
 /////////////////////////////////// MatAllocator //////////////////////////////////////
 
+struct CV_EXPORTS UMatData;
+
 /*!
    Custom array allocator
 */
@@ -204,11 +260,27 @@ class CV_EXPORTS MatAllocator
 public:
     MatAllocator() {}
     virtual ~MatAllocator() {}
-    virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
-                          uchar*& datastart, uchar*& data, size_t* step) = 0;
-    virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
-};
+
+    // let's comment it out for now to detect and fix all the uses of allocator
+    //virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
+    //                      uchar*& datastart, uchar*& data, size_t* step) = 0;
+    //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
+    virtual UMatData* allocate(int dims, const int* sizes,
+                               int type, size_t* step) const = 0;
+    virtual bool allocate(UMatData* data, int accessflags) const = 0;
+    virtual void deallocate(UMatData* data) const = 0;
+    virtual void map(UMatData* data, int accessflags) const = 0;
+    virtual void unmap(UMatData* data) const = 0;
+    virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
+                          const size_t srcofs[], const size_t srcstep[],
+                          const size_t dststep[]) const = 0;
+    virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[],
+                        const size_t dstofs[], const size_t dststep[],
+                        const size_t srcstep[]) const = 0;
+    virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
+                      const size_t srcofs[], const size_t srcstep[],
+                      const size_t dstofs[], const size_t dststep[], bool sync) const = 0;
+};
 
 //////////////////////////////// MatCommaInitializer //////////////////////////////////
 
@@ -240,11 +312,81 @@ protected:
 };
 
 
+/////////////////////////////////////// Mat ///////////////////////////////////////////
+
+// note that umatdata might be allocated together
+// with the matrix data, not as a separate object.
+// therefore, it does not have constructor or destructor;
+// it should be explicitly initialized using init().
+struct CV_EXPORTS UMatData
+{
+    enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
+        DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24 };
+    UMatData(const MatAllocator* allocator);
+
+    // provide atomic access to the structure
+    void lock();
+    void unlock();
+
+    bool hostCopyObsolete() const;
+    bool deviceCopyObsolete() const;
+    bool copyOnMap() const;
+    bool tempUMat() const;
+    bool tempCopiedUMat() const;
+    void markHostCopyObsolete(bool flag);
+    void markDeviceCopyObsolete(bool flag);
+
+    const MatAllocator* prevAllocator;
+    const MatAllocator* currAllocator;
+    int urefcount;
+    int refcount;
+    uchar* data;
+    uchar* origdata;
+    size_t size;
 
-/////////////////////////////////////// Mat ///////////////////////////////////////////
+    int flags;
+    void* handle;
+    void* userdata;
+};
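// The flag values above compose: TEMP_COPIED_UMAT == 24 == (TEMP_UMAT | 16),
// so every temp-copied UMat is also a temp UMat. A sketch of the intended
// tests, matching the inline implementations later in mat.inl.hpp:
//
//     bool isTemp       = (u->flags & cv::UMatData::TEMP_UMAT) != 0;
//     bool isTempCopied = (u->flags & cv::UMatData::TEMP_COPIED_UMAT)
//                         == cv::UMatData::TEMP_COPIED_UMAT;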
 
-/*!
+
+struct CV_EXPORTS UMatDataAutoLock
+{
+    UMatDataAutoLock(UMatData* u);
+    ~UMatDataAutoLock();
+    UMatData* u;
+};
+
+
+struct CV_EXPORTS MatSize
+{
+    MatSize(int* _p);
+    Size operator()() const;
+    const int& operator[](int i) const;
+    int& operator[](int i);
+    operator const int*() const;
+    bool operator == (const MatSize& sz) const;
+    bool operator != (const MatSize& sz) const;
+
+    int* p;
+};
+
+struct CV_EXPORTS MatStep
+{
+    MatStep();
+    MatStep(size_t s);
+    const size_t& operator[](int i) const;
+    size_t& operator[](int i);
+    operator size_t() const;
+    MatStep& operator = (size_t s);
+
+    size_t* p;
+    size_t buf[2];
+protected:
+    MatStep& operator = (const MatStep&);
+};
+
+/*!
    The n-dimensional matrix class.
 
    The class represents an n-dimensional dense numerical array that can act as
@@ -497,14 +639,6 @@ public:
     //! builds matrix from comma initializer
     template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
 
-    // //! converts old-style CvMat to the new matrix; the data is not copied by default
-    // Mat(const CvMat* m, bool copyData=false);
-    // //! converts old-style CvMatND to the new matrix; the data is not copied by default
-    // Mat(const CvMatND* m, bool copyData=false);
-    // //! converts old-style IplImage to the new matrix; the data is not copied by default
-    // Mat(const IplImage* img, bool copyData=false);
-    //Mat(const void* img, bool copyData=false);
-
     //! download data from GpuMat
     explicit Mat(const cuda::GpuMat& m);
 
@@ -514,6 +648,9 @@ public:
     Mat& operator = (const Mat& m);
     Mat& operator = (const MatExpr& expr);
 
+    //! retrieve UMat from Mat
+    UMat getUMat(int accessFlags) const;
+
     //! returns a new matrix header for the specified row
     Mat row(int y) const;
     //! returns a new matrix header for the specified column
@@ -737,10 +874,6 @@ public:
     //! pointer to the data
     uchar* data;
 
-    //! pointer to the reference counter;
-    // when matrix points to user-allocated data, the pointer is NULL
-    int* refcount;
-
     //! helper fields used in locateROI and adjustROI
     uchar* datastart;
     uchar* dataend;
@@ -748,37 +881,14 @@ public:
 
     //! custom allocator
     MatAllocator* allocator;
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
 
-    struct CV_EXPORTS MSize
-    {
-        MSize(int* _p);
-        Size operator()() const;
-        const int& operator[](int i) const;
-        int& operator[](int i);
-        operator const int*() const;
-        bool operator == (const MSize& sz) const;
-        bool operator != (const MSize& sz) const;
-
-        int* p;
-    };
+    //! interaction with UMat
+    UMatData* u;
 
-    struct CV_EXPORTS MStep
-    {
-        MStep();
-        MStep(size_t s);
-        const size_t& operator[](int i) const;
-        size_t& operator[](int i);
-        operator size_t() const;
-        MStep& operator = (size_t s);
-
-        size_t* p;
-        size_t buf[2];
-    protected:
-        MStep& operator = (const MStep&);
-    };
-
-    MSize size;
-    MStep step;
+    MatSize size;
+    MatStep step;
 
 protected:
 };
@@ -1001,6 +1111,205 @@ typedef Mat_<Vec3d> Mat3d;
 typedef Mat_<Vec4d> Mat4d;
 
+class CV_EXPORTS UMatExpr;
+
+class CV_EXPORTS UMat
+{
+public:
+    //! default constructor
+    UMat();
+    //! constructs 2D matrix of the specified size and type
+    // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+    UMat(int rows, int cols, int type);
+    UMat(Size size, int type);
+    //! constructs 2D matrix and fills it with the specified value _s.
+    UMat(int rows, int cols, int type, const Scalar& s);
+    UMat(Size size, int type, const Scalar& s);
+
+    //! constructs n-dimensional matrix
+    UMat(int ndims, const int* sizes, int type);
+    UMat(int ndims, const int* sizes, int type, const Scalar& s);
+
+    //! copy constructor
+    UMat(const UMat& m);
+
+    //! creates a matrix header for a part of the bigger matrix
+    UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all());
+    UMat(const UMat& m, const Rect& roi);
+    UMat(const UMat& m, const Range* ranges);
+    //! builds matrix from std::vector with or without copying the data
+    template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);
+    //! builds matrix from cv::Vec; the data is copied by default
+    template<typename _Tp, int n> explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true);
+    //! builds matrix from cv::Matx; the data is copied by default
+    template<typename _Tp, int m, int n> explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+    //! builds matrix from a 2D point
+    template<typename _Tp> explicit UMat(const Point_<_Tp>& pt, bool copyData=true);
+    //! builds matrix from a 3D point
+    template<typename _Tp> explicit UMat(const Point3_<_Tp>& pt, bool copyData=true);
+    //! builds matrix from comma initializer
+    template<typename _Tp> explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    //! destructor - calls release()
+    ~UMat();
+    //! assignment operators
+    UMat& operator = (const UMat& m);
+    UMat& operator = (const UMatExpr& expr);
+
+    Mat getMat(int flags) const;
+
+    //! returns a new matrix header for the specified row
+    UMat row(int y) const;
+    //! returns a new matrix header for the specified column
+    UMat col(int x) const;
+    //! ... for the specified row span
+    UMat rowRange(int startrow, int endrow) const;
+    UMat rowRange(const Range& r) const;
+    //! ... for the specified column span
+    UMat colRange(int startcol, int endcol) const;
+    UMat colRange(const Range& r) const;
+    //! ... for the specified diagonal
+    // (d=0 - the main diagonal,
+    //  >0 - a diagonal from the lower half,
+    //  <0 - a diagonal from the upper half)
+    UMat diag(int d=0) const;
+    //! constructs a square diagonal matrix whose main diagonal is vector "d"
+    static UMat diag(const UMat& d);
+
+    //! returns deep copy of the matrix, i.e. the data is copied
+    UMat clone() const;
+    //! copies the matrix content to "m".
+    // It calls m.create(this->size(), this->type()).
+    void copyTo( OutputArray m ) const;
+    //! copies those matrix elements to "m" that are marked with non-zero mask elements.
+    void copyTo( OutputArray m, InputArray mask ) const;
+    //! converts matrix to another datatype with optional scaling. See cvConvertScale.
+    void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+
+    void assignTo( UMat& m, int type=-1 ) const;
+
+    //! sets every matrix element to s
+    UMat& operator = (const Scalar& s);
+    //! sets some of the matrix elements to s, according to the mask
+    UMat& setTo(InputArray value, InputArray mask=noArray());
+    //! creates alternative matrix header for the same data, with different
+    // number of channels and/or different number of rows. see cvReshape.
+    UMat reshape(int cn, int rows=0) const;
+    UMat reshape(int cn, int newndims, const int* newsz) const;
+
+    //! matrix transposition by means of matrix expressions
+    UMatExpr t() const;
+    //! matrix inversion by means of matrix expressions
+    UMatExpr inv(int method=DECOMP_LU) const;
+    //! per-element matrix multiplication by means of matrix expressions
+    UMatExpr mul(InputArray m, double scale=1) const;
+
+    //! computes cross-product of 2 3D vectors
+    UMat cross(InputArray m) const;
+    //! computes dot-product
+    double dot(InputArray m) const;
+
+    //! Matlab-style matrix initialization
+    static UMatExpr zeros(int rows, int cols, int type);
+    static UMatExpr zeros(Size size, int type);
+    static UMatExpr zeros(int ndims, const int* sz, int type);
+    static UMatExpr ones(int rows, int cols, int type);
+    static UMatExpr ones(Size size, int type);
+    static UMatExpr ones(int ndims, const int* sz, int type);
+    static UMatExpr eye(int rows, int cols, int type);
+    static UMatExpr eye(Size size, int type);
+
+    //! allocates new matrix data unless the matrix already has specified size and type.
+    // previous data is unreferenced if needed.
+    void create(int rows, int cols, int type);
+    void create(Size size, int type);
+    void create(int ndims, const int* sizes, int type);
+
+    //! increases the reference counter; use with care to avoid memleaks
+    void addref();
+    //! decreases reference counter;
+    // deallocates the data when reference counter reaches 0.
+    void release();
+
+    //! deallocates the matrix data
+    void deallocate();
+    //! internal use function; properly re-allocates _size, _step arrays
+    void copySize(const UMat& m);
+
+    //! locates matrix header within a parent matrix. See below
+    void locateROI( Size& wholeSize, Point& ofs ) const;
+    //! moves/resizes the current matrix ROI inside the parent matrix.
+    UMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+    //! extracts a rectangular sub-matrix
+    // (this is a generalized form of row, rowRange etc.)
+    UMat operator()( Range rowRange, Range colRange ) const;
+    UMat operator()( const Rect& roi ) const;
+    UMat operator()( const Range* ranges ) const;
+
+    //! returns true iff the matrix data is continuous
+    // (i.e. when there are no gaps between successive rows).
+    // similar to CV_IS_MAT_CONT(cvmat->type)
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a submatrix of another matrix
+    bool isSubmatrix() const;
+
+    //! returns element size in bytes,
+    // similar to CV_ELEM_SIZE(cvmat->type)
+    size_t elemSize() const;
+    //! returns the size of element channel in bytes.
+    size_t elemSize1() const;
+    //! returns element type, similar to CV_MAT_TYPE(cvmat->type)
+    int type() const;
+    //! returns element type, similar to CV_MAT_DEPTH(cvmat->type)
+    int depth() const;
+    //! returns element type, similar to CV_MAT_CN(cvmat->type)
+    int channels() const;
+    //! returns step/elemSize1()
+    size_t step1(int i=0) const;
+    //! returns true if matrix data is NULL
+    bool empty() const;
+    //! returns the total number of matrix elements
+    size_t total() const;
+
+    //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+    int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
+
+    void* handle(int accessFlags) const;
+    void ndoffset(size_t* ofs) const;
+
+    enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+    /*! includes several bit-fields:
+         - the magic signature
+         - continuity flag
+         - depth
+         - number of channels
+     */
+    int flags;
+    //! the matrix dimensionality, >= 2
+    int dims;
+    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+    int rows, cols;
+
+    //! custom allocator
+    MatAllocator* allocator;
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
+
+    // black-box container of UMat data
+    UMatData* u;
+
+    // offset of the submatrix (or 0)
+    size_t offset;
+
+    MatSize size;
+    MatStep step;
+
+protected:
+};
+
 /////////////////////////// multi-dimensional sparse matrix //////////////////////////
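// A usage sketch for the UMat class declared above (based only on this header;
// whether the data actually lives on an OpenCL device depends on the installed
// MatAllocator):
//
//     cv::UMat u(480, 640, CV_8UC1, cv::Scalar::all(0));
//     cv::UMat roi = u(cv::Rect(10, 10, 64, 64));  // header-only view, shared UMatData
//     {
//         cv::Mat m = u.getMat(cv::ACCESS_WRITE);  // temporary host view
//         m.setTo(cv::Scalar::all(255));
//     }   // drop the Mat before handing u back to device-side code
//     cv::UMat v;
//     u.convertTo(v, CV_32F, 1.0/255);             // stays within UMat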
diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp
index 671ac60a90..3c49984e7f 100644
--- a/modules/core/include/opencv2/core/mat.inl.hpp
+++ b/modules/core/include/opencv2/core/mat.inl.hpp
@@ -52,127 +52,244 @@ namespace cv
 
 //////////////////////// Input/Output Arrays ////////////////////////
 
+inline void _InputArray::init(int _flags, const void* _obj)
+{ flags = _flags; obj = (void*)_obj; }
+
+inline void _InputArray::init(int _flags, const void* _obj, Size _sz)
+{ flags = _flags; obj = (void*)_obj; sz = _sz; }
+
+inline void* _InputArray::getObj() const { return obj; }
+
+inline _InputArray::_InputArray() { init(0, 0); }
+inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); }
+inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); }
+inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); }
+
 template<typename _Tp> inline
 _InputArray::_InputArray(const std::vector<_Tp>& vec)
-    : flags(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type), obj((void*)&vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
-    : flags(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type), obj((void*)&vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const std::vector<Mat_<_Tp> >& vec)
-    : flags(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type), obj((void*)&vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_READ, &vec); }
 
 template<typename _Tp, int m, int n> inline
 _InputArray::_InputArray(const Matx<_Tp, m, n>& mtx)
-    : flags(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type), obj((void*)&mtx), sz(n, m)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const _Tp* vec, int n)
-    : flags(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type), obj((void*)vec), sz(n, 1)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, vec, Size(n, 1)); }
 
 template<typename _Tp> inline
 _InputArray::_InputArray(const Mat_<_Tp>& m)
-    : flags(FIXED_TYPE + MAT + DataType<_Tp>::type), obj((void*)&m)
-{}
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_READ, &m); }
 
+inline _InputArray::_InputArray(const double& val)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); }
+
+inline _InputArray::_InputArray(const MatExpr& expr)
+{ init(FIXED_TYPE + FIXED_SIZE + EXPR + ACCESS_READ, &expr); }
+
+inline _InputArray::_InputArray(const cuda::GpuMat& d_mat)
+{ init(GPU_MAT + ACCESS_READ, &d_mat); }
+
+inline _InputArray::_InputArray(const ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_READ, &buf); }
+
+inline _InputArray::_InputArray(const cuda::CudaMem& cuda_mem)
+{ init(CUDA_MEM + ACCESS_READ, &cuda_mem); }
+
+inline _InputArray::~_InputArray() {}
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
+inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); }
+inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); }
+inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(std::vector<_Tp>& vec)
-    : _InputArray(vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
-    : _InputArray(vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(std::vector<Mat_<_Tp> >& vec)
-    : _InputArray(vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(Mat_<_Tp>& m)
-    : _InputArray(m)
-{}
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
 
 template<typename _Tp, int m, int n> inline
 _OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx)
-    : _InputArray(mtx)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(_Tp* vec, int n)
-    : _InputArray(vec, n)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const std::vector<_Tp>& vec)
-    : _InputArray(vec)
-{
-    flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const std::vector<std::vector<_Tp> >& vec)
-    : _InputArray(vec)
-{
-    flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const std::vector<Mat_<_Tp> >& vec)
-    : _InputArray(vec)
-{
-    flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const Mat_<_Tp>& m)
-    : _InputArray(m)
-{
-    flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
 
 template<typename _Tp, int m, int n> inline
 _OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx)
-    : _InputArray(mtx)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
 
 template<typename _Tp> inline
 _OutputArray::_OutputArray(const _Tp* vec, int n)
-    : _InputArray(vec, n)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
+
+inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat)
+{ init(GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+
+inline _OutputArray::_OutputArray(cuda::CudaMem& cuda_mem)
+{ init(CUDA_MEM + ACCESS_WRITE, &cuda_mem); }
+
+inline _OutputArray::_OutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); }
+
+inline _OutputArray::_OutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); }
+
+inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_WRITE, &buf); }
 
+inline _OutputArray::_OutputArray(const cuda::CudaMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_MEM + ACCESS_WRITE, &cuda_mem); }
 
-//////////////////////////////// Mat ////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////
+
+inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
+inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); }
+inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); }
+inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(_Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat)
+{ init(GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::CudaMem& cuda_mem)
+{ init(CUDA_MEM + ACCESS_RW, &cuda_mem); }
+
+inline _InputOutputArray::_InputOutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::CudaMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_MEM + ACCESS_RW, &cuda_mem); }
+
+//////////////////////////////////////////// Mat //////////////////////////////////////////
 
 inline
 Mat::Mat()
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {}
 
 inline
 Mat::Mat(int _rows, int _cols, int _type)
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {
     create(_rows, _cols, _type);
 }
 
 inline
 Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {
     create(_rows, _cols, _type);
     *this = _s;
@@ -180,16 +297,16 @@ Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
 
 inline
 Mat::Mat(Size _sz, int _type)
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {
     create( _sz.height, _sz.width, _type );
 }
 
 inline
 Mat::Mat(Size _sz, int _type, const Scalar& _s)
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {
     create(_sz.height, _sz.width, _type);
     *this = _s;
@@ -197,16 +314,16 @@ Mat::Mat(Size _sz, int _type, const Scalar& _s)
 
 inline
 Mat::Mat(int _dims, const int* _sz, int _type)
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {
     create(_dims, _sz, _type);
 }
 
 inline
 Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
-    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
-      datalimit(0), allocator(0), size(&rows)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows)
 {
     create(_dims, _sz, _type);
     *this = _s;
@@ -214,12 +331,12 @@ Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
 
 inline
 Mat::Mat(const Mat& m)
-    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), refcount(m.refcount),
+    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
       datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
-      size(&rows)
+      u(m.u), size(&rows)
 {
-    if( refcount )
-        CV_XADD(refcount, 1);
+    if( u )
+        CV_XADD(&u->refcount, 1);
     if( m.dims <= 2 )
     {
         step[0] = m.step[0]; step[1] = m.step[1];
@@ -234,8 +351,8 @@ Mat::Mat(const Mat& m)
 inline
 Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
     : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols),
-      data((uchar*)_data), refcount(0), datastart((uchar*)_data), dataend(0), datalimit(0),
-      allocator(0), size(&rows)
+      data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
 {
     size_t esz = CV_ELEM_SIZE(_type);
     size_t minstep = cols * esz;
@@ -259,8 +376,8 @@ Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
 inline
 Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
     : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width),
-      data((uchar*)_data), refcount(0), datastart((uchar*)_data), dataend(0), datalimit(0),
-      allocator(0), size(&rows)
+      data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
 {
     size_t esz = CV_ELEM_SIZE(_type);
     size_t minstep = cols*esz;
@@ -284,7 +401,7 @@ Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
 template<typename _Tp> inline
 Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
     : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
-      cols(1), data(0), refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+      cols(1), data(0), datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     if(vec.empty())
         return;
@@ -301,7 +418,7 @@ Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
 template<typename _Tp, int n> inline
 Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
     : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
-      refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     if( !copyData )
     {
@@ -317,7 +434,7 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
 template<typename _Tp, int m, int n> inline
 Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
     : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
-      refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     if( !copyData )
     {
@@ -333,7 +450,7 @@ Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
 template<typename _Tp> inline
 Mat::Mat(const Point_<_Tp>& pt, bool copyData)
     : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
-      refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     if( !copyData )
     {
@@ -352,7 +469,7 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData)
 template<typename _Tp> inline
 Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
     : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
-      refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     if( !copyData )
     {
@@ -372,7 +489,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
 template<typename _Tp> inline
 Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
     : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
-      refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+      datastart(0), dataend(0), allocator(0), u(0), size(&rows)
 {
     *this = commaInitializer.operator Mat_<_Tp>();
 }
@@ -390,8 +507,8 @@ Mat& Mat::operator = (const Mat& m)
 {
     if( this != &m )
     {
-        if( m.refcount )
-            CV_XADD(m.refcount, 1);
+        if( m.u )
+            CV_XADD(&m.u->refcount, 1);
         release();
         flags = m.flags;
         if( dims <= 2 && m.dims <= 2 )
@@ -408,8 +525,8 @@ Mat& Mat::operator = (const Mat& m)
         datastart = m.datastart;
         dataend = m.dataend;
         datalimit = m.datalimit;
-        refcount = m.refcount;
         allocator = m.allocator;
+        u = m.u;
     }
     return *this;
 }
@@ -486,17 +603,17 @@ void Mat::create(Size _sz, int _type)
 inline
 void Mat::addref()
 {
-    if( refcount )
-        CV_XADD(refcount, 1);
+    if( u )
+        CV_XADD(&u->refcount, 1);
 }
 
 inline
 void Mat::release()
 {
-    if( refcount && CV_XADD(refcount, -1) == 1 )
+    if( u && CV_XADD(&u->refcount, -1) == 1 )
         deallocate();
     data = datastart = dataend = datalimit = 0;
     size.p[0] = 0;
-    refcount = 0;
+    u = 0;
 }
 
 inline
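// The reference count has moved from Mat into the shared UMatData block, as the
// hunks above show; Mat bumps UMatData::refcount, while UMat (further below)
// bumps UMatData::urefcount. A sketch of the resulting ownership behaviour:
//
//     cv::Mat a(100, 100, CV_8U);  // a.u->refcount == 1
//     cv::Mat b = a;               // copy ctor: CV_XADD(&u->refcount, 1) -> 2
//     b.release();                 // -> 1; deallocate() runs only when it reaches 0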
@@ -913,41 +1030,39 @@ void Mat::push_back(const Mat_<_Tp>& m)
     push_back((const Mat&)m);
 }
 
-
-
-///////////////////////////// Mat::MSize ////////////////////////////
+///////////////////////////// MatSize ////////////////////////////
 
 inline
-Mat::MSize::MSize(int* _p)
+MatSize::MatSize(int* _p)
     : p(_p) {}
 
 inline
-Size Mat::MSize::operator()() const
+Size MatSize::operator()() const
 {
     CV_DbgAssert(p[-1] <= 2);
     return Size(p[1], p[0]);
 }
 
 inline
-const int& Mat::MSize::operator[](int i) const
+const int& MatSize::operator[](int i) const
 {
     return p[i];
 }
 
 inline
-int& Mat::MSize::operator[](int i)
+int& MatSize::operator[](int i)
 {
     return p[i];
 }
 
 inline
-Mat::MSize::operator const int*() const
+MatSize::operator const int*() const
 {
     return p;
 }
 
 inline
-bool Mat::MSize::operator == (const MSize& sz) const
+bool MatSize::operator == (const MatSize& sz) const
 {
     int d = p[-1];
     int dsz = sz.p[-1];
@@ -963,46 +1078,46 @@ bool Mat::MSize::operator == (const MSize& sz) const
 }
 
 inline
-bool Mat::MSize::operator != (const MSize& sz) const
+bool MatSize::operator != (const MatSize& sz) const
 {
     return !(*this == sz);
 }
 
-///////////////////////////// Mat::MStep ////////////////////////////
+///////////////////////////// MatStep ////////////////////////////
 
 inline
-Mat::MStep::MStep()
+MatStep::MatStep()
 {
     p = buf; p[0] = p[1] = 0;
 }
 
 inline
-Mat::MStep::MStep(size_t s)
+MatStep::MatStep(size_t s)
 {
     p = buf; p[0] = s; p[1] = 0;
 }
 
 inline
-const size_t& Mat::MStep::operator[](int i) const
+const size_t& MatStep::operator[](int i) const
 {
     return p[i];
 }
 
 inline
-size_t& Mat::MStep::operator[](int i)
+size_t& MatStep::operator[](int i)
 {
     return p[i];
 }
 
-inline Mat::MStep::operator size_t() const
+inline MatStep::operator size_t() const
 {
     CV_DbgAssert( p == buf );
     return buf[0];
 }
 
-inline Mat::MStep& Mat::MStep::operator = (size_t s)
+inline MatStep& MatStep::operator = (size_t s)
 {
     CV_DbgAssert( p == buf );
     buf[0] = s;
@@ -1438,43 +1553,6 @@ MatIterator_<_Tp> Mat_<_Tp>::end()
 }
 
 
-/*template<typename T1, typename T2, typename Op> inline
-void process( const Mat_<T1>& m1, Mat_<T2>& m2, Op op )
-{
-    int y, x, rows = m1.rows, cols = m1.cols;
-
-    CV_DbgAssert( m1.size() == m2.size() );
-
-    for( y = 0; y < rows; y++ )
-    {
-        const T1* src = m1[y];
-        T2* dst = m2[y];
-
-        for( x = 0; x < cols; x++ )
-            dst[x] = op(src[x]);
-    }
-}
-
-template<typename T1, typename T2, typename T3, typename Op> inline
-void process( const Mat_<T1>& m1, const Mat_<T2>& m2, Mat_<T3>& m3, Op op )
-{
-    int y, x, rows = m1.rows, cols = m1.cols;
-
-    CV_DbgAssert( m1.size() == m2.size() );
-
-    for( y = 0; y < rows; y++ )
-    {
-        const T1* src1 = m1[y];
-        const T2* src2 = m2[y];
-        T3* dst = m3[y];
-
-        for( x = 0; x < cols; x++ )
-            dst[x] = op( src1[x], src2[x] );
-    }
-}*/
-
-
 
 ///////////////////////////// SparseMat /////////////////////////////
 
 inline
@@ -2956,6 +3034,320 @@ const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b)
     return a;
 }
 
+
+//////////////////////////////// UMat ////////////////////////////////
+
+inline
+UMat::UMat()
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(Size _sz, int _type)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    create( _sz.height, _sz.width, _type );
+}
+
+inline
+UMat::UMat(Size _sz, int _type, const Scalar& _s)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    create(_sz.height, _sz.width, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(const UMat& m)
+: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
+u(m.u), offset(m.offset), size(&rows)
+{
+    if( u )
+        CV_XADD(&(u->urefcount), 1);
+    if( m.dims <= 2 )
+    {
+        step[0] = m.step[0]; step[1] = m.step[1];
+    }
+    else
+    {
+        dims = 0;
+        copySize(m);
+    }
+}
+
+
+template<typename _Tp> inline
+UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
+: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+cols(1), allocator(0), u(0), offset(0), size(&rows)
+{
+    if(vec.empty())
+        return;
+    if( !copyData )
+    {
+        // !!!TODO!!!
+        CV_Error(Error::StsNotImplemented, "");
+    }
+    else
+        Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
+}
+
+
+inline
+UMat::~UMat()
+{
+    release();
+    if( step.p != step.buf )
+        fastFree(step.p);
+}
+
+inline
+UMat& UMat::operator = (const UMat& m)
+{
+    if( this != &m )
+    {
+        if( m.u )
+            CV_XADD(&(m.u->urefcount), 1);
+        release();
+        flags = m.flags;
+        if( dims <= 2 && m.dims <= 2 )
+        {
+            dims = m.dims;
+            rows = m.rows;
+            cols = m.cols;
+            step[0] = m.step[0];
+            step[1] = m.step[1];
+        }
+        else
+            copySize(m);
+        allocator = m.allocator;
+        u = m.u;
+        offset = m.offset;
+    }
+    return *this;
+}
+
+inline
+UMat UMat::row(int y) const
+{
+    return UMat(*this, Range(y, y + 1), Range::all());
+}
+
+inline
+UMat UMat::col(int x) const
+{
+    return UMat(*this, Range::all(), Range(x, x + 1));
+}
+
+inline
+UMat UMat::rowRange(int startrow, int endrow) const
+{
+    return UMat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+UMat UMat::rowRange(const Range& r) const
+{
+    return UMat(*this, r, Range::all());
+}
+
+inline
+UMat UMat::colRange(int startcol, int endcol) const
+{
+    return UMat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+UMat UMat::colRange(const Range& r) const
+{
+    return UMat(*this, Range::all(), r);
+}
+
+inline
+UMat UMat::clone() const
+{
+    UMat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void UMat::assignTo( UMat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void UMat::create(int _rows, int _cols, int _type)
+{
+    _type &= TYPE_MASK;
+    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
+        return;
+    int sz[] = {_rows, _cols};
+    create(2, sz, _type);
+}
+
+inline
+void UMat::create(Size _sz, int _type)
+{
+    create(_sz.height, _sz.width, _type);
+}
+
+inline
+void UMat::addref()
+{
+    if( u )
+        CV_XADD(&(u->urefcount), 1);
+}
+
+inline void UMat::release()
+{
+    if( u && CV_XADD(&(u->urefcount), -1) == 1 )
+        deallocate();
+    size.p[0] = 0;
+    u = 0;
+}
+
+inline
+UMat UMat::operator()( Range _rowRange, Range _colRange ) const
+{
+    return UMat(*this, _rowRange, _colRange);
+}
+
+inline
+UMat UMat::operator()( const Rect& roi ) const
+{
+    return UMat(*this, roi);
+}
+
+inline
+UMat UMat::operator()(const Range* ranges) const
+{
+    return UMat(*this, ranges);
+}
+
+inline
+bool UMat::isContinuous() const
+{
+    return (flags & CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool UMat::isSubmatrix() const
+{
+    return (flags & SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t UMat::elemSize() const
+{
+    return dims > 0 ? step.p[dims - 1] : 0;
+}
+
+inline
+size_t UMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int UMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int UMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int UMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t UMat::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+inline
+bool UMat::empty() const
+{
+    return u == 0 || total() == 0;
+}
+
+inline
+size_t UMat::total() const
+{
+    if( dims <= 2 )
+        return (size_t)rows * cols;
+    size_t p = 1;
+    for( int i = 0; i < dims; i++ )
+        p *= size[i];
+    return p;
+}
+
+inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; }
+inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; }
+inline bool UMatData::copyOnMap() const { return (flags & COPY_ON_MAP) != 0; }
+inline bool UMatData::tempUMat() const { return (flags & TEMP_UMAT) != 0; }
+inline bool UMatData::tempCopiedUMat() const { return (flags & TEMP_COPIED_UMAT) == TEMP_COPIED_UMAT; }
+
+inline void UMatData::markHostCopyObsolete(bool flag)
+{
+    if(flag)
+        flags |= HOST_COPY_OBSOLETE;
+    else
+        flags &= ~HOST_COPY_OBSOLETE;
+}
+inline void UMatData::markDeviceCopyObsolete(bool flag)
+{
+    if(flag)
+        flags |= DEVICE_COPY_OBSOLETE;
+    else
+        flags &= ~DEVICE_COPY_OBSOLETE;
+}
+
+inline UMatDataAutoLock::UMatDataAutoLock(UMatData* _u) : u(_u) { u->lock(); }
+inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); }
+
 } //cv
 
 #endif
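// UMatDataAutoLock above is a plain RAII wrapper over UMatData::lock()/unlock().
// A sketch of the intended pattern for host-side mutation (the precise
// synchronization protocol belongs to the allocator implementations):
//
//     void noteHostWrite(cv::UMatData* u)
//     {
//         cv::UMatDataAutoLock guard(u);       // atomic access to u->flags
//         u->markHostCopyObsolete(false);
//         u->markDeviceCopyObsolete(true);     // device copy needs re-upload
//     }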
diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp
new file mode 100644
index 0000000000..419ccffd5b
--- /dev/null
+++ b/modules/core/include/opencv2/core/ocl.hpp
@@ -0,0 +1,451 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OPENCL_HPP__
+#define __OPENCV_OPENCL_HPP__
+
+#include "opencv2/core.hpp"
+
+namespace cv { namespace ocl {
+
+CV_EXPORTS bool haveOpenCL();
+CV_EXPORTS bool useOpenCL();
+CV_EXPORTS void setUseOpenCL(bool flag);
+CV_EXPORTS void finish();
+
+class CV_EXPORTS Context;
+class CV_EXPORTS Device;
+class CV_EXPORTS Kernel;
+class CV_EXPORTS Program;
+class CV_EXPORTS ProgramSource;
+class CV_EXPORTS Queue;
+
+class CV_EXPORTS Device
+{
+public:
+    Device();
+    explicit Device(void* d);
+    Device(const Device& d);
+    Device& operator = (const Device& d);
+    ~Device();
+
+    void set(void* d);
+
+    enum
+    {
+        TYPE_DEFAULT     = (1 << 0),
+        TYPE_CPU         = (1 << 1),
+        TYPE_GPU         = (1 << 2),
+        TYPE_ACCELERATOR = (1 << 3),
+        TYPE_DGPU        = TYPE_GPU + (1 << 16),
+        TYPE_IGPU        = TYPE_GPU + (1 << 17),
+        TYPE_ALL         = 0xFFFFFFFF
+    };
+
+    String name() const;
+    String extensions() const;
+    String vendor() const;
+    String OpenCL_C_Version() const;
+    String OpenCLVersion() const;
+    String driverVersion() const;
+    void* ptr() const;
+
+    int type() const;
+
+    int addressBits() const;
+    bool available() const;
+    bool compilerAvailable() const;
+    bool linkerAvailable() const;
+
+    enum
+    {
+        FP_DENORM=(1 << 0),
+        FP_INF_NAN=(1 << 1),
+        FP_ROUND_TO_NEAREST=(1 << 2),
+        FP_ROUND_TO_ZERO=(1 << 3),
+        FP_ROUND_TO_INF=(1 << 4),
+        FP_FMA=(1 << 5),
+        FP_SOFT_FLOAT=(1 << 6),
+        FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
+    };
+    int doubleFPConfig() const;
+    int singleFPConfig() const;
+    int halfFPConfig() const;
+
+    bool endianLittle() const;
+    bool errorCorrectionSupport() const;
+
+    enum
+    {
+        EXEC_KERNEL=(1 << 0),
+        EXEC_NATIVE_KERNEL=(1 << 1)
+    };
+    int executionCapabilities() const;
+
+    size_t globalMemCacheSize() const;
+
+    enum
+    {
+        NO_CACHE=0,
+        READ_ONLY_CACHE=1,
+        READ_WRITE_CACHE=2
+    };
+    int globalMemCacheType() const;
+    int globalMemCacheLineSize() const;
+    size_t globalMemSize() const;
+
+    size_t localMemSize() const;
+    enum
+    {
+        NO_LOCAL_MEM=0,
+        LOCAL_IS_LOCAL=1,
+        LOCAL_IS_GLOBAL=2
+    };
+    int localMemType() const;
+    bool hostUnifiedMemory() const;
+
+    bool imageSupport() const;
+
+    size_t image2DMaxWidth() const;
+    size_t image2DMaxHeight() const;
+
+    size_t image3DMaxWidth() const;
+    size_t image3DMaxHeight() const;
+    size_t image3DMaxDepth() const;
+
+    size_t imageMaxBufferSize() const;
+    size_t imageMaxArraySize() const;
+
+    int maxClockFrequency() const;
+    int maxComputeUnits() const;
+    int maxConstantArgs() const;
+    size_t maxConstantBufferSize() const;
+
+    size_t maxMemAllocSize() const;
+    size_t maxParameterSize() const;
+
+    int maxReadImageArgs() const;
+    int maxWriteImageArgs() const;
+    int maxSamplers() const;
+
+    size_t maxWorkGroupSize() const;
+    int maxWorkItemDims() const;
+    void maxWorkItemSizes(size_t*) const;
+
+    int memBaseAddrAlign() const;
+
+    int nativeVectorWidthChar() const;
+    int nativeVectorWidthShort() const;
+    int nativeVectorWidthInt() const;
+    int nativeVectorWidthLong() const;
+    int nativeVectorWidthFloat() const;
+    int nativeVectorWidthDouble() const;
+    int nativeVectorWidthHalf() const;
+
+    int preferredVectorWidthChar() const;
+    int preferredVectorWidthShort() const;
+    int preferredVectorWidthInt() const;
+    int preferredVectorWidthLong() const;
+    int preferredVectorWidthFloat() const;
+    int preferredVectorWidthDouble() const;
+    int preferredVectorWidthHalf() const;
+
+    size_t printfBufferSize() const;
+    size_t profilingTimerResolution() const;
+
+    static const Device& getDefault();
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
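// A short capability-query sketch against the Device API above (illustrative):
//
//     if( cv::ocl::haveOpenCL() )
//     {
//         const cv::ocl::Device& dev = cv::ocl::Device::getDefault();
//         bool fp64    = dev.doubleFPConfig() > 0;   // 0 without cl_khr_fp64
//         size_t maxWG = dev.maxWorkGroupSize();
//         cv::ocl::setUseOpenCL( (dev.type() & cv::ocl::Device::TYPE_GPU) != 0 );
//     }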
nativeVectorWidthDouble() const; + int nativeVectorWidthHalf() const; + + int preferredVectorWidthChar() const; + int preferredVectorWidthShort() const; + int preferredVectorWidthInt() const; + int preferredVectorWidthLong() const; + int preferredVectorWidthFloat() const; + int preferredVectorWidthDouble() const; + int preferredVectorWidthHalf() const; + + size_t printfBufferSize() const; + size_t profilingTimerResolution() const; + + static const Device& getDefault(); + +protected: + struct Impl; + Impl* p; +}; + + +class CV_EXPORTS Context +{ +public: + Context(); + explicit Context(int dtype); + ~Context(); + Context(const Context& c); + Context& operator = (const Context& c); + + bool create(int dtype); + size_t ndevices() const; + const Device& device(size_t idx) const; + Program getProg(const ProgramSource& prog, + const String& buildopt, String& errmsg); + + static Context& getDefault(); + void* ptr() const; +protected: + struct Impl; + Impl* p; +}; + + +class CV_EXPORTS Queue +{ +public: + Queue(); + explicit Queue(const Context& c, const Device& d=Device()); + ~Queue(); + Queue(const Queue& q); + Queue& operator = (const Queue& q); + + bool create(const Context& c=Context(), const Device& d=Device()); + void finish(); + void* ptr() const; + static Queue& getDefault(); + +protected: + struct Impl; + Impl* p; +}; + + +class CV_EXPORTS KernelArg +{ +public: + enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 }; + KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0); + + static KernelArg Local() { return KernelArg(LOCAL, 0); } + static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); } + static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); } + static KernelArg Constant(const Mat& m); + template static KernelArg Constant(const _Tp* arr, size_t n) + { return KernelArg(CONSTANT, 0, (void*)arr, n); } + + int flags; + UMat* m; + void* obj; + size_t sz; +}; + +class CV_EXPORTS Kernel +{ +public: + Kernel(); + Kernel(const char* kname, const Program& prog); + Kernel(const char* kname, const ProgramSource& prog, + const String& buildopts, String& errmsg); + ~Kernel(); + Kernel(const Kernel& k); + Kernel& operator = (const Kernel& k); + + bool create(const char* kname, const Program& prog); + bool create(const char* kname, const ProgramSource& prog, + const String& buildopts, String& errmsg); + + void set(int i, const void* value, size_t sz); + void set(int i, const UMat& m); + void set(int i, const KernelArg& arg); + template void set(int i, const _Tp& value) + { return set(i, &value, sizeof(value)); } + + template + Kernel& args(const _Tp0& a0) + { + set(0, a0); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1) + { + set(0, a0); set(1, a1); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2) + { + set(0, a0); set(1, a1); set(2, a2); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3) + { + set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, + const _Tp3& a3, const _Tp4& a4) + { + set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, + const _Tp3& a3, const _Tp4& a4, const _Tp5& a5) + { + set(0, a0); set(1, a1); set(2, a2); + set(3, a3); set(4, a4); set(5, a5); return *this; + } + + template + 
+class CV_EXPORTS Kernel
+{
+public:
+    Kernel();
+    Kernel(const char* kname, const Program& prog);
+    Kernel(const char* kname, const ProgramSource& prog,
+           const String& buildopts, String& errmsg);
+    ~Kernel();
+    Kernel(const Kernel& k);
+    Kernel& operator = (const Kernel& k);
+
+    bool create(const char* kname, const Program& prog);
+    bool create(const char* kname, const ProgramSource& prog,
+                const String& buildopts, String& errmsg);
+
+    void set(int i, const void* value, size_t sz);
+    void set(int i, const UMat& m);
+    void set(int i, const KernelArg& arg);
+    template<typename _Tp> void set(int i, const _Tp& value)
+    { return set(i, &value, sizeof(value)); }
+
+    template<typename _Tp0>
+    Kernel& args(const _Tp0& a0)
+    {
+        set(0, a0); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1)
+    {
+        set(0, a0); set(1, a1); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
+    {
+        set(0, a0); set(1, a1); set(2, a2); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+                 const _Tp3& a3, const _Tp4& a4)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2,
+             typename _Tp3, typename _Tp4, typename _Tp5>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+                 const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
+    {
+        set(0, a0); set(1, a1); set(2, a2);
+        set(3, a3); set(4, a4); set(5, a5); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3);
+        set(4, a4); set(5, a5); set(6, a6); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3);
+        set(4, a4); set(5, a5); set(6, a6); set(7, a7); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4);
+        set(5, a5); set(6, a6); set(7, a7); set(8, a8); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
+        set(6, a6); set(7, a7); set(8, a8); set(9, a9); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
+        set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
+    {
+        set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
+        set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); set(11, a11); return *this;
+    }
+
+    void run(int dims, size_t offset[], size_t globalsize[],
+             size_t localsize[], bool sync, const Queue& q=Queue());
+    void runTask(bool sync, const Queue& q=Queue());
+
+    size_t workGroupSize() const;
+    bool compileWorkGroupSize(size_t wsz[]) const;
+    size_t localMemSize() const;
+
+    void* ptr() const;
+    struct Impl;
+
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS Program
+{
+public:
+    Program();
+    Program(const ProgramSource& src,
+            const String& buildflags, String& errmsg);
+    explicit Program(const String& buf);
+    Program(const Program& prog);
+
+    Program& operator = (const Program& prog);
+    ~Program();
+
+    bool create(const ProgramSource& src,
+                const String& buildflags, String& errmsg);
+    bool read(const String& buf, const String& buildflags);
+    bool write(String& buf) const;
+
+    const ProgramSource& source() const;
+    void* ptr() const;
+
+    String getPrefix() const;
+    static String getPrefix(const String& buildflags);
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+
+class CV_EXPORTS ProgramSource
+{
+public:
+    typedef uint64 hash_t;
+
+    ProgramSource();
+    explicit ProgramSource(const String& prog);
+    explicit ProgramSource(const char* prog);
+    ~ProgramSource();
+    ProgramSource(const ProgramSource& prog);
+    ProgramSource& operator = (const ProgramSource& prog);
+
+    const String& source() const;
+    hash_t hash() const;
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+}}
+
+#endif
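With ProgramSource, Program, Kernel and Queue in place, the pieces of this header compose end to end. A hedged sketch of the whole pipeline (the kernel source, its name "negate", and the buffer size are invented for illustration; the exact marshalling of a UMat argument into kernel parameters is an implementation detail of ocl.cpp, so the kernel signature here is schematic, and real code must check errmsg and the kernel's validity):

    #include "opencv2/core.hpp"
    #include "opencv2/core/ocl.hpp"

    void negateOnDevice()
    {
        using namespace cv::ocl;

        // A trivial OpenCL kernel embedded as a string (hypothetical example).
        ProgramSource source(
            "__kernel void negate(__global float* a, int n)\n"
            "{\n"
            "    int i = get_global_id(0);\n"
            "    if( i < n ) a[i] = -a[i];\n"
            "}\n");

        cv::String errmsg;
        // Context::getDefault() lazily creates a context on the default device;
        // getProg() compiles (and can cache) the program for that context.
        Program prog = Context::getDefault().getProg(source, "", errmsg);

        Kernel k("negate", prog);
        cv::UMat buf(1, 1024, CV_32F);
        int n = buf.cols;

        // args() forwards each value to the matching set() overload:
        // KernelArg goes through set(int, const KernelArg&), PODs by value.
        k.args(KernelArg(KernelArg::READ_WRITE, &buf), n);

        size_t globalsize[] = { (size_t)n };
        k.run(1, 0, globalsize, 0, true);   // blocking run on the default Queue

        cv::Mat host = buf.getMat(cv::ACCESS_READ);  // map the result on the host
    }

diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index 5cd6894600..b6c413040e 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -271,7 +271,7 @@ void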
cv::split(InputArray _m, OutputArrayOfArrays _mv) _mv.release(); return; } - CV_Assert( !_mv.fixedType() || CV_MAT_TYPE(_mv.flags) == m.depth() ); + CV_Assert( !_mv.fixedType() || _mv.empty() || _mv.type() == m.depth() ); _mv.create(m.channels(), 1, m.depth()); Mat* dst = &_mv.getMatRef(0); split(m, dst); diff --git a/modules/core/src/matop.cpp b/modules/core/src/matop.cpp index 016435650a..243c821a59 100644 --- a/modules/core/src/matop.cpp +++ b/modules/core/src/matop.cpp @@ -1610,7 +1610,7 @@ MatExpr Mat::mul(InputArray m, double scale) const MatExpr e; if(m.kind() == _InputArray::EXPR) { - const MatExpr& me = *(const MatExpr*)m.obj; + const MatExpr& me = *(const MatExpr*)m.getObj(); me.op->multiply(MatExpr(*this), me, e, scale); } else diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 8bfa925574..e64bae42c9 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -48,6 +48,156 @@ namespace cv { +class StdMatAllocator : public MatAllocator +{ +public: + UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const + { + size_t total = CV_ELEM_SIZE(type); + for( int i = dims-1; i >= 0; i-- ) + { + if( step ) + step[i] = total; + total *= sizes[i]; + } + uchar* data = (uchar*)fastMalloc(total); + UMatData* u = new UMatData(this); + u->data = u->origdata = data; + u->size = total; + u->refcount = 1; + + return u; + } + + bool allocate(UMatData* u, int accessFlags) const + { + if(!u) return false; + if(u->handle != 0) + return true; + return UMat::getStdAllocator()->allocate(u, accessFlags); + } + + void deallocate(UMatData* u) const + { + if(u) + { + fastFree(u->origdata); + delete u; + } + } + + void map(UMatData*, int) const + { + } + + void unmap(UMatData* u) const + { + if(u->urefcount == 0) + deallocate(u); + } + + void download(UMatData* u, void* dstptr, + int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dststep[]) const + { + if(!u) + return; + int isz[CV_MAX_DIM]; + uchar* srcptr = u->data; + for( int i = 0; i < dims; i++ ) + { + CV_Assert( sz[i] <= (size_t)INT_MAX ); + if( sz[i] == 0 ) + return; + if( srcofs ) + srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1); + isz[i] = (int)sz[i]; + } + + Mat src(dims, isz, CV_8U, srcptr, srcstep); + Mat dst(dims, isz, CV_8U, dstptr, dststep); + + const Mat* arrays[] = { &src, &dst }; + uchar* ptrs[2]; + NAryMatIterator it(arrays, ptrs, 2); + size_t j, planesz = it.size; + + for( j = 0; j < it.nplanes; j++, ++it ) + memcpy(ptrs[1], ptrs[0], planesz); + } + + void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[], + const size_t dstofs[], const size_t dststep[], + const size_t srcstep[]) const + { + if(!u) + return; + int isz[CV_MAX_DIM]; + uchar* dstptr = u->data; + for( int i = 0; i < dims; i++ ) + { + CV_Assert( sz[i] <= (size_t)INT_MAX ); + if( sz[i] == 0 ) + return; + if( dstofs ) + dstptr += dstofs[i]*(i <= dims-2 ? 
dststep[i] : 1); + isz[i] = (int)sz[i]; + } + + Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep); + Mat dst(dims, isz, CV_8U, dstptr, dststep); + + const Mat* arrays[] = { &src, &dst }; + uchar* ptrs[2]; + NAryMatIterator it(arrays, ptrs, 2); + size_t j, planesz = it.size; + + for( j = 0; j < it.nplanes; j++, ++it ) + memcpy(ptrs[1], ptrs[0], planesz); + } + + void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dstofs[], const size_t dststep[], bool) const + { + if(!usrc || !udst) + return; + int isz[CV_MAX_DIM]; + uchar* srcptr = usrc->data; + uchar* dstptr = udst->data; + for( int i = 0; i < dims; i++ ) + { + CV_Assert( sz[i] <= (size_t)INT_MAX ); + if( sz[i] == 0 ) + return; + if( srcofs ) + srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1); + if( dstofs ) + dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1); + isz[i] = (int)sz[i]; + } + + Mat src(dims, isz, CV_8U, srcptr, srcstep); + Mat dst(dims, isz, CV_8U, dstptr, dststep); + + const Mat* arrays[] = { &src, &dst }; + uchar* ptrs[2]; + NAryMatIterator it(arrays, ptrs, 2); + size_t j, planesz = it.size; + + for( j = 0; j < it.nplanes; j++, ++it ) + memcpy(ptrs[1], ptrs[0], planesz); + } +}; + + +MatAllocator* Mat::getStdAllocator() +{ + static StdMatAllocator allocator; + return &allocator; +} + void swap( Mat& a, Mat& b ) { std::swap(a.flags, b.flags); @@ -55,11 +205,11 @@ void swap( Mat& a, Mat& b ) std::swap(a.rows, b.rows); std::swap(a.cols, b.cols); std::swap(a.data, b.data); - std::swap(a.refcount, b.refcount); std::swap(a.datastart, b.datastart); std::swap(a.dataend, b.dataend); std::swap(a.datalimit, b.datalimit); std::swap(a.allocator, b.allocator); + std::swap(a.u, b.u); std::swap(a.size.p, b.size.p); std::swap(a.step.p, b.step.p); @@ -161,6 +311,8 @@ static void finalizeHdr(Mat& m) int d = m.dims; if( d > 2 ) m.rows = m.cols = -1; + if(m.u) + m.data = m.datastart = m.u->data; if( m.data ) { m.datalimit = m.datastart + m.size[0]*m.step[0]; @@ -203,36 +355,25 @@ void Mat::create(int d, const int* _sizes, int _type) if( total() > 0 ) { + MatAllocator *a = allocator, *a0 = getStdAllocator(); #ifdef HAVE_TGPU - if( !allocator || allocator == tegra::getAllocator() ) allocator = tegra::getAllocator(d, _sizes, _type); + if( !a || a == tegra::getAllocator() ) + a = tegra::getAllocator(d, _sizes, _type); #endif - if( !allocator ) + if(!a) + a = a0; + try { - size_t totalsize = alignSize(step.p[0]*size.p[0], (int)sizeof(*refcount)); - data = datastart = (uchar*)fastMalloc(totalsize + (int)sizeof(*refcount)); - refcount = (int*)(data + totalsize); - *refcount = 1; + u = a->allocate(dims, size, _type, step.p); + CV_Assert(u != 0); } - else + catch(...) { -#ifdef HAVE_TGPU - try - { - allocator->allocate(dims, size, _type, refcount, datastart, data, step.p); - CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) ); - }catch(...) 
- { - allocator = 0; - size_t totalSize = alignSize(step.p[0]*size.p[0], (int)sizeof(*refcount)); - data = datastart = (uchar*)fastMalloc(totalSize + (int)sizeof(*refcount)); - refcount = (int*)(data + totalSize); - *refcount = 1; - } -#else - allocator->allocate(dims, size, _type, refcount, datastart, data, step.p); - CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) ); -#endif + if(a != a0) + u = a0->allocate(dims, size, _type, step.p); + CV_Assert(u != 0); } + CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) ); } finalizeHdr(*this); @@ -250,19 +391,13 @@ void Mat::copySize(const Mat& m) void Mat::deallocate() { - if( allocator ) - allocator->deallocate(refcount, datastart, data); - else - { - CV_DbgAssert(refcount != 0); - fastFree(datastart); - } + if(u) + (u->currAllocator ? u->currAllocator : allocator ? allocator : getStdAllocator())->unmap(u); } - Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), - datalimit(0), allocator(0), size(&rows) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows) { CV_Assert( m.dims >= 2 ); if( m.dims > 2 ) @@ -307,9 +442,9 @@ Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange) Mat::Mat(const Mat& m, const Rect& roi) : flags(m.flags), dims(2), rows(roi.height), cols(roi.width), - data(m.data + roi.y*m.step[0]), refcount(m.refcount), + data(m.data + roi.y*m.step[0]), datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), - allocator(m.allocator), size(&rows) + allocator(m.allocator), u(m.u), size(&rows) { CV_Assert( m.dims <= 2 ); flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1; @@ -319,8 +454,8 @@ Mat::Mat(const Mat& m, const Rect& roi) data += roi.x*esz; CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows ); - if( refcount ) - CV_XADD(refcount, 1); + if( u ) + CV_XADD(&u->refcount, 1); if( roi.width < m.cols || roi.height < m.rows ) flags |= SUBMATRIX_FLAG; @@ -335,8 +470,8 @@ Mat::Mat(const Mat& m, const Rect& roi) Mat::Mat(int _dims, const int* _sizes, int _type, void* _data, const size_t* _steps) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), - datalimit(0), allocator(0), size(&rows) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows) { flags |= CV_MAT_TYPE(_type); data = datastart = (uchar*)_data; @@ -346,8 +481,8 @@ Mat::Mat(int _dims, const int* _sizes, int _type, void* _data, const size_t* _st Mat::Mat(const Mat& m, const Range* ranges) - : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), - datalimit(0), allocator(0), size(&rows) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), + datalimit(0), allocator(0), u(0), size(&rows) { int i, d = m.dims; @@ -938,20 +1073,10 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) Input/Output Array \*************************************************************************************************/ -_InputArray::_InputArray() : flags(0), obj(0) {} -_InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {} -_InputArray::_InputArray(const std::vector& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {} -_InputArray::_InputArray(const double& val) : 
flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {} -_InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {} -_InputArray::_InputArray(const cuda::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {} -_InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {} -_InputArray::_InputArray(const cuda::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {} - -_InputArray::~_InputArray() {} - Mat _InputArray::getMat(int i) const { int k = kind(); + int accessFlags = flags & ACCESS_MASK; if( k == MAT ) { @@ -961,6 +1086,14 @@ Mat _InputArray::getMat(int i) const return m->row(i); } + if( k == UMAT ) + { + const UMat* m = (const UMat*)obj; + if( i < 0 ) + return m->getMat(accessFlags); + return m->getMat(accessFlags).row(i); + } + if( k == EXPR ) { CV_Assert( i < 0 ); @@ -995,11 +1128,6 @@ Mat _InputArray::getMat(int i) const return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat(); } - if( k == OCL_MAT ) - { - CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); - } - if( k == STD_VECTOR_MAT ) { const std::vector& v = *(const std::vector*)obj; @@ -1008,6 +1136,14 @@ Mat _InputArray::getMat(int i) const return v[i]; } + if( k == STD_VECTOR_UMAT ) + { + const std::vector& v = *(const std::vector*)obj; + CV_Assert( 0 <= i && i < (int)v.size() ); + + return v[i].getMat(accessFlags); + } + if( k == OPENGL_BUFFER ) { CV_Assert( i < 0 ); @@ -1022,8 +1158,7 @@ Mat _InputArray::getMat(int i) const return Mat(); } - CV_Assert( k == CUDA_MEM ); - //if( k == CUDA_MEM ) + if( k == CUDA_MEM ) { CV_Assert( i < 0 ); @@ -1031,12 +1166,49 @@ Mat _InputArray::getMat(int i) const return cuda_mem->createMatHeader(); } + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); + return Mat(); +} + + +UMat _InputArray::getUMat(int i) const +{ + int k = kind(); + int accessFlags = flags & ACCESS_MASK; + + if( k == UMAT ) + { + const UMat* m = (const UMat*)obj; + if( i < 0 ) + return *m; + return m->row(i); + } + + if( k == STD_VECTOR_UMAT ) + { + const std::vector& v = *(const std::vector*)obj; + CV_Assert( 0 <= i && i < (int)v.size() ); + + return v[i]; + } + + if( k == MAT ) + { + const Mat* m = (const Mat*)obj; + if( i < 0 ) + return m->getUMat(accessFlags); + return m->row(i).getUMat(accessFlags); + } + + return getMat(i).getUMat(accessFlags); } void _InputArray::getMatVector(std::vector& mv) const { int k = kind(); + int accessFlags = flags & ACCESS_MASK; if( k == MAT ) { @@ -1105,19 +1277,29 @@ void _InputArray::getMatVector(std::vector& mv) const return; } - if( k == OCL_MAT ) + if( k == STD_VECTOR_MAT ) { - CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + const std::vector& v = *(const std::vector*)obj; + size_t i, n = v.size(); + mv.resize(n); + + for( i = 0; i < n; i++ ) + mv[i] = v[i]; + return; } - CV_Assert( k == STD_VECTOR_MAT ); - //if( k == STD_VECTOR_MAT ) + if( k == STD_VECTOR_UMAT ) { - const std::vector& v = *(const std::vector*)obj; - mv.resize(v.size()); - std::copy(v.begin(), v.end(), mv.begin()); + const std::vector& v = *(const std::vector*)obj; + size_t i, n = v.size(); + mv.resize(n); + + for( i = 0; i < n; i++ ) + mv[i] = v[i].getMat(accessFlags); return; } + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); } cuda::GpuMat _InputArray::getGpuMat() const @@ -1180,6 +1362,12 @@ Size _InputArray::size(int i) const return ((const MatExpr*)obj)->size(); } + if( k == UMAT ) + 
{ + CV_Assert( i < 0 ); + return ((const UMat*)obj)->size(); + } + if( k == MATX ) { CV_Assert( i < 0 ); @@ -1258,6 +1446,12 @@ size_t _InputArray::total(int i) const return ((const Mat*)obj)->total(); } + if( k == UMAT ) + { + CV_Assert( i < 0 ); + return ((const UMat*)obj)->total(); + } + if( k == STD_VECTOR_MAT ) { const std::vector& vv = *(const std::vector*)obj; @@ -1278,6 +1472,9 @@ int _InputArray::type(int i) const if( k == MAT ) return ((const Mat*)obj)->type(); + if( k == UMAT ) + return ((const UMat*)obj)->type(); + if( k == EXPR ) return ((const MatExpr*)obj)->type(); @@ -1290,8 +1487,12 @@ int _InputArray::type(int i) const if( k == STD_VECTOR_MAT ) { const std::vector& vv = *(const std::vector*)obj; + if( vv.empty() ) + { + CV_Assert((flags & FIXED_TYPE) != 0); + return CV_MAT_TYPE(flags); + } CV_Assert( i < (int)vv.size() ); - return vv[i >= 0 ? i : 0].type(); } @@ -1323,6 +1524,9 @@ bool _InputArray::empty() const if( k == MAT ) return ((const Mat*)obj)->empty(); + if( k == UMAT ) + return ((const UMat*)obj)->empty(); + if( k == EXPR ) return false; @@ -1367,21 +1571,6 @@ bool _InputArray::empty() const } -_OutputArray::_OutputArray() {} -_OutputArray::_OutputArray(Mat& m) : _InputArray(m) {} -_OutputArray::_OutputArray(std::vector& vec) : _InputArray(vec) {} -_OutputArray::_OutputArray(cuda::GpuMat& d_mat) : _InputArray(d_mat) {} -_OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {} -_OutputArray::_OutputArray(cuda::CudaMem& cuda_mem) : _InputArray(cuda_mem) {} - -_OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;} -_OutputArray::_OutputArray(const std::vector& vec) : _InputArray(vec) {flags |= FIXED_SIZE;} -_OutputArray::_OutputArray(const cuda::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;} -_OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;} -_OutputArray::_OutputArray(const cuda::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;} - -_OutputArray::~_OutputArray() {} - bool _OutputArray::fixedSize() const { return (flags & FIXED_SIZE) == FIXED_SIZE; @@ -1402,6 +1591,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int ((Mat*)obj)->create(_sz, mtype); return; } + if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + { + CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == _sz); + CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype); + ((UMat*)obj)->create(_sz, mtype); + return; + } if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz); @@ -1437,6 +1633,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp ((Mat*)obj)->create(rows, cols, mtype); return; } + if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) + { + CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(cols, rows)); + CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype); + ((UMat*)obj)->create(rows, cols, mtype); + return; + } if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(cols, rows)); @@ -1462,7 +1665,8 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); } -void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool allowTransposed, int fixedDepthMask) 
const +void _OutputArray::create(int dims, const int* sizes, int mtype, int i, + bool allowTransposed, int fixedDepthMask) const { int k = kind(); mtype = CV_MAT_TYPE(mtype); @@ -1501,6 +1705,40 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all return; } + if( k == UMAT ) + { + CV_Assert( i < 0 ); + UMat& m = *(UMat*)obj; + if( allowTransposed ) + { + if( !m.isContinuous() ) + { + CV_Assert(!fixedType() && !fixedSize()); + m.release(); + } + + if( dims == 2 && m.dims == 2 && !m.empty() && + m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] ) + return; + } + + if(fixedType()) + { + if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 ) + mtype = m.type(); + else + CV_Assert(CV_MAT_TYPE(mtype) == m.type()); + } + if(fixedSize()) + { + CV_Assert(m.dims == dims); + for(int j = 0; j < dims; ++j) + CV_Assert(m.size[j] == sizes[j]); + } + m.create(dims, sizes, mtype); + return; + } + if( k == MATX ) { CV_Assert( i < 0 ); @@ -1593,19 +1831,13 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all return; } - if( k == OCL_MAT ) - { - CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); - } - if( k == NONE ) { CV_Error(CV_StsNullPtr, "create() called for the missing output array" ); return; } - CV_Assert( k == STD_VECTOR_MAT ); - //if( k == STD_VECTOR_MAT ) + if( k == STD_VECTOR_MAT ) { std::vector& v = *(std::vector*)obj; @@ -1661,7 +1893,10 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all } m.create(dims, sizes, mtype); + return; } + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); } void _OutputArray::release() const @@ -1709,16 +1944,13 @@ void _OutputArray::release() const return; } - if( k == OCL_MAT ) - { - CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); - } - - CV_Assert( k == STD_VECTOR_MAT ); - //if( k == STD_VECTOR_MAT ) + if( k == STD_VECTOR_MAT ) { ((std::vector*)obj)->clear(); + return; } + + CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); } void _OutputArray::clear() const @@ -1778,8 +2010,8 @@ cuda::CudaMem& _OutputArray::getCudaMemRef() const return *(cuda::CudaMem*)obj; } -static _OutputArray _none; -OutputArray noArray() { return _none; } +static _InputOutputArray _none; +InputOutputArray noArray() { return _none; } } diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp new file mode 100644 index 0000000000..094a80d974 --- /dev/null +++ b/modules/core/src/ocl.cpp @@ -0,0 +1,2972 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the OpenCV Foundation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include + +/* + Part of the file is an extract from the standard OpenCL headers from Khronos site. + Below is the original copyright. +*/ + +/******************************************************************************* + * Copyright (c) 2008 - 2012 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +#if 0 //defined __APPLE__ +#define HAVE_OPENCL 1 +#else +#undef HAVE_OPENCL +#endif + +#define OPENCV_CL_NOT_IMPLEMENTED -1000 + +#ifdef HAVE_OPENCL + +#if defined __APPLE__ +#include +#else +#include +#endif + +static const bool g_haveOpenCL = true; + +#else + +extern "C" { + +struct _cl_platform_id { int dummy; }; +struct _cl_device_id { int dummy; }; +struct _cl_context { int dummy; }; +struct _cl_command_queue { int dummy; }; +struct _cl_mem { int dummy; }; +struct _cl_program { int dummy; }; +struct _cl_kernel { int dummy; }; +struct _cl_event { int dummy; }; +struct _cl_sampler { int dummy; }; + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef int cl_int; +typedef unsigned cl_uint; +typedef long cl_long; +typedef unsigned long cl_ulong; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; + cl_mem buffer; +} cl_image_desc; + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + + +////////////////////////////////////////////////////////// + +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define 
CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 + +/*#define CL_VERSION_1_0 1 +#define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1*/ + +#define CL_FALSE 0 +#define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE + +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define 
CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B + +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) + +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define 
CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +// reserved (1 << 6) +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE + +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF + +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 + +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 + +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A + +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 + +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) + +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define 
CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 + +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#define CL_EVENT_CONTEXT 0x11D4 + +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 + +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + +#define CL_CALLBACK CV_STDCALL + +static 
volatile bool g_haveOpenCL = false;
+static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
+
+#if defined(__APPLE__)
+#include <dlfcn.h>
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+    static bool initialized = false;
+    static void* handle = 0;
+    if (!handle)
+    {
+        if(!initialized)
+        {
+            handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY);
+            initialized = true;
+            g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
+        }
+        if(!handle)
+            return 0;
+    }
+
+    return funcname ? dlsym(handle, funcname) : 0;
+}
+
+#elif defined WIN32 || defined _WIN32
+
+#ifndef _WIN32_WINNT           // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
+  #define _WIN32_WINNT 0x0400  // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
+#endif
+#include <windows.h>
+#if (_WIN32_WINNT >= 0x0602)
+  #include
+#endif
+#undef small
+#undef min
+#undef max
+#undef abs
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+    static bool initialized = false;
+    static HMODULE handle = 0;
+    if (!handle)
+    {
+        if(!initialized)
+        {
+            handle = LoadLibraryA("OpenCL.dll");
+            initialized = true;
+            g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
+        }
+        if(!handle)
+            return 0;
+    }
+
+    return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
+}
+
+#elif defined(__linux)
+
+#include <dlfcn.h>
+#include
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+    static bool initialized = false;
+    static void* handle = 0;
+    if (!handle)
+    {
+        if(!initialized)
+        {
+            handle = dlopen("libOpenCL.so", RTLD_LAZY);
+            if(!handle)
+                handle = dlopen("libCL.so", RTLD_LAZY);
+            initialized = true;
+            g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
+        }
+        if(!handle)
+            return 0;
+    }
+
+    return funcname ? (void*)dlsym(handle, funcname) : 0;
+}
+
+#else
+
+static void* initOpenCLAndLoad(const char*)
+{
+    return 0;
+}
+
+#endif
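Each OCL_FUNC/OCL_FUNC_P instantiation below generates a stub with the real API's signature that resolves the symbol through initOpenCLAndLoad() on first call, so the library links and runs even when no OpenCL driver is installed. For clFinish, the OCL_FUNC macro expands to roughly this (token-for-token expansion of the macro text; shown here only for orientation):

    typedef cl_int (CV_STDCALL * clFinish_t) (cl_command_queue command_queue);
    static cl_int clFinish (cl_command_queue command_queue)
    {
        static clFinish_t clFinish_p = 0;
        if( !clFinish_p )
        {
            clFinish_p = (clFinish_t)initOpenCLAndLoad("clFinish");
            if( !clFinish_p )
                return OPENCV_CL_NOT_IMPLEMENTED;   // OpenCL missing: fail soft
        }
        return clFinish_p(command_queue);
    }

OCL_FUNC_P differs only in its failure path: for the object-creating calls that report status through an errcode_ret pointer, it writes OPENCV_CL_NOT_IMPLEMENTED there and returns a null handle.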
+
+#define OCL_FUNC(rettype, funcname, argsdecl, args) \
+    typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
+    static rettype funcname argsdecl \
+    { \
+        static funcname##_t funcname##_p = 0; \
+        if( !funcname##_p ) \
+        { \
+            funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
+            if( !funcname##_p ) \
+                return OPENCV_CL_NOT_IMPLEMENTED; \
+        } \
+        return funcname##_p args; \
+    }
+
+
+#define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
+    typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
+    static rettype funcname argsdecl \
+    { \
+        static funcname##_t funcname##_p = 0; \
+        if( !funcname##_p ) \
+        { \
+            funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
+            if( !funcname##_p ) \
+            { \
+                if( errcode_ret ) \
+                    *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
+                return 0; \
+            } \
+        } \
+        return funcname##_p args; \
+    }
+
+OCL_FUNC(cl_int, clGetPlatformIDs,
+    (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
+    (num_entries, platforms, num_platforms))
+
+OCL_FUNC(cl_int, clGetPlatformInfo,
+    (cl_platform_id platform, cl_platform_info param_name,
+    size_t param_value_size, void * param_value,
+    size_t * param_value_size_ret),
+    (platform, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clGetDeviceInfo,
+    (cl_device_id device,
+    cl_device_info param_name,
+    size_t param_value_size,
+    void * param_value,
+    size_t * param_value_size_ret),
+    (device, param_name, param_value_size, param_value, param_value_size_ret))
+
+
+OCL_FUNC(cl_int, clGetDeviceIDs,
+    (cl_platform_id platform,
+    cl_device_type device_type,
+    cl_uint num_entries,
+    cl_device_id * devices,
+    cl_uint * num_devices),
+    (platform, device_type, num_entries, devices, num_devices))
+
+OCL_FUNC_P(cl_context, clCreateContext,
+    (const cl_context_properties * properties,
+    cl_uint num_devices,
+    const cl_device_id * devices,
+    void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
+    void * user_data,
+    cl_int * errcode_ret),
+    (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
+
+OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
+
+/*
+OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
+
+OCL_FUNC_P(cl_context, clCreateContextFromType,
+    (const cl_context_properties * properties,
+    cl_device_type device_type,
+    void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
+    void * user_data,
+    cl_int * errcode_ret),
+    (properties, device_type, pfn_notify, user_data, errcode_ret))
+
+OCL_FUNC(cl_int, clGetContextInfo,
+    (cl_context context,
+    cl_context_info param_name,
+    size_t param_value_size,
+    void * param_value,
+    size_t * param_value_size_ret),
+    (context, param_name, param_value_size,
+    param_value, param_value_size_ret))
+*/
+OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
+    (cl_context context,
+    cl_device_id device,
+    cl_command_queue_properties properties,
+    cl_int * errcode_ret),
+    (context, device, properties, errcode_ret))
+
+OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
+
+OCL_FUNC_P(cl_mem, clCreateBuffer,
+    (cl_context context,
+    cl_mem_flags flags,
+    size_t size,
+    void * host_ptr,
+    cl_int * errcode_ret),
+    (context, flags, size, host_ptr, errcode_ret))
+
+/*
+OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
+
+OCL_FUNC(cl_int,
clGetCommandQueueInfo, + (cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (command_queue, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC_P(cl_mem, clCreateSubBuffer, + (cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret), + (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret)) + +OCL_FUNC_P(cl_mem, clCreateImage, + (cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret), + (context, flags, image_format, image_desc, host_ptr, errcode_ret)) + +OCL_FUNC(cl_int, clGetSupportedImageFormats, + (cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats), + (context, flags, image_type, num_entries, image_formats, num_image_formats)) + +OCL_FUNC(cl_int, clGetMemObjectInfo, + (cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (memobj, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clGetImageInfo, + (cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (image, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clCreateKernelsInProgram, + (cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret), + (program, num_kernels, kernels, num_kernels_ret)) + +OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel)) + +OCL_FUNC(cl_int, clGetKernelArgInfo, + (cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clEnqueueReadImage, + (cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin[3], + const size_t * region[3], + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, image, blocking_read, origin, region, + row_pitch, slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event)) + +OCL_FUNC(cl_int, clEnqueueWriteImage, + (cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin[3], + const size_t * region[3], + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, image, blocking_write, origin, region, input_row_pitch, + input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueFillImage, + (cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin[3], + const size_t * region[3], + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, image, fill_color, origin, region, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyImage, + (cl_command_queue command_queue, + cl_mem src_image, + cl_mem 
dst_image, + const size_t * src_origin[3], + const size_t * dst_origin[3], + const size_t * region[3], + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_image, dst_image, src_origin, dst_origin, + region, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer, + (cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin[3], + const size_t * region[3], + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_image, dst_buffer, src_origin, region, dst_offset, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyBufferToImage, + (cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin[3], + const size_t * region[3], + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_buffer, dst_image, src_offset, dst_origin, + region, num_events_in_wait_list, event_wait_list, event)) + + +OCL_FUNC_P(void*, clEnqueueMapImage, + (cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin[3], + const size_t * region[3], + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret), + (command_queue, image, blocking_map, map_flags, origin, region, + image_row_pitch, image_slice_pitch, num_events_in_wait_list, + event_wait_list, event, errcode_ret)) + +OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program)) + +OCL_FUNC(cl_int, clGetKernelInfo, + (cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (kernel, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj)) + +*/ + +OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj)) + + +OCL_FUNC_P(cl_program, clCreateProgramWithSource, + (cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret), + (context, count, strings, lengths, errcode_ret)) + +OCL_FUNC_P(cl_program, clCreateProgramWithBinary, + (cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret), + (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret)) + +OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program)) + +OCL_FUNC(cl_int, clBuildProgram, + (cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program, void *), + void * user_data), + (program, num_devices, device_list, options, pfn_notify, user_data)) + +OCL_FUNC(cl_int, clGetProgramInfo, + (cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (program, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clGetProgramBuildInfo, + (cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (program, 
device, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC_P(cl_kernel, clCreateKernel, + (cl_program program, + const char * kernel_name, + cl_int * errcode_ret), + (program, kernel_name, errcode_ret)) + +OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel)) + +OCL_FUNC(cl_int, clSetKernelArg, + (cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value), + (kernel, arg_index, arg_size, arg_value)) + +OCL_FUNC(cl_int, clGetKernelWorkGroupInfo, + (cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret), + (kernel, device, param_name, param_value_size, param_value, param_value_size_ret)) + +OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue)) + +OCL_FUNC(cl_int, clEnqueueReadBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_read, offset, size, ptr, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueReadBufferRect, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch, + buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list, + event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueWriteBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_write, offset, size, ptr, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueWriteBufferRect, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, blocking_write, buffer_offset, host_offset, + region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, + host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event)) + +/*OCL_FUNC(cl_int, clEnqueueFillBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, buffer, pattern, pattern_size, offset, size, + num_events_in_wait_list, event_wait_list, event))*/ + +OCL_FUNC(cl_int, clEnqueueCopyBuffer, + (cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_buffer, dst_buffer, 
src_offset, dst_offset, + size, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueCopyBufferRect, + (cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, src_buffer, dst_buffer, src_origin, dst_origin, + region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, + num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC_P(void*, clEnqueueMapBuffer, + (cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret), + (command_queue, buffer, blocking_map, map_flags, offset, size, + num_events_in_wait_list, event_wait_list, event, errcode_ret)) + +OCL_FUNC(cl_int, clEnqueueUnmapMemObject, + (cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueNDRangeKernel, + (cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, kernel, work_dim, global_work_offset, global_work_size, + local_work_size, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clEnqueueTask, + (cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event), + (command_queue, kernel, num_events_in_wait_list, event_wait_list, event)) + +OCL_FUNC(cl_int, clSetEventCallback, + (cl_event event, + cl_int command_exec_callback_type , + void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data), + void *user_data), + (event, command_exec_callback_type, pfn_event_notify, user_data)) + +OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event)) + +} + +#endif + +namespace cv { namespace ocl { + +struct UMat2D +{ + UMat2D(const UMat& m, int accessFlags) + { + CV_Assert(m.dims == 2); + data = (cl_mem)m.handle(accessFlags); + offset = m.offset; + step = m.step; + rows = m.rows; + cols = m.cols; + } + cl_mem data; + size_t offset; + size_t step; + int rows; + int cols; +}; + +struct UMat3D +{ + UMat3D(const UMat& m, int accessFlags) + { + CV_Assert(m.dims == 3); + data = (cl_mem)m.handle(accessFlags); + offset = m.offset; + step = m.step.p[1]; + slicestep = m.step.p[0]; + slices = m.size.p[0]; + rows = m.size.p[1]; + cols = m.size.p[2]; + } + cl_mem data; + size_t offset; + size_t slicestep; + size_t step; + int slices; + int rows; + int cols; +}; + +// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182 +static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 ) +{ + static uint64 table[256]; + static bool initialized = false; + + if( !initialized ) + { + for( int i = 0; i < 256; i++ ) + { + uint64 c = i; + for( int j = 0; j < 8; j++ ) + c = ((c & 1) ? 
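+ // bit-reflected form of the CRC-64/ECMA-182 polynomial; crc64() is used below to key the program cache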
CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1); + table[i] = c; + } + initialized = true; + } + + uint64 crc = ~crc0; + for( size_t idx = 0; idx < size; idx++ ) + crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8); + + return ~crc; +} + +struct HashKey +{ + typedef uint64 part; + HashKey(part _a, part _b) : a(_a), b(_b) {} + part a, b; +}; + +inline bool operator == (const HashKey& h1, const HashKey& h2) +{ + return h1.a == h2.a && h1.b == h2.b; +} + +inline bool operator < (const HashKey& h1, const HashKey& h2) +{ + return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b); +} + +bool haveOpenCL() +{ + initOpenCLAndLoad(0); + return g_haveOpenCL; +} + +bool useOpenCL() +{ + TLSData* data = TLSData::get(); + if( data->useOpenCL < 0 ) + data->useOpenCL = (int)haveOpenCL(); + return data->useOpenCL > 0; +} + +void setUseOpenCL(bool flag) +{ + if( haveOpenCL() ) + { + TLSData* data = TLSData::get(); + data->useOpenCL = flag ? 1 : 0; + } +} + +void finish() +{ + Queue::getDefault().finish(); +} + +#define IMPLEMENT_REFCOUNTABLE() \ + void addref() { CV_XADD(&refcount, 1); } \ + void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \ + int refcount + +class Platform +{ +public: + Platform(); + ~Platform(); + Platform(const Platform& p); + Platform& operator = (const Platform& p); + + void* ptr() const; + static Platform& getDefault(); +protected: + struct Impl; + Impl* p; +}; + +struct Platform::Impl +{ + Impl() + { + refcount = 1; + handle = 0; + initialized = false; + } + + ~Impl() {} + + void init() + { + if( !initialized ) + { + //cl_uint num_entries + cl_uint n = 0; + if( clGetPlatformIDs(1, &handle, &n) < 0 || n == 0 ) + handle = 0; + if( handle != 0 ) + { + char buf[1000]; + size_t len = 0; + clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len); + buf[len] = '\0'; + vendor = String(buf); + } + + initialized = true; + } + } + + IMPLEMENT_REFCOUNTABLE(); + + cl_platform_id handle; + String vendor; + bool initialized; +}; + +Platform::Platform() +{ + p = 0; +} + +Platform::~Platform() +{ + if(p) + p->release(); +} + +Platform::Platform(const Platform& pl) +{ + p = (Impl*)pl.p; + if(p) + p->addref(); +} + +Platform& Platform::operator = (const Platform& pl) +{ + Impl* newp = (Impl*)pl.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +void* Platform::ptr() const +{ + return p ? p->handle : 0; +} + +Platform& Platform::getDefault() +{ + static Platform p; + if( !p.p ) + { + p.p = new Impl; + p.p->init(); + } + return p; +} + +/////////////////////////////////////////////////////////////////////////////////// + +struct Device::Impl +{ + Impl(void* d) + { + handle = (cl_device_id)d; + } + + template<typename _TpCL, typename _TpOut> + _TpOut getProp(cl_device_info prop) const + { + _TpCL temp=_TpCL(); + size_t sz = 0; + + return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 && + sz == sizeof(temp) ? _TpOut(temp) : _TpOut(); + } + + bool getBoolProp(cl_device_info prop) const + { + cl_bool temp = CL_FALSE; + size_t sz = 0; + + return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 && + sz == sizeof(temp) ? temp != 0 : false; + } + + String getStrProp(cl_device_info prop) const + { + char buf[1024]; + size_t sz=0; + return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) >= 0 && + sz < sizeof(buf) ?
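+ // accept the result only if the returned string fit entirely into the local buffer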
String(buf) : String(); + } + + IMPLEMENT_REFCOUNTABLE(); + cl_device_id handle; +}; + + +Device::Device() +{ + p = 0; +} + +Device::Device(void* d) +{ + p = 0; + set(d); +} + +Device::Device(const Device& d) +{ + p = d.p; + if(p) + p->addref(); +} + +Device& Device::operator = (const Device& d) +{ + Impl* newp = (Impl*)d.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +Device::~Device() +{ + if(p) + p->release(); +} + +void Device::set(void* d) +{ + if(p) + p->release(); + p = new Impl(d); +} + +void* Device::ptr() const +{ + return p ? p->handle : 0; +} + +String Device::name() const +{ return p ? p->getStrProp(CL_DEVICE_NAME) : String(); } + +String Device::extensions() const +{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); } + +String Device::vendor() const +{ return p ? p->getStrProp(CL_DEVICE_VENDOR) : String(); } + +String Device::OpenCL_C_Version() const +{ return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); } + +String Device::OpenCLVersion() const +{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); } + +String Device::driverVersion() const +{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); } + +int Device::type() const +{ return p ? p->getProp<cl_device_type, int>(CL_DEVICE_TYPE) : 0; } + +int Device::addressBits() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; } + +bool Device::available() const +{ return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; } + +bool Device::compilerAvailable() const +{ return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; } + +bool Device::linkerAvailable() const +{ return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; } + +int Device::doubleFPConfig() const +{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG) : 0; } + +int Device::singleFPConfig() const +{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; } + +int Device::halfFPConfig() const +{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; } + +bool Device::endianLittle() const +{ return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; } + +bool Device::errorCorrectionSupport() const +{ return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; } + +int Device::executionCapabilities() const +{ return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; } + +size_t Device::globalMemCacheSize() const +{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; } + +int Device::globalMemCacheType() const +{ return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; } + +int Device::globalMemCacheLineSize() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; } + +size_t Device::globalMemSize() const +{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; } + +size_t Device::localMemSize() const +{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; } + +int Device::localMemType() const +{ return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; } + +bool Device::hostUnifiedMemory() const +{ return p ? p->getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY) : false; } + +bool Device::imageSupport() const +{ return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; } + +size_t Device::image2DMaxWidth() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; } + +size_t Device::image2DMaxHeight() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; } + +size_t Device::image3DMaxWidth() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; } + +size_t Device::image3DMaxHeight() const +{ return p ?
p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; } + +size_t Device::image3DMaxDepth() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; } + +size_t Device::imageMaxBufferSize() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; } + +size_t Device::imageMaxArraySize() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; } + +int Device::maxClockFrequency() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; } + +int Device::maxComputeUnits() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS) : 0; } + +int Device::maxConstantArgs() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; } + +size_t Device::maxConstantBufferSize() const +{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; } + +size_t Device::maxMemAllocSize() const +{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; } + +size_t Device::maxParameterSize() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; } + +int Device::maxReadImageArgs() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; } + +int Device::maxWriteImageArgs() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; } + +int Device::maxSamplers() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; } + +size_t Device::maxWorkGroupSize() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE) : 0; } + +int Device::maxWorkItemDims() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; } + +void Device::maxWorkItemSizes(size_t* sizes) const +{ + if(p) + { + const int MAX_DIMS = 32; + size_t retsz = 0; + clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES, + MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz); + } +} + +int Device::memBaseAddrAlign() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; } + +int Device::nativeVectorWidthChar() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; } + +int Device::nativeVectorWidthShort() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; } + +int Device::nativeVectorWidthInt() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; } + +int Device::nativeVectorWidthLong() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; } + +int Device::nativeVectorWidthFloat() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; } + +int Device::nativeVectorWidthDouble() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; } + +int Device::nativeVectorWidthHalf() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; } + +int Device::preferredVectorWidthChar() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; } + +int Device::preferredVectorWidthShort() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; } + +int Device::preferredVectorWidthInt() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; } + +int Device::preferredVectorWidthLong() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; } + +int Device::preferredVectorWidthFloat() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; } + +int Device::preferredVectorWidthDouble() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; } + +int Device::preferredVectorWidthHalf() const +{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; } + +size_t Device::printfBufferSize() const +{ return p ?
p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; } + +size_t Device::profilingTimerResolution() const +{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; } + +const Device& Device::getDefault() +{ + const Context& ctx = Context::getDefault(); + int idx = TLSData::get()->device; + return ctx.device(idx); +} + +///////////////////////////////////////////////////////////////////////////////////////// + +struct Context::Impl +{ + Impl(int dtype0) + { + refcount = 1; + handle = 0; + + cl_int retval = 0; + cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr(); + cl_context_properties prop[] = + { + CL_CONTEXT_PLATFORM, (cl_context_properties)pl, + 0 + }; + + cl_uint i, nd0 = 0, nd = 0; + int dtype = dtype0 & 15; + retval = clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ); + if(retval < 0) + return; + AutoBuffer<void*> dlistbuf(nd0*2+1); + cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf; + cl_device_id* dlist_new = dlist + nd0; + clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ); + String name0; + + for(i = 0; i < nd0; i++) + { + Device d(dlist[i]); + if( !d.available() || !d.compilerAvailable() ) + continue; + if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() ) + continue; + if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() ) + continue; + String name = d.name(); + if( nd != 0 && name != name0 ) + continue; + name0 = name; + dlist_new[nd++] = dlist[i]; + } + + if(nd == 0) + return; + + // !!! in the current implementation force the number of devices to 1 !!! + nd = 1; + + handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval); + bool ok = handle != 0 && retval >= 0; + if( ok ) + { + devices.resize(nd); + for( i = 0; i < nd; i++ ) + devices[i].set(dlist_new[i]); + } + } + + ~Impl() + { + if(handle) + clReleaseContext(handle); + devices.clear(); + } + + Program getProg(const ProgramSource& src, + const String& buildflags, String& errmsg) + { + String prefix = Program::getPrefix(buildflags); + HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size())); + phash_t::iterator it = phash.find(k); + if( it != phash.end() ) + return it->second; + //String filename = format("%08x%08x_%08x%08x.clb2", + Program prog(src, buildflags, errmsg); + phash.insert(std::pair<HashKey, Program>(k, prog)); + return prog; + } + + IMPLEMENT_REFCOUNTABLE(); + + cl_context handle; + std::vector<Device> devices; + bool initialized; + + typedef ProgramSource::hash_t hash_t; + + struct HashKey + { + HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {} + bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); } + bool operator == (const HashKey& k) const { return a == k.a && b == k.b; } + bool operator != (const HashKey& k) const { return a != k.a || b != k.b; } + hash_t a, b; + }; + typedef std::map<HashKey, Program> phash_t; + phash_t phash; +}; + + +Context::Context() +{ + p = 0; +} + +Context::Context(int dtype) +{ + p = 0; + create(dtype); +} + +bool Context::create(int dtype0) +{ + if( !haveOpenCL() ) + return false; + if(p) + p->release(); + p = new Impl(dtype0); + if(!p->handle) + { + delete p; + p = 0; + } + return p != 0; +} + +Context::~Context() +{ + if(p) + p->release(); +} + +Context::Context(const Context& c) +{ + p = (Impl*)c.p; + if(p) + p->addref(); +} + +Context& Context::operator = (const Context& c) +{ + Impl* newp = (Impl*)c.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +void* Context::ptr() const +{ + return p ? p->handle : 0; +} + +size_t Context::ndevices() const +{ + return p ?
p->devices.size() : 0; +} + +const Device& Context::device(size_t idx) const +{ + static Device dummy; + return !p || idx >= p->devices.size() ? dummy : p->devices[idx]; +} + +Context& Context::getDefault() +{ + static Context ctx; + if( !ctx.p && haveOpenCL() ) + { + // do not create new Context right away. + // First, try to retrieve existing context of the same type. + // In its turn, Platform::getContext() may call Context::create() + // if there is no such context. + ctx.create(Device::TYPE_ACCELERATOR); + if(!ctx.p) + ctx.create(Device::TYPE_DGPU); + if(!ctx.p) + ctx.create(Device::TYPE_IGPU); + if(!ctx.p) + ctx.create(Device::TYPE_CPU); + } + + return ctx; +} + +Program Context::getProg(const ProgramSource& prog, + const String& buildopts, String& errmsg) +{ + return p ? p->getProg(prog, buildopts, errmsg) : Program(); +} + +struct Queue::Impl +{ + Impl(const Context& c, const Device& d) + { + refcount = 1; + const Context* pc = &c; + cl_context ch = (cl_context)pc->ptr(); + if( !ch ) + { + pc = &Context::getDefault(); + ch = (cl_context)pc->ptr(); + } + cl_device_id dh = (cl_device_id)d.ptr(); + if( !dh ) + dh = (cl_device_id)pc->device(0).ptr(); + cl_int retval = 0; + handle = clCreateCommandQueue(ch, dh, 0, &retval); + } + + ~Impl() + { + if(handle) + { + clFinish(handle); + clReleaseCommandQueue(handle); + } + } + + IMPLEMENT_REFCOUNTABLE(); + + cl_command_queue handle; + bool initialized; +}; + +Queue::Queue() +{ + p = 0; +} + +Queue::Queue(const Context& c, const Device& d) +{ + p = 0; + create(c, d); +} + +Queue::Queue(const Queue& q) +{ + p = q.p; + if(p) + p->addref(); +} + +Queue& Queue::operator = (const Queue& q) +{ + Impl* newp = (Impl*)q.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +Queue::~Queue() +{ + if(p) + p->release(); +} + +bool Queue::create(const Context& c, const Device& d) +{ + if(p) + p->release(); + p = new Impl(c, d); + return p->handle != 0; +} + +void Queue::finish() +{ + if(p && p->handle) + clFinish(p->handle); +} + +void* Queue::ptr() const +{ + return p ? p->handle : 0; +} + +Queue& Queue::getDefault() +{ + Queue& q = TLSData::get()->oclQueue; + if( !q.p ) + q.create(Context::getDefault()); + return q; +} + +static cl_command_queue getQueue(const Queue& q) +{ + cl_command_queue qq = (cl_command_queue)q.ptr(); + if(!qq) + qq = (cl_command_queue)Queue::getDefault().ptr(); + return qq; +} + +KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz) + : flags(_flags), m(_m), obj(_obj), sz(_sz) +{ +} + +KernelArg KernelArg::Constant(const Mat& m) +{ + CV_Assert(m.isContinuous()); + return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize()); +} + + +struct Kernel::Impl +{ + Impl(const char* kname, const Program& prog) + { + e = 0; refcount = 1; + cl_program ph = (cl_program)prog.ptr(); + cl_int retval = 0; + handle = ph != 0 ? 
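+ // a null program yields a null kernel handle; Kernel::create() below reports that as a failed create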
+ clCreateKernel(ph, kname, &retval) : 0; + for( int i = 0; i < MAX_ARRS; i++ ) + u[i] = 0; + nu = 0; + } + + void cleanupUMats() + { + for( int i = 0; i < MAX_ARRS; i++ ) + if( u[i] ) + { + if( CV_XADD(&u[i]->urefcount, -1) == 1 ) + u[i]->currAllocator->deallocate(u[i]); + u[i] = 0; + } + nu = 0; + } + + void addUMat(const UMat& m) + { + CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0); + u[nu] = m.u; + CV_XADD(&m.u->urefcount, 1); + nu++; + } + + void finit() + { + cleanupUMats(); + if(e) { clReleaseEvent(e); e = 0; } + release(); + } + + ~Impl() + { + if(handle) + clReleaseKernel(handle); + } + + IMPLEMENT_REFCOUNTABLE(); + + cl_kernel handle; + cl_event e; + enum { MAX_ARRS = 16 }; + UMatData* u[MAX_ARRS]; + int nu; +}; + +}} + +extern "C" +{ +static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p) +{ + ((cv::ocl::Kernel::Impl*)p)->finit(); +} + +} + +namespace cv { namespace ocl { + +Kernel::Kernel() +{ + p = 0; +} + +Kernel::Kernel(const char* kname, const Program& prog) +{ + p = 0; + create(kname, prog); +} + +Kernel::Kernel(const char* kname, const ProgramSource& src, + const String& buildopts, String& errmsg) +{ + p = 0; + create(kname, src, buildopts, errmsg); +} + +Kernel::Kernel(const Kernel& k) +{ + p = k.p; + if(p) + p->addref(); +} + +Kernel& Kernel::operator = (const Kernel& k) +{ + Impl* newp = (Impl*)k.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +Kernel::~Kernel() +{ + if(p) + p->release(); +} + +bool Kernel::create(const char* kname, const Program& prog) +{ + if(p) + p->release(); + p = new Impl(kname, prog); + if(p->handle == 0) + { + p->release(); + p = 0; + } + return p != 0; +} + +bool Kernel::create(const char* kname, const ProgramSource& src, + const String& buildopts, String& errmsg) +{ + if(p) + { + p->release(); + p = 0; + } + const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg); + return create(kname, prog); +} + +void* Kernel::ptr() const +{ + return p ? p->handle : 0; +} + +void Kernel::set(int i, const void* value, size_t sz) +{ + CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 ); + if( i == 0 ) + p->cleanupUMats(); +} + +void Kernel::set(int i, const UMat& m) +{ + set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0)); +} + +void Kernel::set(int i, const KernelArg& arg) +{ + CV_Assert( p && p->handle ); + if( i == 0 ) + p->cleanupUMats(); + if( arg.m ) + { + int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) + + ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0); + if( arg.m->dims <= 2 ) + { + UMat2D u2d(*arg.m, accessFlags); + clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d); + } + else + { + UMat3D u3d(*arg.m, accessFlags); + clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d); + } + p->addUMat(*arg.m); + } + else + { + clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj); + } +} + + +void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[], + bool sync, const Queue& q) +{ + CV_Assert(p && p->handle && p->e == 0); + cl_command_queue qq = getQueue(q); + clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims, + offset, globalsize, localsize, 0, 0, + sync ?
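+ // async path: the event outlives the call so oclCleanupCallback can drop the kernel's UMat references on completion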
0 : &p->e); + if( sync ) + { + clFinish(qq); + p->cleanupUMats(); + } + else + { + p->addref(); + clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p); + } +} + +void Kernel::runTask(bool sync, const Queue& q) +{ + CV_Assert(p && p->handle && p->e == 0); + cl_command_queue qq = getQueue(q); + clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e); + if( sync ) + { + clFinish(qq); + p->cleanupUMats(); + } + else + { + p->addref(); + clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p); + } +} + + +size_t Kernel::workGroupSize() const +{ + if(!p) + return 0; + size_t val = 0, retsz = 0; + cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); + return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(val), &val, &retsz) >= 0 ? val : 0; +} + +bool Kernel::compileWorkGroupSize(size_t wsz[]) const +{ + if(!p || !wsz) + return false; + size_t retsz = 0; + cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); + return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + sizeof(wsz[0])*3, wsz, &retsz) >= 0; +} + +size_t Kernel::localMemSize() const +{ + if(!p) + return 0; + size_t retsz = 0; + cl_ulong val = 0; + cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); + return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(val), &val, &retsz) >= 0 ? (size_t)val : 0; +} + +//////////////////////////////////////////////////////////////////////////////////////// + +struct Program::Impl +{ + Impl(const ProgramSource& _src, + const String& _buildflags, String& errmsg) + { + refcount = 1; + const Context& ctx = Context::getDefault(); + src = _src; + buildflags = _buildflags; + const String& srcstr = src.source(); + const char* srcptr = srcstr.c_str(); + size_t srclen = srcstr.size(); + cl_int retval = 0; + + handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval); + if( handle && retval >= 0 ) + { + int i, n = (int)ctx.ndevices(); + AutoBuffer<void*> deviceListBuf(n+1); + void** deviceList = deviceListBuf; + for( i = 0; i < n; i++ ) + deviceList[i] = ctx.device(i).ptr(); + retval = clBuildProgram(handle, n, + (const cl_device_id*)deviceList, + buildflags.c_str(), 0, 0); + if( retval == CL_BUILD_PROGRAM_FAILURE ) + { + char buf[1024]; + size_t retsz = 0; + clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG, + sizeof(buf)-16, buf, &retsz); + errmsg = String(buf); + } + } + } + + Impl(const String& _buf, const String& _buildflags) + { + refcount = 1; + handle = 0; + buildflags = _buildflags; + if(_buf.empty()) + return; + String prefix0 = Program::getPrefix(buildflags); + const Context& ctx = Context::getDefault(); + const Device& dev = Device::getDefault(); + const char* pos0 = _buf.c_str(); + const char* pos1 = strchr(pos0, '\n'); + if(!pos1) + return; + const char* pos2 = strchr(pos1+1, '\n'); + if(!pos2) + return; + const char* pos3 = strchr(pos2+1, '\n'); + if(!pos3) + return; + size_t prefixlen = (pos3 - pos0)+1; + String prefix(pos0, prefixlen); + if( prefix != prefix0 ) + return; + const uchar* bin = (uchar*)(pos3+1); + void* devid = dev.ptr(); + size_t codelen = _buf.length() - prefixlen; + cl_int binstatus = 0, retval = 0; + handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid, + &codelen, &bin, &binstatus, &retval); + } + + String store() + { + if(!handle) + return String(); + size_t progsz = 0, retsz = 0; + String prefix = Program::getPrefix(buildflags); + size_t prefixlen = prefix.length(); +
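// cached blob layout: three '\n'-terminated prefix lines (device name, driver version, build flags), + // followed by the raw program binary; the binary constructor above parses exactly this format +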
if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) < 0) + return String(); + AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16); + uchar* buf = bufbuf; + memcpy(buf, prefix.c_str(), prefixlen); + buf += prefixlen; + if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) < 0) + return String(); + buf[progsz] = (uchar)'\0'; + return String((const char*)(uchar*)bufbuf, prefixlen + progsz); + } + + ~Impl() + { + if( handle ) + clReleaseProgram(handle); + } + + IMPLEMENT_REFCOUNTABLE(); + + ProgramSource src; + String buildflags; + cl_program handle; +}; + + +Program::Program() { p = 0; } + +Program::Program(const ProgramSource& src, + const String& buildflags, String& errmsg) +{ + p = 0; + create(src, buildflags, errmsg); +} + +Program::Program(const Program& prog) +{ + p = prog.p; + if(p) + p->addref(); +} + +Program& Program::operator = (const Program& prog) +{ + Impl* newp = (Impl*)prog.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +Program::~Program() +{ + if(p) + p->release(); +} + +bool Program::create(const ProgramSource& src, + const String& buildflags, String& errmsg) +{ + if(p) + p->release(); + p = new Impl(src, buildflags, errmsg); + if(!p->handle) + { + p->release(); + p = 0; + } + return p != 0; +} + +const ProgramSource& Program::source() const +{ + static ProgramSource dummy; + return p ? p->src : dummy; +} + +void* Program::ptr() const +{ + return p ? p->handle : 0; +} + +bool Program::read(const String& bin, const String& buildflags) +{ + if(p) + p->release(); + p = new Impl(bin, buildflags); + return p->handle != 0; +} + +bool Program::write(String& bin) const +{ + if(!p) + return false; + bin = p->store(); + return !bin.empty(); +} + +String Program::getPrefix() const +{ + if(!p) + return String(); + return getPrefix(p->buildflags); +} + +String Program::getPrefix(const String& buildflags) +{ + const Context& ctx = Context::getDefault(); + const Device& dev = ctx.device(0); + return format("name=%s\ndriver=%s\nbuildflags=%s\n", + dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str()); +} + +//////////////////////////////////////////////////////////////////////////////////////// + +struct ProgramSource::Impl +{ + Impl(const char* _src) + { + init(String(_src)); + } + Impl(const String& _src) + { + init(_src); + } + void init(const String& _src) + { + refcount = 1; + src = _src; + h = crc64((uchar*)src.c_str(), src.size()); + } + + IMPLEMENT_REFCOUNTABLE(); + String src; + ProgramSource::hash_t h; +}; + + +ProgramSource::ProgramSource() +{ + p = 0; +} + +ProgramSource::ProgramSource(const char* prog) +{ + p = new Impl(prog); +} + +ProgramSource::ProgramSource(const String& prog) +{ + p = new Impl(prog); +} + +ProgramSource::~ProgramSource() +{ + if(p) + p->release(); +} + +ProgramSource::ProgramSource(const ProgramSource& prog) +{ + p = prog.p; + if(p) + p->addref(); +} + +ProgramSource& ProgramSource::operator = (const ProgramSource& prog) +{ + Impl* newp = (Impl*)prog.p; + if(newp) + newp->addref(); + if(p) + p->release(); + p = newp; + return *this; +} + +const String& ProgramSource::source() const +{ + static String dummy; + return p ? p->src : dummy; +} + +ProgramSource::hash_t ProgramSource::hash() const +{ + return p ?
p->h : 0; +} + +////////////////////////////////////////////////////////////////////////////////////////////// + +class OpenCLAllocator : public MatAllocator +{ +public: + OpenCLAllocator() {} + + UMatData* defaultAllocate(int dims, const int* sizes, int type, size_t* step) const + { + UMatData* u = Mat::getStdAllocator()->allocate(dims, sizes, type, step); + u->urefcount = 1; + u->refcount = 0; + return u; + } + + void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const + { + const Device& dev = ctx.device(0); + createFlags = CL_MEM_READ_WRITE; + + if( dev.hostUnifiedMemory() ) + flags0 = 0; + else + flags0 = UMatData::COPY_ON_MAP; + } + + UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const + { + if(!useOpenCL()) + return defaultAllocate(dims, sizes, type, step); + size_t total = CV_ELEM_SIZE(type); + for( int i = dims-1; i >= 0; i-- ) + { + if( step ) + step[i] = total; + total *= sizes[i]; + } + + Context& ctx = Context::getDefault(); + int createFlags = 0, flags0 = 0; + getBestFlags(ctx, createFlags, flags0); + + cl_int retval = 0; + void* handle = clCreateBuffer((cl_context)ctx.ptr(), + createFlags, total, 0, &retval); + if( !handle || retval < 0 ) + return defaultAllocate(dims, sizes, type, step); + UMatData* u = new UMatData(this); + u->data = 0; + u->size = total; + u->handle = handle; + u->urefcount = 1; + u->flags = flags0; + + return u; + } + + bool allocate(UMatData* u, int accessFlags) const + { + if(!u) + return false; + + UMatDataAutoLock lock(u); + + if(u->handle == 0) + { + CV_Assert(u->origdata != 0); + Context& ctx = Context::getDefault(); + int createFlags = 0, flags0 = 0; + getBestFlags(ctx, createFlags, flags0); + + cl_context ctx_handle = (cl_context)ctx.ptr(); + cl_int retval = 0; + int tempUMatFlags = UMatData::TEMP_UMAT; + u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, + u->size, u->origdata, &retval); + if((!u->handle || retval < 0) && !(accessFlags & ACCESS_FAST)) + { + u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|createFlags, + u->size, u->origdata, &retval); + tempUMatFlags = UMatData::TEMP_COPIED_UMAT; + } + if(!u->handle || retval < 0) + return false; + u->prevAllocator = u->currAllocator; + u->currAllocator = this; + u->flags |= tempUMatFlags; + } + if(accessFlags & ACCESS_WRITE) + u->markHostCopyObsolete(true); + CV_XADD(&u->urefcount, 1); + return true; + } + + void deallocate(UMatData* u) const + { + if(!u) + return; + + // TODO: !!! 
when we add Shared Virtual Memory Support, + // this function (as well as the others) should be corrected + CV_Assert(u->handle != 0 && u->urefcount == 0); + if(u->tempUMat()) + { + if( u->hostCopyObsolete() && u->refcount > 0 && u->tempCopiedUMat() ) + { + clEnqueueWriteBuffer((cl_command_queue)Queue::getDefault().ptr(), + (cl_mem)u->handle, CL_TRUE, 0, + u->size, u->origdata, 0, 0, 0); + } + u->markHostCopyObsolete(false); + clReleaseMemObject((cl_mem)u->handle); + u->currAllocator = u->prevAllocator; + if(u->data && u->copyOnMap()) + fastFree(u->data); + u->data = u->origdata; + if(u->refcount == 0) + u->currAllocator->deallocate(u); + } + else + { + if(u->data && u->copyOnMap()) + fastFree(u->data); + clReleaseMemObject((cl_mem)u->handle); + delete u; + } + } + + void map(UMatData* u, int accessFlags) const + { + if(!u) + return; + + CV_Assert( u->handle != 0 ); + + UMatDataAutoLock autolock(u); + + if(accessFlags & ACCESS_WRITE) + u->markDeviceCopyObsolete(true); + + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + if( u->refcount == 0 ) + { + if( !u->copyOnMap() ) + { + CV_Assert(u->data == 0); + // because there can be other map requests for the same UMat with different access flags, + // we use the universal (read-write) access mode. + cl_int retval = 0; + u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, + (CL_MAP_READ | CL_MAP_WRITE), + 0, u->size, 0, 0, 0, &retval); + if(u->data && retval >= 0) + { + u->markHostCopyObsolete(false); + return; + } + + // if map failed, switch to copy-on-map mode for the particular buffer + u->flags |= UMatData::COPY_ON_MAP; + } + + if(!u->data) + { + u->data = (uchar*)fastMalloc(u->size); + u->markHostCopyObsolete(true); + } + } + + if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) + { + CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, + u->size, u->data, 0, 0, 0) >= 0 ); + u->markHostCopyObsolete(false); + } + } + + void unmap(UMatData* u) const + { + if(!u) + return; + + CV_Assert(u->handle != 0); + + UMatDataAutoLock autolock(u); + + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + if( !u->copyOnMap() && u->data ) + { + CV_Assert( clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0) >= 0 ); + u->data = 0; + } + else if( u->copyOnMap() && u->deviceCopyObsolete() ) + { + CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, + u->size, u->data, 0, 0, 0) >= 0 ); + } + u->markDeviceCopyObsolete(false); + u->markHostCopyObsolete(false); + } + + bool checkContinuous(int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dstofs[], const size_t dststep[], + size_t& total, size_t new_sz[], + size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[], + size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const + { + bool iscontinuous = true; + srcrawofs = srcofs ? srcofs[dims-1] : 0; + dstrawofs = dstofs ? dstofs[dims-1] : 0; + total = sz[dims-1]; + for( int i = dims-2; i >= 0; i-- ) + { + if( total != srcstep[i] || total != dststep[i] ) + iscontinuous = false; + total *= sz[i]; + if( srcofs ) + srcrawofs += srcofs[i]*srcstep[i]; + if( dstofs ) + dstrawofs += dstofs[i]*dststep[i]; + } + + if( !iscontinuous ) + { + // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order. + if( dims == 2 ) + { + new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1; + // we assume that new_...
arrays are initialized by caller + // with 0's, so there is no else branch + if( srcofs ) + { + new_srcofs[0] = srcofs[1]; + new_srcofs[1] = srcofs[0]; + new_srcofs[2] = 0; + } + + if( dstofs ) + { + new_dstofs[0] = dstofs[1]; + new_dstofs[1] = dstofs[0]; + new_dstofs[2] = 0; + } + + new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0; + new_dststep[0] = dststep[0]; new_dststep[1] = 0; + } + else + { + // we could check for dims == 3 here, + // but from user perspective this one is more informative + CV_Assert(dims <= 3); + new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0]; + if( srcofs ) + { + new_srcofs[0] = srcofs[2]; + new_srcofs[1] = srcofs[1]; + new_srcofs[2] = srcofs[0]; + } + + if( dstofs ) + { + new_dstofs[0] = dstofs[2]; + new_dstofs[1] = dstofs[1]; + new_dstofs[2] = dstofs[0]; + } + + new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0]; + new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0]; + } + } + return iscontinuous; + } + + void download(UMatData* u, void* dstptr, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dststep[]) const + { + if(!u) + return; + UMatDataAutoLock autolock(u); + + if( u->data && !u->hostCopyObsolete() ) + { + Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep); + return; + } + CV_Assert( u->handle != 0 ); + + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + size_t total = 0, new_sz[] = {0, 0, 0}; + size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; + size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; + + bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep, + total, new_sz, + srcrawofs, new_srcofs, new_srcstep, + dstrawofs, new_dstofs, new_dststep); + if( iscontinuous ) + { + CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, + srcrawofs, total, dstptr, 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE, + new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], + new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 ); + } + clFinish(q); + } + + void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[], + const size_t dstofs[], const size_t dststep[], + const size_t srcstep[]) const + { + if(!u) + return; + + // there should be no user-visible CPU copies of the UMat which we are going to copy to + CV_Assert(u->refcount == 0); + + size_t total = 0, new_sz[] = {0, 0, 0}; + size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; + size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; + + bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep, + total, new_sz, + srcrawofs, new_srcofs, new_srcstep, + dstrawofs, new_dstofs, new_dststep); + + UMatDataAutoLock autolock(u); + + // if there is cached CPU copy of the GPU matrix, + // we could use it as a destination. + // we can do it in 2 cases: + // 1. we overwrite the whole content + // 2. 
we overwrite part of the matrix, but the GPU copy is out-of-date + if( u->data && (u->hostCopyObsolete() <= u->deviceCopyObsolete() || total == u->size)) + { + Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep); + u->markHostCopyObsolete(false); + u->markDeviceCopyObsolete(true); + return; + } + + CV_Assert( u->handle != 0 ); + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + if( iscontinuous ) + { + CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, + CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE, + new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1], + new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) >= 0 ); + } + + u->markHostCopyObsolete(true); + u->markDeviceCopyObsolete(false); + + clFinish(q); + } + + void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[], + const size_t srcofs[], const size_t srcstep[], + const size_t dstofs[], const size_t dststep[], bool sync) const + { + if(!src || !dst) + return; + + size_t total = 0, new_sz[] = {0, 0, 0}; + size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; + size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; + + bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep, + total, new_sz, + srcrawofs, new_srcofs, new_srcstep, + dstrawofs, new_dstofs, new_dststep); + + UMatDataAutoLock src_autolock(src); + UMatDataAutoLock dst_autolock(dst); + + if( !src->handle || (src->data && src->hostCopyObsolete() <= src->deviceCopyObsolete()) ) + { + upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep); + return; + } + if( !dst->handle || (dst->data && dst->hostCopyObsolete() <= dst->deviceCopyObsolete()) ) + { + download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep); + dst->markHostCopyObsolete(false); + dst->markDeviceCopyObsolete(true); + return; + } + + // there should be no user-visible CPU copies of the UMat which we are going to copy to + CV_Assert(dst->refcount == 0); + cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + + if( iscontinuous ) + { + CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, + srcrawofs, dstrawofs, total, 0, 0, 0) >= 0 ); + } + else + { + CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, + new_srcofs, new_dstofs, new_sz, + new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1], + 0, 0, 0) >= 0 ); + } + + dst->markHostCopyObsolete(true); + dst->markDeviceCopyObsolete(false); + + if( sync ) + clFinish(q); + } +}; + +MatAllocator* getOpenCLAllocator() +{ + static OpenCLAllocator allocator; + return &allocator; +} + +}} diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index 5a022af79a..073a54e034 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -50,6 +50,7 @@ #include "opencv2/core/private.hpp" #include "opencv2/core/private.cuda.hpp" +#include "opencv2/core/ocl.hpp" #include #include @@ -105,7 +106,7 @@ extern const uchar g_Saturate8u[]; #if defined WIN32 || defined _WIN32 void deleteThreadAllocData(); -void deleteThreadRNGData(); +void deleteThreadData(); #endif template struct OpAdd @@ -215,6 +216,19 @@ inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind) void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ); +struct TLSData +{ + TLSData(); + RNG rng; + int device; + 
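// per-thread OpenCL state: the active device index, the thread's command queue and the use-OpenCL switch +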
ocl::Queue oclQueue; + int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized + + static TLSData* get(); +}; + +namespace ocl { MatAllocator* getOpenCLAllocator(); } + } #endif /*_CXCORE_INTERNAL_H_*/ diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index ffce63484a..75942d4fcd 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -727,85 +727,11 @@ void RNG::fill( InputOutputArray _mat, int disttype, } } -#ifdef WIN32 - - -#ifdef HAVE_WINRT -// using C++11 thread attribute for local thread data -__declspec( thread ) RNG* rng = NULL; - - void deleteThreadRNGData() - { - if (rng) - delete rng; } -RNG& theRNG() +cv::RNG& cv::theRNG() { - if (!rng) - { - rng = new RNG; - } - return *rng; -} -#else -#ifdef WINCE -# define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF) -#endif -static DWORD tlsRNGKey = TLS_OUT_OF_INDEXES; - - void deleteThreadRNGData() - { - if( tlsRNGKey != TLS_OUT_OF_INDEXES ) - delete (RNG*)TlsGetValue( tlsRNGKey ); -} - -RNG& theRNG() -{ - if( tlsRNGKey == TLS_OUT_OF_INDEXES ) - { - tlsRNGKey = TlsAlloc(); - CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES); - } - RNG* rng = (RNG*)TlsGetValue( tlsRNGKey ); - if( !rng ) - { - rng = new RNG; - TlsSetValue( tlsRNGKey, rng ); - } - return *rng; -} -#endif //HAVE_WINRT -#else - -static pthread_key_t tlsRNGKey = 0; -static pthread_once_t tlsRNGKeyOnce = PTHREAD_ONCE_INIT; - -static void deleteRNG(void* data) -{ - delete (RNG*)data; -} - -static void makeRNGKey() -{ - int errcode = pthread_key_create(&tlsRNGKey, deleteRNG); - CV_Assert(errcode == 0); -} - -RNG& theRNG() -{ - pthread_once(&tlsRNGKeyOnce, makeRNGKey); - RNG* rng = (RNG*)pthread_getspecific(tlsRNGKey); - if( !rng ) - { - rng = new RNG; - pthread_setspecific(tlsRNGKey, rng); - } - return *rng; -} - -#endif - + return TLSData::get()->rng; } void cv::randu(InputOutputArray dst, InputArray low, InputArray high) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 6fb5933ca6..c45ffa0811 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -716,7 +716,7 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID ) if( fdwReason == DLL_THREAD_DETACH || fdwReason == DLL_PROCESS_DETACH ) { cv::deleteThreadAllocData(); - cv::deleteThreadRNGData(); + cv::deleteThreadData(); } return TRUE; } @@ -830,4 +830,92 @@ bool Mutex::trylock() { return impl->trylock(); } } +//////////////////////////////// thread-local storage //////////////////////////////// + +namespace cv +{ + +TLSData::TLSData() +{ + device = 0; + useOpenCL = -1; +} + +#ifdef WIN32 + +#ifdef HAVE_WINRT + // using C++11 thread attribute for local thread data + static __declspec( thread ) TLSData* g_tlsdata = NULL; + + void deleteThreadData() + { + if (g_tlsdata) + delete g_tlsdata; + } + + TLSData* TLSData::get() + { + if (!g_tlsdata) + { + g_tlsdata = new TLSData; + } + return g_tlsdata; + } +#else +#ifdef WINCE +# define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF) +#endif + static DWORD tlsKey = TLS_OUT_OF_INDEXES; + + void deleteThreadData() + { + if( tlsKey != TLS_OUT_OF_INDEXES ) + delete (TLSData*)TlsGetValue( tlsKey ); + } + + TLSData* TLSData::get() + { + if( tlsKey == TLS_OUT_OF_INDEXES ) + { + tlsKey = TlsAlloc(); + CV_Assert(tlsKey != TLS_OUT_OF_INDEXES); + } + TLSData* d = (TLSData*)TlsGetValue( tlsKey ); + if( !d ) + { + d = new TLSData; + TlsSetValue( tlsKey, d ); + } + return d; + } +#endif //HAVE_WINRT +#else + static pthread_key_t tlsKey = 0; + static pthread_once_t tlsKeyOnce = PTHREAD_ONCE_INIT; + + static void
deleteTLSData(void* data) + { + delete (TLSData*)data; + } + + static void makeKey() + { + int errcode = pthread_key_create(&tlsKey, deleteTLSData); + CV_Assert(errcode == 0); + } + + TLSData* TLSData::get() + { + pthread_once(&tlsKeyOnce, makeKey); + TLSData* d = (TLSData*)pthread_getspecific(tlsKey); + if( !d ) + { + d = new TLSData; + pthread_setspecific(tlsKey, d); + } + return d; + } +#endif +} + /* End of file. */ diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp new file mode 100644 index 0000000000..2ea71acc8b --- /dev/null +++ b/modules/core/src/umatrix.cpp @@ -0,0 +1,644 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +///////////////////////////////// UMat implementation /////////////////////////////// + +namespace cv { + +// it should be a prime number for the best hash function +enum { UMAT_NLOCKS = 31 }; +static Mutex umatLocks[UMAT_NLOCKS]; + +UMatData::UMatData(const MatAllocator* allocator) +{ + prevAllocator = currAllocator = allocator; + urefcount = refcount = 0; + data = origdata = 0; + size = 0; + flags = 0; + handle = 0; + userdata = 0; +} + +void UMatData::lock() +{ + umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock(); +} + +void UMatData::unlock() +{ + umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock(); +} + + +MatAllocator* UMat::getStdAllocator() +{ + return ocl::getOpenCLAllocator(); +} + +void swap( UMat& a, UMat& b ) +{ + std::swap(a.flags, b.flags); + std::swap(a.dims, b.dims); + std::swap(a.rows, b.rows); + std::swap(a.cols, b.cols); + std::swap(a.allocator, b.allocator); + std::swap(a.u, b.u); + std::swap(a.offset, b.offset); + + std::swap(a.size.p, b.size.p); + std::swap(a.step.p, b.step.p); + std::swap(a.step.buf[0], b.step.buf[0]); + std::swap(a.step.buf[1], b.step.buf[1]); + + if( a.step.p == b.step.buf ) + { + a.step.p = a.step.buf; + a.size.p = &a.rows; + } + + if( b.step.p == a.step.buf ) + { + b.step.p = b.step.buf; + b.size.p = &b.rows; + } +} + + +static inline void setSize( UMat& m, int _dims, const int* _sz, + const size_t* _steps, bool autoSteps=false ) +{ + CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM ); + if( m.dims != _dims ) + { + if( m.step.p != m.step.buf ) + { + fastFree(m.step.p); + m.step.p = m.step.buf; + m.size.p = &m.rows; + } + if( _dims > 2 ) + { + m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0])); + m.size.p = (int*)(m.step.p + _dims) + 1; + m.size.p[-1] = _dims; + m.rows = m.cols = -1; + } + } + + m.dims = _dims; + if( !_sz ) + return; + + size_t esz = CV_ELEM_SIZE(m.flags), total = esz; + int i; + for( i = _dims-1; i >= 0; i-- ) + { + int s = _sz[i]; + CV_Assert( s >= 0 ); + m.size.p[i] = s; + + if( _steps ) + m.step.p[i] = i < _dims-1 ? 
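+ // the innermost step is always one element; outer steps are taken from the caller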
_steps[i] : esz; + else if( autoSteps ) + { + m.step.p[i] = total; + int64 total1 = (int64)total*s; + if( (uint64)total1 != (size_t)total1 ) + CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" ); + total = (size_t)total1; + } + } + + if( _dims == 1 ) + { + m.dims = 2; + m.cols = 1; + m.step[1] = esz; + } +} + +static void updateContinuityFlag(UMat& m) +{ + int i, j; + for( i = 0; i < m.dims; i++ ) + { + if( m.size[i] > 1 ) + break; + } + + for( j = m.dims-1; j > i; j-- ) + { + if( m.step[j]*m.size[j] < m.step[j-1] ) + break; + } + + uint64 t = (uint64)m.step[0]*m.size[0]; + if( j <= i && t == (size_t)t ) + m.flags |= UMat::CONTINUOUS_FLAG; + else + m.flags &= ~UMat::CONTINUOUS_FLAG; +} + + +static void finalizeHdr(UMat& m) +{ + updateContinuityFlag(m); + int d = m.dims; + if( d > 2 ) + m.rows = m.cols = -1; +} + + +UMat Mat::getUMat(int accessFlags) const +{ + UMat hdr; + if(!u) + return hdr; + UMat::getStdAllocator()->allocate(u, accessFlags); + hdr.flags = flags; + setSize(hdr, dims, size.p, step.p); + finalizeHdr(hdr); + hdr.u = u; + hdr.offset = data - datastart; + return hdr; +} + +void UMat::create(int d, const int* _sizes, int _type) +{ + int i; + CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes); + _type = CV_MAT_TYPE(_type); + + if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() ) + { + if( d == 2 && rows == _sizes[0] && cols == _sizes[1] ) + return; + for( i = 0; i < d; i++ ) + if( size[i] != _sizes[i] ) + break; + if( i == d && (d > 1 || size[1] == 1)) + return; + } + + release(); + if( d == 0 ) + return; + flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL; + setSize(*this, d, _sizes, 0, true); + offset = 0; + + if( total() > 0 ) + { + MatAllocator *a = allocator, *a0 = getStdAllocator(); + if(!a) + a = a0; + try + { + u = a->allocate(dims, size, _type, step.p); + CV_Assert(u != 0); + } + catch(...) + { + if(a != a0) + u = a0->allocate(dims, size, _type, step.p); + CV_Assert(u != 0); + } + CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) ); + } + + finalizeHdr(*this); +} + +void UMat::copySize(const UMat& m) +{ + setSize(*this, m.dims, 0, 0); + for( int i = 0; i < dims; i++ ) + { + size[i] = m.size[i]; + step[i] = m.step[i]; + } +} + +void UMat::deallocate() +{ + u->currAllocator->deallocate(u); +} + + +UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows) +{ + CV_Assert( m.dims >= 2 ); + if( m.dims > 2 ) + { + AutoBuffer<Range> rs(m.dims); + rs[0] = _rowRange; + rs[1] = _colRange; + for( int i = 2; i < m.dims; i++ ) + rs[i] = Range::all(); + *this = m(rs); + return; + } + + *this = m; + if( _rowRange != Range::all() && _rowRange != Range(0,rows) ) + { + CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows ); + rows = _rowRange.size(); + offset += step*_rowRange.start; + flags |= SUBMATRIX_FLAG; + } + + if( _colRange != Range::all() && _colRange != Range(0,cols) ) + { + CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols ); + cols = _colRange.size(); + offset += _colRange.start*elemSize(); + flags &= cols < m.cols ?
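+ // a narrower column range leaves a gap at the end of each row, so continuity is lost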
~CONTINUOUS_FLAG : -1; + flags |= SUBMATRIX_FLAG; + } + + if( rows == 1 ) + flags |= CONTINUOUS_FLAG; + + if( rows <= 0 || cols <= 0 ) + { + release(); + rows = cols = 0; + } +} + + +UMat::UMat(const UMat& m, const Rect& roi) + : flags(m.flags), dims(2), rows(roi.height), cols(roi.width), + allocator(m.allocator), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows) +{ + CV_Assert( m.dims <= 2 ); + flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1; + flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0; + + size_t esz = CV_ELEM_SIZE(flags); + offset += roi.x*esz; + CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && + 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows ); + if( u ) + CV_XADD(&(u->urefcount), 1); + if( roi.width < m.cols || roi.height < m.rows ) + flags |= SUBMATRIX_FLAG; + + step[0] = m.step[0]; step[1] = esz; + + if( rows <= 0 || cols <= 0 ) + { + release(); + rows = cols = 0; + } +} + + +UMat::UMat(const UMat& m, const Range* ranges) + : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows) +{ + int i, d = m.dims; + + CV_Assert(ranges); + for( i = 0; i < d; i++ ) + { + Range r = ranges[i]; + CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) ); + } + *this = m; + for( i = 0; i < d; i++ ) + { + Range r = ranges[i]; + if( r != Range::all() && r != Range(0, size.p[i])) + { + size.p[i] = r.end - r.start; + offset += r.start*step.p[i]; + flags |= SUBMATRIX_FLAG; + } + } + updateContinuityFlag(*this); +} + +UMat UMat::diag(int d) const +{ + CV_Assert( dims <= 2 ); + UMat m = *this; + size_t esz = elemSize(); + int len; + + if( d >= 0 ) + { + len = std::min(cols - d, rows); + m.offset += esz*d; + } + else + { + len = std::min(rows + d, cols); + m.offset -= step[0]*d; + } + CV_DbgAssert( len > 0 ); + + m.size[0] = m.rows = len; + m.size[1] = m.cols = 1; + m.step[0] += (len > 1 ? 
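+ // advancing by one diagonal element moves one row down and one column to the right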
esz : 0); + + if( m.rows > 1 ) + m.flags &= ~CONTINUOUS_FLAG; + else + m.flags |= CONTINUOUS_FLAG; + + if( size() != Size(1,1) ) + m.flags |= SUBMATRIX_FLAG; + + return m; +} + +void UMat::locateROI( Size& wholeSize, Point& ofs ) const +{ + CV_Assert( dims <= 2 && step[0] > 0 ); + size_t esz = elemSize(), minstep; + ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size; + + if( delta1 == 0 ) + ofs.x = ofs.y = 0; + else + { + ofs.y = (int)(delta1/step[0]); + ofs.x = (int)((delta1 - step[0]*ofs.y)/esz); + CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) ); + } + minstep = (ofs.x + cols)*esz; + wholeSize.height = (int)((delta2 - minstep)/step[0] + 1); + wholeSize.height = std::max(wholeSize.height, ofs.y + rows); + wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz); + wholeSize.width = std::max(wholeSize.width, ofs.x + cols); +} + + +UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright ) +{ + CV_Assert( dims <= 2 && step[0] > 0 ); + Size wholeSize; Point ofs; + size_t esz = elemSize(); + locateROI( wholeSize, ofs ); + int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height); + int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width); + offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz; + rows = row2 - row1; cols = col2 - col1; + size.p[0] = rows; size.p[1] = cols; + if( esz*cols == step[0] || rows == 1 ) + flags |= CONTINUOUS_FLAG; + else + flags &= ~CONTINUOUS_FLAG; + return *this; +} + + +UMat UMat::reshape(int new_cn, int new_rows) const +{ + int cn = channels(); + UMat hdr = *this; + + if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 ) + { + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); + hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags); + hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn; + return hdr; + } + + CV_Assert( dims <= 2 ); + + if( new_cn == 0 ) + new_cn = cn; + + int total_width = cols * cn; + + if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 ) + new_rows = rows * total_width / new_cn; + + if( new_rows != 0 && new_rows != rows ) + { + int total_size = total_width * rows; + if( !isContinuous() ) + CV_Error( CV_BadStep, + "The matrix is not continuous, thus its number of rows can not be changed" ); + + if( (unsigned)new_rows > (unsigned)total_size ) + CV_Error( CV_StsOutOfRange, "Bad new number of rows" ); + + total_width = total_size / new_rows; + + if( total_width * new_rows != total_size ) + CV_Error( CV_StsBadArg, "The total number of matrix elements " + "is not divisible by the new number of rows" ); + + hdr.rows = new_rows; + hdr.step[0] = total_width * elemSize1(); + } + + int new_width = total_width / new_cn; + + if( new_width * new_cn != total_width ) + CV_Error( CV_BadNumChannels, + "The total width is not divisible by the new number of channels" ); + + hdr.cols = new_width; + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); + hdr.step[1] = CV_ELEM_SIZE(hdr.flags); + return hdr; +} + +UMat UMat::diag(const UMat& d) +{ + CV_Assert( d.cols == 1 || d.rows == 1 ); + int len = d.rows + d.cols - 1; + UMat m(len, len, d.type(), Scalar(0)); + UMat md = m.diag(); + if( d.cols == 1 ) + d.copyTo(md); + else + transpose(d, md); + return m; +} + +int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const +{ + return (depth() == _depth || _depth <= 0) && + (isContinuous() || !_requireContinuous) && + ((dims == 2 && (((rows == 1 || 
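+ // case 1: a single row or column whose channel count matches _elemChannels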
+UMat UMat::reshape(int new_cn, int new_rows) const
+{
+    int cn = channels();
+    UMat hdr = *this;
+
+    if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
+    {
+        hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
+        hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
+        hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
+        return hdr;
+    }
+
+    CV_Assert( dims <= 2 );
+
+    if( new_cn == 0 )
+        new_cn = cn;
+
+    int total_width = cols * cn;
+
+    if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
+        new_rows = rows * total_width / new_cn;
+
+    if( new_rows != 0 && new_rows != rows )
+    {
+        int total_size = total_width * rows;
+        if( !isContinuous() )
+            CV_Error( CV_BadStep,
+            "The matrix is not continuous, thus its number of rows can not be changed" );
+
+        if( (unsigned)new_rows > (unsigned)total_size )
+            CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
+
+        total_width = total_size / new_rows;
+
+        if( total_width * new_rows != total_size )
+            CV_Error( CV_StsBadArg, "The total number of matrix elements "
+                                    "is not divisible by the new number of rows" );
+
+        hdr.rows = new_rows;
+        hdr.step[0] = total_width * elemSize1();
+    }
+
+    int new_width = total_width / new_cn;
+
+    if( new_width * new_cn != total_width )
+        CV_Error( CV_BadNumChannels,
+        "The total width is not divisible by the new number of channels" );
+
+    hdr.cols = new_width;
+    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
+    hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
+    return hdr;
+}
+
+UMat UMat::diag(const UMat& d)
+{
+    CV_Assert( d.cols == 1 || d.rows == 1 );
+    int len = d.rows + d.cols - 1;
+    UMat m(len, len, d.type(), Scalar(0));
+    UMat md = m.diag();
+    if( d.cols == 1 )
+        d.copyTo(md);
+    else
+        transpose(d, md);
+    return m;
+}
+
+int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
+{
+    return (depth() == _depth || _depth <= 0) &&
+        (isContinuous() || !_requireContinuous) &&
+        ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
+                        (cols == _elemChannels && channels() == 1))) ||
+        (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
+         (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
+    ? (int)(total()*channels()/_elemChannels) : -1;
+}
+
+
+UMat UMat::cross(InputArray) const
+{
+    CV_Error(CV_StsNotImplemented, "");
+    return UMat();
+}
+
+
+UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
+{
+    if(_newndims == dims)
+    {
+        if(_newsz == 0)
+            return reshape(_cn);
+        if(_newndims == 2)
+            return reshape(_cn, _newsz[0]);
+    }
+
+    CV_Error(CV_StsNotImplemented, "");
+    // TBD
+    return UMat();
+}
+
+
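+// getMat() maps the UMat buffer into host memory and wraps it into a Mat
+// header that shares the mapped data; release all such Mats before using
+// the UMat on the device again.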
+Mat UMat::getMat(int accessFlags) const
+{
+    if(!u)
+        return Mat();
+    u->currAllocator->map(u, accessFlags);
+    CV_Assert(u->data != 0);
+    Mat hdr(dims, size.p, type(), u->data + offset, step.p);
+    hdr.u = u;
+    hdr.datastart = hdr.data = u->data;
+    hdr.datalimit = hdr.dataend = u->data + u->size;
+    CV_XADD(&hdr.u->refcount, 1);
+    return hdr;
+}
+
+void* UMat::handle(int /*accessFlags*/) const
+{
+    if( !u )
+        return 0;
+
+    // check flags: if CPU copy is newer, copy it back to GPU.
+    if( u->deviceCopyObsolete() )
+    {
+        CV_Assert(u->refcount == 0);
+        u->currAllocator->unmap(u);
+    }
+    /*else if( u->refcount > 0 && (accessFlags & ACCESS_WRITE) )
+    {
+        CV_Error(Error::StsError,
+                 "it's not allowed to access UMat handle for writing "
+                 "while it's mapped; call Mat::release() first for all its mappings");
+    }*/
+    return u->handle;
+}
+
+void UMat::ndoffset(size_t* ofs) const
+{
+    // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
+    size_t val = offset;
+    for( int i = 0; i < dims; i++ )
+    {
+        size_t s = step.p[i];
+        ofs[i] = val / s;
+        val -= ofs[i]*s;
+    }
+}
+
+void UMat::copyTo(OutputArray _dst) const
+{
+    int dtype = _dst.type();
+    if( _dst.fixedType() && dtype != type() )
+    {
+        CV_Assert( channels() == CV_MAT_CN(dtype) );
+        convertTo( _dst, dtype );
+        return;
+    }
+
+    if( empty() )
+    {
+        _dst.release();
+        return;
+    }
+
+    size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
+    for( i = 0; i < (size_t)dims; i++ )
+        sz[i] = size.p[i];
+    sz[dims-1] *= esz;
+    ndoffset(srcofs);
+    srcofs[dims-1] *= esz;
+
+    _dst.create( dims, size.p, type() );
+    if( _dst.kind() == _InputArray::UMAT )
+    {
+        UMat dst = _dst.getUMat();
+        void* srchandle = handle(ACCESS_READ);
+        void* dsthandle = dst.handle(ACCESS_WRITE);
+        if( srchandle == dsthandle && dst.offset == offset )
+            return;
+        ndoffset(dstofs);
+        CV_Assert(u->currAllocator == dst.u->currAllocator);
+        u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
+    }
+    else
+    {
+        Mat dst = _dst.getMat();
+        u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
+    }
+}
+
+void UMat::convertTo(OutputArray, int, double, double) const
+{
+    CV_Error(Error::StsNotImplemented, "");
+}
+
+UMat& UMat::operator = (const Scalar&)
+{
+    CV_Error(Error::StsNotImplemented, "");
+    return *this;
+}
+
+}
+
+/* End of file. */
diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp
new file mode 100644
index 0000000000..56ec72c7a2
--- /dev/null
+++ b/modules/core/test/test_umat.cpp
@@ -0,0 +1,137 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include "opencv2/core/ocl.hpp"
+
+using namespace cv;
+using namespace std;
+
+class CV_UMatTest : public cvtest::BaseTest
+{
+public:
+    CV_UMatTest() {}
+    ~CV_UMatTest() {}
+protected:
+    void run(int);
+
+    struct test_excep
+    {
+        test_excep(const string& _s=string("")) : s(_s) {};
+        string s;
+    };
+
+    bool TestUMat();
+
+    void checkDiff(const Mat& m1, const Mat& m2, const string& s)
+    {
+        if (norm(m1, m2, NORM_INF) != 0)
+            throw test_excep(s);
+    }
+    void checkDiffF(const Mat& m1, const Mat& m2, const string& s)
+    {
+        if (norm(m1, m2, NORM_INF) > 1e-5)
+            throw test_excep(s);
+    }
+};
+
+#define STR(a) STR2(a)
+#define STR2(a) #a
+
+#define CHECK_DIFF(a, b) checkDiff(a, b, "(" #a ") != (" #b ") at l." STR(__LINE__))
+#define CHECK_DIFF_FLT(a, b) checkDiffF(a, b, "(" #a ") !=(eps) (" #b ") at l." STR(__LINE__))
+
+
+bool CV_UMatTest::TestUMat()
+{
+    try
+    {
+        Mat a(100, 100, CV_16S), b;
+        randu(a, Scalar::all(-100), Scalar::all(100));
+        Rect roi(1, 3, 10, 20);
+        Mat ra(a, roi), rb;
+        UMat ua, ura;
+        a.copyTo(ua);
+        ua.copyTo(b);
+        CHECK_DIFF(a, b);
+
+        ura = ua(roi);
+        ura.copyTo(rb);
+
+        CHECK_DIFF(ra, rb);
+
+        ra += Scalar::all(1.f);
+        {
+            Mat temp = ura.getMat(ACCESS_RW);
+            temp += Scalar::all(1.f);
+        }
+        ra.copyTo(rb);
+        CHECK_DIFF(ra, rb);
+    }
+    catch (const test_excep& e)
+    {
+        ts->printf(cvtest::TS::LOG, "%s\n", e.s.c_str());
+        ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
+        return false;
+    }
+    return true;
+}
+
+void CV_UMatTest::run( int /* start_from */)
+{
+    printf("Use OpenCL: %s\nHave OpenCL: %s\n",
+           ocl::useOpenCL() ? "TRUE" : "FALSE",
+           ocl::haveOpenCL() ? "TRUE" : "FALSE" );
+
+    if (!TestUMat())
+        return;
+
+    ts->set_failed_test_info(cvtest::TS::OK);
+}
+
+TEST(Core_UMat, base) { CV_UMatTest test; test.safe_run(); }
diff --git a/modules/imgproc/perf/perf_histogram.cpp b/modules/imgproc/perf/perf_histogram.cpp
index 1789470c20..8d2a7989ed 100644
--- a/modules/imgproc/perf/perf_histogram.cpp
+++ b/modules/imgproc/perf/perf_histogram.cpp
@@ -9,7 +9,7 @@ using std::tr1::get;
 
 typedef tr1::tuple<Size, MatType> Size_Source_t;
 typedef TestBaseWithParam<Size_Source_t> Size_Source;
-typedef TestBaseWithParam<Size> MatSize;
+typedef TestBaseWithParam<Size> TestMatSize;
 
 static const float rangeHight = 256.0f;
 static const float rangeLow = 0.0f;
@@ -99,6 +99,7 @@ PERF_TEST_P(Size_Source, calcHist3d,
     SANITY_CHECK(hist);
 }
 
+#define MatSize TestMatSize
 PERF_TEST_P(MatSize, equalizeHist,
             testing::Values(TYPICAL_MAT_SIZES)
             )
@@ -115,6 +116,7 @@ PERF_TEST_P(MatSize, equalizeHist,
 
     SANITY_CHECK(destination);
 }
+#undef MatSize
 
 typedef tr1::tuple<Size, double> Sz_ClipLimit_t;
 typedef TestBaseWithParam<Sz_ClipLimit_t> Sz_ClipLimit;
diff --git a/modules/legacy/src/em.cpp b/modules/legacy/src/em.cpp
index b49eb91316..b6ff6c8263 100644
--- a/modules/legacy/src/em.cpp
+++ b/modules/legacy/src/em.cpp
@@ -102,7 +102,8 @@ float CvEM::predict( const CvMat* _sample, CvMat* _probs ) const
 {
     Mat prbs0 = cvarrToMat(_probs), prbs = prbs0, sample = cvarrToMat(_sample);
-    int cls = static_cast<int>(emObj.predict(sample, _probs ? _OutputArray(prbs) : cv::noArray())[1]);
+    int cls = static_cast<int>(emObj.predict(sample, _probs ? _OutputArray(prbs) :
+                                             (OutputArray)cv::noArray())[1]);
     if(_probs)
     {
         if( prbs.data != prbs0.data )
@@ -208,13 +209,16 @@ bool CvEM::train( const Mat& _samples, const Mat& _sample_idx,
     bool isOk = false;
     if( _params.start_step == EM::START_AUTO_STEP )
         isOk = emObj.train(_samples,
-                           logLikelihoods, _labels ? _OutputArray(*_labels) : cv::noArray(), probs);
+                           logLikelihoods, _labels ? _OutputArray(*_labels) :
+                           (OutputArray)cv::noArray(), probs);
     else if( _params.start_step == EM::START_E_STEP )
         isOk = emObj.trainE(_samples, means, covshdrs, weights,
-                            logLikelihoods, _labels ? _OutputArray(*_labels) : cv::noArray(), probs);
+                            logLikelihoods, _labels ? _OutputArray(*_labels) :
+                            (OutputArray)cv::noArray(), probs);
     else if( _params.start_step == EM::START_M_STEP )
         isOk = emObj.trainM(_samples, prbs,
-                           logLikelihoods, _labels ? _OutputArray(*_labels) : cv::noArray(), probs);
+                           logLikelihoods, _labels ? _OutputArray(*_labels) :
+                           (OutputArray)cv::noArray(), probs);
     else
         CV_Error(CV_StsBadArg, "Bad start type of EM algorithm");
@@ -230,7 +234,9 @@ bool CvEM::train( const Mat& _samples, const Mat& _sample_idx,
 
 float CvEM::predict( const Mat& _sample, Mat* _probs ) const
 {
-    return static_cast<float>(emObj.predict(_sample, _probs ? _OutputArray(*_probs) : cv::noArray())[1]);
+    return static_cast<float>(emObj.predict(_sample, _probs ?
+                                            _OutputArray(*_probs) :
+                                            (OutputArray)cv::noArray())[1]);
 }
 
 int CvEM::getNClusters() const
diff --git a/modules/legacy/src/features2d.cpp b/modules/legacy/src/features2d.cpp
index f313875ac4..b2fb3b2367 100644
--- a/modules/legacy/src/features2d.cpp
+++ b/modules/legacy/src/features2d.cpp
@@ -82,7 +82,7 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
     surf->set("upright", params.upright != 0);
     surf->set("extended", params.extended != 0);
 
-    surf->operator()(img, mask, kpt, _descriptors ? _OutputArray(descr) : noArray(),
+    surf->operator()(img, mask, kpt, _descriptors ? _OutputArray(descr) : (OutputArray)noArray(),
                      useProvidedKeyPts != 0);
 
     if( _keypoints )
diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp
index f3dc7b56f5..68e42970af 100644
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -154,30 +154,24 @@ void cv::ocl::oclMat::upload(const Mat &m)
 
 cv::ocl::oclMat::operator cv::_InputArray()
 {
-    _InputArray newInputArray;
-    newInputArray.flags = cv::_InputArray::OCL_MAT;
-    newInputArray.obj = reinterpret_cast<void*>(this);
-    return newInputArray;
+    return _InputArray(cv::_InputArray::OCL_MAT, this);
 }
 
 cv::ocl::oclMat::operator cv::_OutputArray()
 {
-    _OutputArray newOutputArray;
-    newOutputArray.flags = cv::_InputArray::OCL_MAT;
-    newOutputArray.obj = reinterpret_cast<void*>(this);
-    return newOutputArray;
+    return _OutputArray(cv::_InputArray::OCL_MAT, this);
 }
 
 cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
 {
-    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
-    return *reinterpret_cast<oclMat*>(src.obj);
+    CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
+    return *(oclMat*)src.getObj();
 }
 
 cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
 {
-    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
-    return *reinterpret_cast<oclMat*>(src.obj);
+    CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
+    return *(oclMat*)src.getObj();
 }
 
 void cv::ocl::oclMat::download(cv::Mat &m) const
diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp
index 03328ee601..20b4128a2c 100644
--- a/modules/python/src2/cv2.cpp
+++ b/modules/python/src2/cv2.cpp
@@ -175,27 +175,27 @@ static PyObject* failmsgp(const char *fmt, ...)
     return 0;
 }
 
-static size_t REFCOUNT_OFFSET = (size_t)&(((PyObject*)0)->ob_refcnt) +
-    (0x12345678 != *(const size_t*)"\x78\x56\x34\x12\0\0\0\0\0")*sizeof(int);
-
-static inline PyObject* pyObjectFromRefcount(const int* refcount)
-{
-    return (PyObject*)((size_t)refcount - REFCOUNT_OFFSET);
-}
-
-static inline int* refcountFromPyObject(const PyObject* obj)
-{
-    return (int*)((size_t)obj + REFCOUNT_OFFSET);
-}
-
 class NumpyAllocator : public MatAllocator
 {
 public:
-    NumpyAllocator() {}
+    NumpyAllocator() { stdAllocator = Mat::getStdAllocator(); }
     ~NumpyAllocator() {}
 
-    void allocate(int dims, const int* sizes, int type, int*& refcount,
-                  uchar*& datastart, uchar*& data, size_t* step)
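+    // Wraps an existing numpy array: UMatData points directly at the array's
+    // data, and the PyObject is kept in userdata so that deallocate() can
+    // DECREF it.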
+    UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const
+    {
+        UMatData* u = new UMatData(this);
+        u->refcount = 1;
+        u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o);
+        npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
+        for( int i = 0; i < dims - 1; i++ )
+            step[i] = (size_t)_strides[i];
+        step[dims-1] = CV_ELEM_SIZE(type);
+        u->size = sizes[0]*step[0];
+        u->userdata = o;
+        return u;
+    }
+
+    UMatData* allocate(int dims0, const int* sizes, int type, size_t* step) const
     {
         PyEnsureGIL gil;
@@ -203,10 +203,10 @@ public:
         int cn = CV_MAT_CN(type);
         const int f = (int)(sizeof(size_t)/8);
         int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE :
-                      depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
-                      depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
-                      depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
-        int i;
+                      depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
+                      depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
+                      depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
+        int i, dims = dims0;
         cv::AutoBuffer<npy_intp> _sizes(dims + 1);
         for( i = 0; i < dims; i++ )
             _sizes[i] = sizes[i];
@@ -215,22 +215,58 @@ public:
         PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum);
         if(!o)
             CV_Error_(Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims));
-        refcount = refcountFromPyObject(o);
-        npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
-        for( i = 0; i < dims - (cn > 1); i++ )
-            step[i] = (size_t)_strides[i];
-        datastart = data = (uchar*)PyArray_DATA((PyArrayObject*) o);
+        return allocate(o, dims0, sizes, type, step);
     }
 
-    void deallocate(int* refcount, uchar*, uchar*)
+    bool allocate(UMatData* u, int accessFlags) const
    {
-        PyEnsureGIL gil;
-        if( !refcount )
-            return;
-        PyObject* o = pyObjectFromRefcount(refcount);
-        Py_INCREF(o);
-        Py_DECREF(o);
+        return stdAllocator->allocate(u, accessFlags);
+    }
+
+    void deallocate(UMatData* u) const
+    {
+        if(u)
+        {
+            PyEnsureGIL gil;
+            PyObject* o = (PyObject*)u->userdata;
+            Py_DECREF(o);
+            delete u;
+        }
     }
+
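+    // numpy-backed buffers already live in host memory, so map() is a no-op
+    // and the transfer methods below delegate to the standard CPU allocator.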
+    void map(UMatData*, int) const
+    {
+    }
+
+    void unmap(UMatData* u) const
+    {
+        if(u->urefcount == 0)
+            deallocate(u);
+    }
+
+    void download(UMatData* u, void* dstptr,
+                  int dims, const size_t sz[],
+                  const size_t srcofs[], const size_t srcstep[],
+                  const size_t dststep[]) const
+    {
+        stdAllocator->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
+    }
+
+    void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
+                const size_t dstofs[], const size_t dststep[],
+                const size_t srcstep[]) const
+    {
+        stdAllocator->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
+    }
+
+    void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
+              const size_t srcofs[], const size_t srcstep[],
+              const size_t dstofs[], const size_t dststep[], bool sync) const
+    {
+        stdAllocator->copy(usrc, udst, dims, sz, srcofs, srcstep, dstofs, dststep, sync);
+    }
+
+    const MatAllocator* stdAllocator;
 };
 
 NumpyAllocator g_numpyAllocator;
 
@@ -400,16 +436,12 @@ static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
     }
 
     m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
+    m.u = g_numpyAllocator.allocate(o, ndims, size, type, step);
 
-    if( m.data )
+    if( !needcopy )
     {
-        m.refcount = refcountFromPyObject(o);
-        if (!needcopy)
-        {
-            m.addref(); // protect the original numpy array from deallocation
-                        // (since Mat destructor will decrement the reference counter)
-        }
-    };
+        Py_INCREF(o);
+    }
     m.allocator = &g_numpyAllocator;
 
     return true;
@@ -421,14 +453,15 @@ PyObject* pyopencv_from(const Mat& m)
     if( !m.data )
         Py_RETURN_NONE;
     Mat temp, *p = (Mat*)&m;
-    if(!p->refcount || p->allocator != &g_numpyAllocator)
+    if(!p->u || p->allocator != &g_numpyAllocator)
     {
         temp.allocator = &g_numpyAllocator;
         ERRWRAP2(m.copyTo(temp));
         p = &temp;
    }
-    p->addref();
-    return pyObjectFromRefcount(p->refcount);
+    PyObject* o = (PyObject*)p->u->userdata;
+    Py_INCREF(o);
+    return o;
 }
 
 template<>
diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp
index e32c5f044d..0389a78fda 100644
--- a/modules/superres/src/optical_flow.cpp
+++ b/modules/superres/src/optical_flow.cpp
@@ -163,7 +163,9 @@ namespace
     void Farneback::impl(const Mat& input0, const Mat& input1, OutputArray dst)
     {
-        calcOpticalFlowFarneback(input0, input1, dst, pyrScale_, numLevels_, winSize_, numIters_, polyN_, polySigma_, flags_);
+        calcOpticalFlowFarneback(input0, input1, (InputOutputArray)dst, pyrScale_,
+                                 numLevels_, winSize_, numIters_,
+                                 polyN_, polySigma_, flags_);
     }
 }
@@ -325,7 +327,7 @@ namespace
         alg_->set("iterations", iterations_);
         alg_->set("useInitialFlow", useInitialFlow_);
 
-        alg_->calc(input0, input1, dst);
+        alg_->calc(input0, input1, (InputOutputArray)dst);
     }
 
     void DualTVL1::collectGarbage()
diff --git a/modules/video/src/compat_video.cpp b/modules/video/src/compat_video.cpp
index e6dc960303..1773d52955 100644
--- a/modules/video/src/compat_video.cpp
+++ b/modules/video/src/compat_video.cpp
@@ -352,7 +352,7 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB,
     if( error )
         err = cv::Mat(count, 1, CV_32F, (void*)error);
     cv::calcOpticalFlowPyrLK( A, B, ptA, ptB, st,
-                              error ? cv::_OutputArray(err) : cv::noArray(),
+                              error ? cv::_OutputArray(err) : (cv::_OutputArray)cv::noArray(),
                               winSize, level, criteria, flags);
 }
diff --git a/modules/video/src/optflowgf.cpp b/modules/video/src/optflowgf.cpp
index 18dd3544e4..19e96885bd 100644
--- a/modules/video/src/optflowgf.cpp
+++ b/modules/video/src/optflowgf.cpp
@@ -564,7 +564,7 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
 }
 
 void cv::calcOpticalFlowFarneback( InputArray _prev0, InputArray _next0,
-                                   OutputArray _flow0, double pyr_scale, int levels, int winsize,
+                                   InputOutputArray _flow0, double pyr_scale, int levels, int winsize,
                                    int iterations, int poly_n, double poly_sigma, int flags )
 {
     Mat prev0 = _prev0.getMat(), next0 = _next0.getMat();