mirror of https://github.com/opencv/opencv.git
Merge pull request #974 from jet47:gpu-core-refactoring
commit 81c6b46fc6
96 changed files with 5303 additions and 4069 deletions
@@ -0,0 +1,691 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CORE_GPU_HPP__
#define __OPENCV_CORE_GPU_HPP__

#ifndef __cplusplus
#  error gpu.hpp header must be compiled as C++
#endif

#include "opencv2/core.hpp"
#include "opencv2/core/gpu_types.hpp"

namespace cv { namespace gpu {

//////////////////////////////// GpuMat ///////////////////////////////

// Smart pointer for GPU memory with reference counting.
// Its interface is mostly similar to cv::Mat.

class CV_EXPORTS GpuMat
{
public:
    //! default constructor
    GpuMat();

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type);
    GpuMat(Size size, int type);

    //! constructs GpuMat and fills it with the specified value s
    GpuMat(int rows, int cols, int type, Scalar s);
    GpuMat(Size size, int type, Scalar s);

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    explicit GpuMat(InputArray arr);

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter, deallocates the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! performs upload of data to GpuMat (Blocking call)
    void upload(InputArray arr);

    //! performs upload of data to GpuMat (Non-Blocking call)
    void upload(InputArray arr, Stream& stream);

    //! performs download of data from device to host memory (Blocking call)
    void download(OutputArray dst) const;

    //! performs download of data from device to host memory (Non-Blocking call)
    void download(OutputArray dst, Stream& stream) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets all of the GpuMat elements to s (Blocking call)
    GpuMat& setTo(Scalar s);

    //! sets all of the GpuMat elements to s (Non-Blocking call)
    GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    void assignTo(GpuMat& m, int type = -1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
         - the magic signature
         - continuity flag
         - depth
         - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    uchar* dataend;
};
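
// Usage sketch (illustrative only, not part of this header): the blocking
// round trip declared above. The helper name is a placeholder; assumes the
// library is built with CUDA and a device is present.
#if 0
static void gpumat_roundtrip_example()
{
    cv::Mat host(480, 640, CV_8UC1, cv::Scalar(0));

    cv::gpu::GpuMat d;
    d.upload(host);             // blocking host -> device copy
    d.setTo(cv::Scalar(255));   // executed on the default stream
    d.download(host);           // blocking device -> host copy
}
#endif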

//! creates continuous matrix
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

//! ensures that the size of the given matrix is not less than (rows, cols)
//! and that the matrix type matches the specified one
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
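
// Usage sketch (illustrative only; helper name is a placeholder):
// ensureSizeIsEnough() lets one buffer be reused across iterations,
// allocating only when the request grows.
#if 0
static void buffer_reuse_example()
{
    cv::gpu::GpuMat buf;
    for (int i = 0; i < 10; ++i)
    {
        // allocates on the first iteration; later calls become no-ops
        cv::gpu::ensureSizeIsEnough(480, 640, CV_32FC1, buf);
        buf.setTo(cv::Scalar::all(0));
    }
}
#endif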

//////////////////////////////// CudaMem ////////////////////////////////

// CudaMem is a limited cv::Mat with page-locked memory allocation.
// Page-locked memory is only needed for asynchronous and faster copying to the GPU.
// It is convertible to a cv::Mat header without reference counting,
// so you can use it with other OpenCV functions.

class CV_EXPORTS CudaMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    explicit CudaMem(AllocType alloc_type = PAGE_LOCKED);

    CudaMem(const CudaMem& m);

    CudaMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
    CudaMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

    //! creates from host memory, copying the data
    explicit CudaMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~CudaMem();

    CudaMem& operator =(const CudaMem& m);

    //! swaps with other smart pointer
    void swap(CudaMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    CudaMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative CudaMem header for the same data, with different
    //! number of channels and/or different number of rows
    CudaMem reshape(int cn, int rows = 0) const;

    //! decrements reference counter and releases memory if needed
    void release();

    //! returns matrix header with disabled reference counting for CudaMem data
    Mat createMatHeader() const;

    //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
    size_t elemSize() const;
    size_t elemSize1() const;
    int type() const;
    int depth() const;
    int channels() const;
    size_t step1() const;
    Size size() const;
    bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    uchar* dataend;

    AllocType alloc_type;
};
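
// Usage sketch (illustrative only; helper name is a placeholder): page-locked
// host memory is what makes the non-blocking upload/download overloads truly
// asynchronous.
#if 0
static void cudamem_example()
{
    cv::gpu::CudaMem pinned(480, 640, CV_8UC1);    // PAGE_LOCKED by default
    cv::Mat host = pinned.createMatHeader();       // header only, no data copy

    // ... fill "host" on the CPU ...

    cv::gpu::Stream stream;
    cv::gpu::GpuMat d;
    d.upload(host, stream);                        // asynchronous upload
    stream.waitForCompletion();
}
#endif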

//! page-locks the memory of matrix m and maps it for the device(s)
CV_EXPORTS void registerPageLocked(Mat& m);

//! unmaps the memory of matrix m, and makes it pageable again
CV_EXPORTS void unregisterPageLocked(Mat& m);
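
// Usage sketch (illustrative only; helper name is a placeholder): an
// already-allocated cv::Mat can be pinned in place instead of copying it
// into a CudaMem.
#if 0
static void register_example(cv::Mat& frame)
{
    cv::gpu::registerPageLocked(frame);     // pins the existing allocation
    // ... asynchronous uploads from "frame" are now possible ...
    cv::gpu::unregisterPageLocked(frame);   // makes it pageable again
}
#endif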

///////////////////////////////// Stream //////////////////////////////////

// Encapsulates a CUDA Stream. Provides an interface for asynchronous copying.
// Passed to each function that supports asynchronous kernel execution.
// Reference counting is enabled.

class CV_EXPORTS Stream
{
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    Stream();

    //! queries an asynchronous stream for completion status
    bool queryIfComplete() const;

    //! waits for stream tasks to complete
    void waitForCompletion();

    //! makes a compute stream wait on an event
    void waitEvent(const Event& event);

    //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! returns Stream object for the default CUDA stream
    static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    // obsolete methods

    void enqueueDownload(const GpuMat& src, OutputArray dst);

    void enqueueUpload(InputArray src, GpuMat& dst);

    void enqueueCopy(const GpuMat& src, OutputArray dst);

    void enqueueMemSet(GpuMat& src, Scalar val);
    void enqueueMemSet(GpuMat& src, Scalar val, InputArray mask);

    void enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha = 1.0, double beta = 0.0);

    class Impl;

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
};
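
// Usage sketch (illustrative only; function names are placeholders): queueing
// work and a host callback on a non-default stream; the callback fires after
// everything enqueued before it has completed.
#if 0
static void on_done(int /*status*/, void* /*userData*/)
{
    // runs on a driver thread once the preceding stream work has completed
}

static void stream_example()
{
    cv::gpu::Stream stream;
    cv::gpu::GpuMat d(480, 640, CV_8UC1);

    d.setTo(cv::Scalar(0), stream);            // non-blocking
    stream.enqueueHostCallback(on_done, 0);

    if (stream)                                // true for non-default streams
        stream.waitForCompletion();
}
#endif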

class CV_EXPORTS Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DISABLE_TIMING must be set */
    };

    explicit Event(CreateFlags flags = DEFAULT);

    //! records an event
    void record(Stream& stream = Stream::Null());

    //! queries an event's status
    bool queryIfComplete() const;

    //! waits for an event to complete
    void waitForCompletion();

    //! computes the elapsed time between events
    static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;

    friend struct EventAccessor;
};
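
// Usage sketch (illustrative only; helper name is a placeholder): timing a
// stretch of asynchronous work with a pair of events recorded on one stream.
#if 0
static float event_timing_example()
{
    cv::gpu::Stream stream;
    cv::gpu::GpuMat d(1024, 1024, CV_32FC1);

    cv::gpu::Event start, stop;                // DEFAULT flags keep timing enabled
    start.record(stream);
    d.setTo(cv::Scalar::all(1), stream);       // the work being measured
    stop.record(stream);
    stop.waitForCompletion();

    return cv::gpu::Event::elapsedTime(start, stop);   // milliseconds
}
#endif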

//////////////////////////////// Initialization & Info ////////////////////////

//! this is the only function that does not throw exceptions if the library is compiled without CUDA
CV_EXPORTS int getCudaEnabledDeviceCount();

//! sets the device to be used for GPU execution for the calling host thread
CV_EXPORTS void setDevice(int device);

//! returns which device is currently being used for the calling host thread
CV_EXPORTS int getDevice();

//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
CV_EXPORTS void resetDevice();

enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_35 = 35,

    GLOBAL_ATOMICS         = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS         = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE          = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM    = FEATURE_SET_COMPUTE_35
};

//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
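
// Usage sketch (illustrative only; helper name is a placeholder): FeatureSet
// doubles as a capability threshold, so guarding a double-precision path is a
// single call.
#if 0
static bool can_use_doubles()
{
    return cv::gpu::deviceSupports(cv::gpu::NATIVE_DOUBLE);   // needs CC >= 1.3
}
#endif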

//! information about what GPU archs this OpenCV GPU module was compiled for
class CV_EXPORTS TargetArchs
{
public:
    static bool builtWith(FeatureSet feature_set);

    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
};
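
// Usage sketch (illustrative only; helper name is a placeholder): TargetArchs
// answers what this binary was compiled for, which complements what the
// device can do. DeviceInfo::isCompatible() below performs a fuller check.
#if 0
static bool build_can_run_on_current_device()
{
    cv::gpu::DeviceInfo info;   // current device
    // PTX built for an arch no newer than the device can be JIT-compiled for it
    return cv::gpu::TargetArchs::hasEqualOrLessPtx(info.major(), info.minor());
}
#endif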

//! information about the given GPU
class CV_EXPORTS DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    DeviceInfo();

    //! creates DeviceInfo object for the given GPU
    DeviceInfo(int device_id);

    //! device number
    int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    int regsPerBlock() const;

    //! warp size in threads
    int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    size_t memPitch() const;

    //! maximum number of threads per block
    int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    int clockRate() const;

    //! constant memory available on device in bytes
    size_t totalConstMem() const;

    //! major compute capability
    int major() const;

    //! minor compute capability
    int minor() const;

    //! alignment requirement for textures
    size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    int multiProcessorCount() const;

    //! specifies whether there is a runtime limit on kernels
    bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,          /**< default compute mode (multiple threads can use ::cudaSetDevice() with this device) */
        ComputeModeExclusive,        /**< compute-exclusive-thread mode (only one thread in one process will be able to use ::cudaSetDevice() with this device) */
        ComputeModeProhibited,       /**< compute-prohibited mode (no threads can use ::cudaSetDevice() with this device) */
        ComputeModeExclusiveProcess  /**< compute-exclusive-process mode (many threads in one process will be able to use ::cudaSetDevice() with this device) */
    };

    //! compute mode
    ComputeMode computeMode() const;

    //! maximum 1D texture size
    int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    int maxSurface1D() const;

    //! maximum 2D surface dimensions
    Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    bool concurrentKernels() const;

    //! device has ECC support enabled
    bool ECCEnabled() const;

    //! PCI bus ID of the device
    int pciBusID() const;

    //! PCI device ID of the device
    int pciDeviceID() const;

    //! PCI domain ID of the device
    int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    bool tccDriver() const;

    //! number of asynchronous engines
    int asyncEngineCount() const;

    //! device shares a unified address space with the host
    bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    int memoryClockRate() const;

    //! global memory bus width in bits
    int memoryBusWidth() const;

    //! size of L2 cache in bytes
    int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

    //! checks whether device supports the given feature
    bool supports(FeatureSet feature_set) const;

    //! checks whether the GPU module can be run on the given device
    bool isCompatible() const;

private:
    int device_id_;
};
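
// Usage sketch (illustrative only; helper name is a placeholder): enumerating
// devices and skipping those this build of the GPU module cannot run on.
#if 0
static void list_usable_devices()
{
    const int count = cv::gpu::getCudaEnabledDeviceCount();   // 0 without CUDA
    for (int id = 0; id < count; ++id)
    {
        cv::gpu::DeviceInfo info(id);
        if (info.isCompatible())
            cv::gpu::printShortCudaDeviceInfo(id);
    }
}
#endif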

CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);

}} // namespace cv { namespace gpu {

namespace cv {

template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();

}

#include "opencv2/core/gpu.inl.hpp"

#endif /* __OPENCV_CORE_GPU_HPP__ */
@@ -0,0 +1,641 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
// (standard OpenCV license header, identical to the one reproduced in full above)
//M*/

#ifndef __OPENCV_CORE_GPUINL_HPP__
#define __OPENCV_CORE_GPUINL_HPP__

#include "opencv2/core/gpu.hpp"

namespace cv { namespace gpu {

//////////////////////////////// GpuMat ///////////////////////////////

inline
GpuMat::GpuMat()
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{}

inline
GpuMat::GpuMat(int rows_, int cols_, int type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (rows_ > 0 && cols_ > 0)
        create(rows_, cols_, type_);
}

inline
GpuMat::GpuMat(Size size_, int type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (size_.height > 0 && size_.width > 0)
        create(size_.height, size_.width, type_);
}

inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (rows_ > 0 && cols_ > 0)
    {
        create(rows_, cols_, type_);
        setTo(s_);
    }
}

inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (size_.height > 0 && size_.width > 0)
    {
        create(size_.height, size_.width, type_);
        setTo(s_);
    }
}

inline
GpuMat::GpuMat(const GpuMat& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
{
    if (refcount)
        CV_XADD(refcount, 1);
}

inline
GpuMat::GpuMat(InputArray arr) :
    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    upload(arr);
}

inline
GpuMat::~GpuMat()
{
    release();
}

inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
    if (this != &m)
    {
        GpuMat temp(m);
        swap(temp);
    }

    return *this;
}

inline
void GpuMat::create(Size size_, int type_)
{
    create(size_.height, size_.width, type_);
}

inline
void GpuMat::swap(GpuMat& b)
{
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);
    std::swap(refcount, b.refcount);
}

inline
GpuMat GpuMat::clone() const
{
    GpuMat m;
    copyTo(m);
    return m;
}

inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
    copyTo(dst, mask, Stream::Null());
}

inline
GpuMat& GpuMat::setTo(Scalar s)
{
    return setTo(s, Stream::Null());
}

inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
    return setTo(s, mask, Stream::Null());
}

inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
    convertTo(dst, rtype, Stream::Null());
}

inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
    convertTo(dst, rtype, alpha, beta, Stream::Null());
}

inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
    convertTo(dst, rtype, alpha, 0.0, stream);
}

inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
    if (_type < 0)
        m = *this;
    else
        convertTo(m, _type);
}

inline
uchar* GpuMat::ptr(int y)
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );
    return data + step * y;
}

inline
const uchar* GpuMat::ptr(int y) const
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );
    return data + step * y;
}

template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
    return (_Tp*)ptr(y);
}

template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
    return (const _Tp*)ptr(y);
}

template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
    return PtrStepSz<T>(rows, cols, (T*)data, step);
}

template <class T> inline
GpuMat::operator PtrStep<T>() const
{
    return PtrStep<T>((T*)data, step);
}

inline
GpuMat GpuMat::row(int y) const
{
    return GpuMat(*this, Range(y, y+1), Range::all());
}

inline
GpuMat GpuMat::col(int x) const
{
    return GpuMat(*this, Range::all(), Range(x, x+1));
}

inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
    return GpuMat(*this, Range(startrow, endrow), Range::all());
}

inline
GpuMat GpuMat::rowRange(Range r) const
{
    return GpuMat(*this, r, Range::all());
}

inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
    return GpuMat(*this, Range::all(), Range(startcol, endcol));
}

inline
GpuMat GpuMat::colRange(Range r) const
{
    return GpuMat(*this, Range::all(), r);
}

inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
    return GpuMat(*this, rowRange_, colRange_);
}

inline
GpuMat GpuMat::operator ()(Rect roi) const
{
    return GpuMat(*this, roi);
}

inline
bool GpuMat::isContinuous() const
{
    return (flags & Mat::CONTINUOUS_FLAG) != 0;
}

inline
size_t GpuMat::elemSize() const
{
    return CV_ELEM_SIZE(flags);
}

inline
size_t GpuMat::elemSize1() const
{
    return CV_ELEM_SIZE1(flags);
}

inline
int GpuMat::type() const
{
    return CV_MAT_TYPE(flags);
}

inline
int GpuMat::depth() const
{
    return CV_MAT_DEPTH(flags);
}

inline
int GpuMat::channels() const
{
    return CV_MAT_CN(flags);
}

inline
size_t GpuMat::step1() const
{
    return step / elemSize1();
}

inline
Size GpuMat::size() const
{
    return Size(cols, rows);
}

inline
bool GpuMat::empty() const
{
    return data == 0;
}

static inline
GpuMat createContinuous(int rows, int cols, int type)
{
    GpuMat m;
    createContinuous(rows, cols, type, m);
    return m;
}

static inline
void createContinuous(Size size, int type, OutputArray arr)
{
    createContinuous(size.height, size.width, type, arr);
}

static inline
GpuMat createContinuous(Size size, int type)
{
    GpuMat m;
    createContinuous(size, type, m);
    return m;
}

static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
    ensureSizeIsEnough(size.height, size.width, type, arr);
}

static inline
void swap(GpuMat& a, GpuMat& b)
{
    a.swap(b);
}

//////////////////////////////// CudaMem ////////////////////////////////

inline
CudaMem::CudaMem(AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
}

inline
CudaMem::CudaMem(const CudaMem& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
    if( refcount )
        CV_XADD(refcount, 1);
}

inline
CudaMem::CudaMem(int rows_, int cols_, int type_, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
    if (rows_ > 0 && cols_ > 0)
        create(rows_, cols_, type_);
}

inline
CudaMem::CudaMem(Size size_, int type_, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
    if (size_.height > 0 && size_.width > 0)
        create(size_.height, size_.width, type_);
}

inline
CudaMem::CudaMem(InputArray arr, AllocType alloc_type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
    arr.getMat().copyTo(*this);
}

inline
CudaMem::~CudaMem()
{
    release();
}

inline
CudaMem& CudaMem::operator =(const CudaMem& m)
{
    if (this != &m)
    {
        CudaMem temp(m);
        swap(temp);
    }

    return *this;
}

inline
void CudaMem::swap(CudaMem& b)
{
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);
    std::swap(refcount, b.refcount);
    std::swap(alloc_type, b.alloc_type);
}

inline
CudaMem CudaMem::clone() const
{
    CudaMem m(size(), type(), alloc_type);
    createMatHeader().copyTo(m);
    return m;
}

inline
void CudaMem::create(Size size_, int type_)
{
    create(size_.height, size_.width, type_);
}

inline
Mat CudaMem::createMatHeader() const
{
    return Mat(size(), type(), data, step);
}

inline
bool CudaMem::isContinuous() const
{
    return (flags & Mat::CONTINUOUS_FLAG) != 0;
}

inline
size_t CudaMem::elemSize() const
{
    return CV_ELEM_SIZE(flags);
}

inline
size_t CudaMem::elemSize1() const
{
    return CV_ELEM_SIZE1(flags);
}

inline
int CudaMem::type() const
{
    return CV_MAT_TYPE(flags);
}

inline
int CudaMem::depth() const
{
    return CV_MAT_DEPTH(flags);
}

inline
int CudaMem::channels() const
{
    return CV_MAT_CN(flags);
}

inline
size_t CudaMem::step1() const
{
    return step / elemSize1();
}

inline
Size CudaMem::size() const
{
    return Size(cols, rows);
}

inline
bool CudaMem::empty() const
{
    return data == 0;
}

static inline
void swap(CudaMem& a, CudaMem& b)
{
    a.swap(b);
}

//////////////////////////////// Stream ///////////////////////////////

inline
void Stream::enqueueDownload(const GpuMat& src, OutputArray dst)
{
    src.download(dst, *this);
}

inline
void Stream::enqueueUpload(InputArray src, GpuMat& dst)
{
    dst.upload(src, *this);
}

inline
void Stream::enqueueCopy(const GpuMat& src, OutputArray dst)
{
    src.copyTo(dst, *this);
}

inline
void Stream::enqueueMemSet(GpuMat& src, Scalar val)
{
    src.setTo(val, *this);
}

inline
void Stream::enqueueMemSet(GpuMat& src, Scalar val, InputArray mask)
{
    src.setTo(val, mask, *this);
}

inline
void Stream::enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha, double beta)
{
    src.convertTo(dst, dtype, alpha, beta, *this);
}

inline
Stream::Stream(const Ptr<Impl>& impl)
    : impl_(impl)
{
}

//////////////////////////////// Initialization & Info ////////////////////////

inline
bool TargetArchs::has(int major, int minor)
{
    return hasPtx(major, minor) || hasBin(major, minor);
}

inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
    return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}

inline
DeviceInfo::DeviceInfo()
{
    device_id_ = getDevice();
}

inline
DeviceInfo::DeviceInfo(int device_id)
{
    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );

    device_id_ = device_id;
}

inline
int DeviceInfo::deviceID() const
{
    return device_id_;
}

inline
size_t DeviceInfo::freeMemory() const
{
    size_t _totalMemory, _freeMemory;
    queryMemory(_totalMemory, _freeMemory);
    return _freeMemory;
}

inline
size_t DeviceInfo::totalMemory() const
{
    size_t _totalMemory, _freeMemory;
    queryMemory(_totalMemory, _freeMemory);
    return _totalMemory;
}

inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
    // FeatureSet values encode a compute capability as major * 10 + minor,
    // so the support check reduces to an integer comparison
    int version = major() * 10 + minor();
    return version >= feature_set;
}

}} // namespace cv { namespace gpu {

//////////////////////////////// Mat ////////////////////////////////

namespace cv {

inline
Mat::Mat(const gpu::GpuMat& m)
    : flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
{
    m.download(*this);
}

}

#endif // __OPENCV_CORE_GPUINL_HPP__
@@ -1,722 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
// (standard OpenCV license header, identical to the one reproduced in full above)
//M*/

#ifndef __OPENCV_GPUMAT_HPP__
#define __OPENCV_GPUMAT_HPP__

#include "opencv2/core.hpp"
#include "opencv2/core/cuda_devptrs.hpp"

namespace cv { namespace gpu
{
    //////////////////////////////// CudaMem ////////////////////////////////
    // CudaMem is a limited cv::Mat with page-locked memory allocation.
    // Page-locked memory is only needed for asynchronous and faster copying to the GPU.
    // It is convertible to a cv::Mat header without reference counting,
    // so you can use it with other OpenCV functions.

    // Page-locks the memory of matrix m and maps it for the device(s)
    CV_EXPORTS void registerPageLocked(Mat& m);

    // Unmaps the memory of matrix m, and makes it pageable again.
    CV_EXPORTS void unregisterPageLocked(Mat& m);

    class CV_EXPORTS CudaMem
    {
    public:
        enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };

        CudaMem();
        CudaMem(const CudaMem& m);

        CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
        CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);

        //! creates from cv::Mat, copying the data
        explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);

        ~CudaMem();

        CudaMem& operator = (const CudaMem& m);

        //! returns deep copy of the matrix, i.e. the data is copied
        CudaMem clone() const;

        //! allocates new matrix data unless the matrix already has specified size and type.
        void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
        void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);

        //! decrements reference counter and releases memory if needed.
        void release();

        //! returns matrix header with disabled reference counting for CudaMem data.
        Mat createMatHeader() const;
        operator Mat() const;

        //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
        GpuMat createGpuMatHeader() const;
        operator GpuMat() const;

        // returns true if host memory can be mapped to GPU address space
        static bool canMapHostMemory();

        // Please see cv::Mat for descriptions
        bool isContinuous() const;
        size_t elemSize() const;
        size_t elemSize1() const;
        int type() const;
        int depth() const;
        int channels() const;
        size_t step1() const;
        Size size() const;
        bool empty() const;

        // Please see cv::Mat for descriptions
        int flags;
        int rows, cols;
        size_t step;

        uchar* data;
        int* refcount;

        uchar* datastart;
        uchar* dataend;

        int alloc_type;
    };

    //////////////////////////////// CudaStream ////////////////////////////////
    // Encapsulates a CUDA Stream. Provides an interface for asynchronous copying.
    // Passed to each function that supports asynchronous kernel execution.
    // Reference counting is enabled.

    class CV_EXPORTS Stream
    {
    public:
        Stream();
        ~Stream();

        Stream(const Stream&);
        Stream& operator =(const Stream&);

        bool queryIfComplete();
        void waitForCompletion();

        //! downloads asynchronously
        // Warning! cv::Mat must point to page-locked memory (i.e. to CudaMem data or to its subMat)
        void enqueueDownload(const GpuMat& src, CudaMem& dst);
        void enqueueDownload(const GpuMat& src, Mat& dst);

        //! uploads asynchronously
        // Warning! cv::Mat must point to page-locked memory (i.e. to CudaMem data or to its ROI)
        void enqueueUpload(const CudaMem& src, GpuMat& dst);
        void enqueueUpload(const Mat& src, GpuMat& dst);

        //! copies asynchronously
        void enqueueCopy(const GpuMat& src, GpuMat& dst);

        //! sets memory asynchronously
        void enqueueMemSet(GpuMat& src, Scalar val);
        void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);

        //! converts matrix type, e.g. from float to uchar, depending on type
        void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);

        //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
        typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
        void enqueueHostCallback(StreamCallback callback, void* userData);

        static Stream& Null();

        operator bool() const;

    private:
        struct Impl;

        explicit Stream(Impl* impl);
        void create();
        void release();

        Impl *impl;

        friend struct StreamAccessor;
    };

    //////////////////////////////// Initialization & Info ////////////////////////

    //! This is the only function that does not throw exceptions if the library is compiled without CUDA.
    CV_EXPORTS int getCudaEnabledDeviceCount();

    //! Functions below throw cv::Exception if the library is compiled without CUDA.

    CV_EXPORTS void setDevice(int device);

    CV_EXPORTS int getDevice();

    //! Explicitly destroys and cleans up all resources associated with the current device in the current process.
    //! Any subsequent API call to this device will reinitialize the device.
    CV_EXPORTS void resetDevice();

    enum FeatureSet
    {
        FEATURE_SET_COMPUTE_10 = 10,
        FEATURE_SET_COMPUTE_11 = 11,
        FEATURE_SET_COMPUTE_12 = 12,
        FEATURE_SET_COMPUTE_13 = 13,
        FEATURE_SET_COMPUTE_20 = 20,
        FEATURE_SET_COMPUTE_21 = 21,
        FEATURE_SET_COMPUTE_30 = 30,
        FEATURE_SET_COMPUTE_35 = 35,

        GLOBAL_ATOMICS         = FEATURE_SET_COMPUTE_11,
        SHARED_ATOMICS         = FEATURE_SET_COMPUTE_12,
        NATIVE_DOUBLE          = FEATURE_SET_COMPUTE_13,
        WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
        DYNAMIC_PARALLELISM    = FEATURE_SET_COMPUTE_35
    };

    // Checks whether current device supports the given feature
    CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

    // Gives information about what GPU archs this OpenCV GPU module was
    // compiled for
    class CV_EXPORTS TargetArchs
    {
    public:
        static bool builtWith(FeatureSet feature_set);
        static bool has(int major, int minor);
        static bool hasPtx(int major, int minor);
        static bool hasBin(int major, int minor);
        static bool hasEqualOrLessPtx(int major, int minor);
        static bool hasEqualOrGreater(int major, int minor);
        static bool hasEqualOrGreaterPtx(int major, int minor);
        static bool hasEqualOrGreaterBin(int major, int minor);
    private:
        TargetArchs();
    };

    // Gives information about the given GPU
    class CV_EXPORTS DeviceInfo
    {
    public:
        // Creates DeviceInfo object for the current GPU
        DeviceInfo() : device_id_(getDevice()) { query(); }

        // Creates DeviceInfo object for the given GPU
        DeviceInfo(int device_id) : device_id_(device_id) { query(); }

        String name() const { return name_; }

        // Returns compute capability versions
        int majorVersion() const { return majorVersion_; }
        int minorVersion() const { return minorVersion_; }

        int multiProcessorCount() const { return multi_processor_count_; }

        size_t sharedMemPerBlock() const;

        void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
        size_t freeMemory() const;
        size_t totalMemory() const;

        // Checks whether device supports the given feature
        bool supports(FeatureSet feature_set) const;

        // Checks whether the GPU module can be run on the given device
        bool isCompatible() const;

        int deviceID() const { return device_id_; }

    private:
        void query();

        int device_id_;

        String name_;
        int multi_processor_count_;
        int majorVersion_;
        int minorVersion_;
    };

    CV_EXPORTS void printCudaDeviceInfo(int device);

    CV_EXPORTS void printShortCudaDeviceInfo(int device);

    //////////////////////////////// GpuMat ///////////////////////////////

    //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
    class CV_EXPORTS GpuMat
    {
    public:
        //! default constructor
        GpuMat();

        //! constructs GpuMatrix of the specified size and type (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
        GpuMat(int rows, int cols, int type);
        GpuMat(Size size, int type);

        //! constructs GpuMatrix and fills it with the specified value s.
        GpuMat(int rows, int cols, int type, Scalar s);
        GpuMat(Size size, int type, Scalar s);

        //! copy constructor
        GpuMat(const GpuMat& m);

        //! constructor for GpuMatrix headers pointing to user-allocated data
        GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
        GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

        //! creates a matrix header for a part of the bigger matrix
        GpuMat(const GpuMat& m, Range rowRange, Range colRange);
        GpuMat(const GpuMat& m, Rect roi);

        //! builds GpuMat from Mat, performing a blocking upload to the device.
        explicit GpuMat(const Mat& m);

        //! destructor - calls release()
        ~GpuMat();

        //! assignment operators
        GpuMat& operator = (const GpuMat& m);

        //! performs blocking upload of data to GpuMat.
        void upload(const Mat& m);

        //! downloads data from device to host memory. Blocking call.
        void download(Mat& m) const;

        //! returns a new GpuMatrix header for the specified row
        GpuMat row(int y) const;
        //! returns a new GpuMatrix header for the specified column
        GpuMat col(int x) const;
        //! ... for the specified row span
        GpuMat rowRange(int startrow, int endrow) const;
        GpuMat rowRange(Range r) const;
        //! ... for the specified column span
        GpuMat colRange(int startcol, int endcol) const;
        GpuMat colRange(Range r) const;

        //! returns deep copy of the GpuMatrix, i.e. the data is copied
        GpuMat clone() const;
        //! copies the GpuMatrix content to "m".
        // It calls m.create(this->size(), this->type()).
        void copyTo(GpuMat& m) const;
        //! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
        void copyTo(GpuMat& m, const GpuMat& mask) const;
        //! converts GpuMatrix to another datatype with optional scaling. See cvConvertScale.
        void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;

        void assignTo(GpuMat& m, int type = -1) const;

        //! sets every GpuMatrix element to s
        GpuMat& operator = (Scalar s);
        //! sets some of the GpuMatrix elements to s, according to the mask
        GpuMat& setTo(Scalar s, const GpuMat& mask = GpuMat());
        //! creates alternative GpuMatrix header for the same data, with different
        // number of channels and/or different number of rows. See cvReshape.
        GpuMat reshape(int cn, int rows = 0) const;
|
||||
    //! allocates new GpuMat data unless the GpuMat already has the specified size and type;
    // previous data is unreferenced if needed
    void create(int rows, int cols, int type);
    void create(Size size, int type);
    //! decreases reference counter;
    // deallocates the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! locates GpuMat header within a parent GpuMat. See below
    void locateROI(Size& wholeSize, Point& ofs) const;
    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
    //! extracts a rectangular sub-GpuMat
    // (this is a generalized form of row, rowRange etc.)
    GpuMat operator()(Range rowRange, Range colRange) const;
    GpuMat operator()(Rect roi) const;

    //! returns true iff the GpuMat data is continuous
    // (i.e. when there are no gaps between successive rows),
    // similar to CV_IS_MAT_CONT(cvMat->type)
    bool isContinuous() const;
    //! returns element size in bytes,
    // similar to CV_ELEM_SIZE(cvMat->type)
    size_t elemSize() const;
    //! returns the size of element channel in bytes
    size_t elemSize1() const;
    //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
    int type() const;
    //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
    int depth() const;
    //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
    int channels() const;
    //! returns step/elemSize1()
    size_t step1() const;
    //! returns GpuMat size:
    // width == number of columns, height == number of rows
    Size size() const;
    //! returns true if GpuMat data is NULL
    bool empty() const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    // Deprecated function
    __CV_GPU_DEPR_BEFORE__ template <typename _Tp> operator DevMem2D_<_Tp>() const __CV_GPU_DEPR_AFTER__;
    #undef __CV_GPU_DEPR_BEFORE__
    #undef __CV_GPU_DEPR_AFTER__
    /*! includes several bit-fields:
        - the magic signature
        - continuity flag
        - depth
        - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    // when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    uchar* dataend;
};
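
// A minimal usage sketch of the interface declared above (not part of the original
// header; names like `host` and `result` are illustrative): blocking upload,
// device-side conversion, blocking download.
//
//     void example(const cv::Mat& host)
//     {
//         cv::gpu::GpuMat d_src;
//         d_src.upload(host);              // blocking host -> device copy
//
//         cv::gpu::GpuMat d_dst;
//         d_src.convertTo(d_dst, CV_32F);  // conversion runs on the device
//
//         cv::Mat result;
//         d_dst.download(result);          // blocking device -> host copy
//     }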

//! Creates a continuous GPU matrix
CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);

//! Ensures that the size of the given matrix is not less than (rows, cols)
//! and that its type matches the specified one
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
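
// A short sketch of the intended use of ensureSizeIsEnough() (assumed from the
// declaration above): reuse one buffer across iterations, reallocating only when
// the requested size/type no longer fits.
//
//     cv::gpu::GpuMat buf;
//     for (int i = 0; i < nFrames; ++i)                        // nFrames is illustrative
//     {
//         cv::gpu::ensureSizeIsEnough(480, 640, CV_8UC1, buf); // no-op once allocated
//         // ... fill and process buf ...
//     }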

////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

inline
GpuMat::GpuMat()
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{}

inline
GpuMat::GpuMat(int rows_, int cols_, int type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (rows_ > 0 && cols_ > 0)
        create(rows_, cols_, type_);
}

inline
GpuMat::GpuMat(Size size_, int type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (size_.height > 0 && size_.width > 0)
        create(size_.height, size_.width, type_);
}

inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (rows_ > 0 && cols_ > 0)
    {
        create(rows_, cols_, type_);
        setTo(s_);
    }
}

inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (size_.height > 0 && size_.width > 0)
    {
        create(size_.height, size_.width, type_);
        setTo(s_);
    }
}

inline
GpuMat::~GpuMat()
{
    release();
}

inline
GpuMat GpuMat::clone() const
{
    GpuMat m;
    copyTo(m);
    return m;
}

inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
    if (_type < 0)
        m = *this;
    else
        convertTo(m, _type);
}

inline
size_t GpuMat::step1() const
{
    return step / elemSize1();
}

inline
bool GpuMat::empty() const
{
    return data == 0;
}

template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
    return (_Tp*)ptr(y);
}

template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
    return (const _Tp*)ptr(y);
}

inline
GpuMat GpuMat::row(int y) const
{
    return GpuMat(*this, Range(y, y+1), Range::all());
}

inline
GpuMat GpuMat::col(int x) const
{
    return GpuMat(*this, Range::all(), Range(x, x+1));
}

inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
    return GpuMat(*this, Range(startrow, endrow), Range::all());
}

inline
GpuMat GpuMat::rowRange(Range r) const
{
    return GpuMat(*this, r, Range::all());
}

inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
    return GpuMat(*this, Range::all(), Range(startcol, endcol));
}

inline
GpuMat GpuMat::colRange(Range r) const
{
    return GpuMat(*this, Range::all(), r);
}

inline
void GpuMat::create(Size size_, int type_)
{
    create(size_.height, size_.width, type_);
}

inline
GpuMat GpuMat::operator()(Range _rowRange, Range _colRange) const
{
    return GpuMat(*this, _rowRange, _colRange);
}

inline
GpuMat GpuMat::operator()(Rect roi) const
{
    return GpuMat(*this, roi);
}

inline
bool GpuMat::isContinuous() const
{
    return (flags & Mat::CONTINUOUS_FLAG) != 0;
}

inline
size_t GpuMat::elemSize() const
{
    return CV_ELEM_SIZE(flags);
}

inline
size_t GpuMat::elemSize1() const
{
    return CV_ELEM_SIZE1(flags);
}

inline
int GpuMat::type() const
{
    return CV_MAT_TYPE(flags);
}

inline
int GpuMat::depth() const
{
    return CV_MAT_DEPTH(flags);
}

inline
int GpuMat::channels() const
{
    return CV_MAT_CN(flags);
}

inline
Size GpuMat::size() const
{
    return Size(cols, rows);
}

inline
uchar* GpuMat::ptr(int y)
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );
    return data + step * y;
}

inline
const uchar* GpuMat::ptr(int y) const
{
    CV_DbgAssert( (unsigned)y < (unsigned)rows );
    return data + step * y;
}

inline
GpuMat& GpuMat::operator = (Scalar s)
{
    setTo(s);
    return *this;
}

template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
    return PtrStepSz<T>(rows, cols, (T*)data, step);
}

template <class T> inline
GpuMat::operator PtrStep<T>() const
{
    return PtrStep<T>((T*)data, step);
}

template <class T> inline
GpuMat::operator DevMem2D_<T>() const
{
    return DevMem2D_<T>(rows, cols, (T*)data, step);
}

static inline
void swap(GpuMat& a, GpuMat& b)
{
    a.swap(b);
}

static inline
GpuMat createContinuous(int rows, int cols, int type)
{
    GpuMat m;
    createContinuous(rows, cols, type, m);
    return m;
}

static inline
void createContinuous(Size size, int type, GpuMat& m)
{
    createContinuous(size.height, size.width, type, m);
}

static inline
GpuMat createContinuous(Size size, int type)
{
    GpuMat m;
    createContinuous(size, type, m);
    return m;
}

static inline
void ensureSizeIsEnough(Size size, int type, GpuMat& m)
{
    ensureSizeIsEnough(size.height, size.width, type, m);
}

}} // cv::gpu

#endif // __OPENCV_GPUMAT_HPP__
@ -0,0 +1,57 @@ |
#include "opencv2/core/cuda/common.hpp" |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream); |
||||
|
||||
template <typename T> |
||||
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream); |
||||
|
||||
template <typename T> |
||||
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); |
||||
|
||||
void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream); |
||||
}}} |
@ -1,348 +0,0 @@ |
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
#if !defined (HAVE_CUDA) |
||||
|
||||
cv::gpu::Stream::Stream() { throw_no_cuda(); } |
||||
cv::gpu::Stream::~Stream() {} |
||||
cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); } |
||||
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; } |
||||
bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; } |
||||
void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); } |
||||
Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; } |
||||
cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; } |
||||
cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); } |
||||
void cv::gpu::Stream::create() { throw_no_cuda(); } |
||||
void cv::gpu::Stream::release() { throw_no_cuda(); } |
||||
|
||||
#else /* !defined (HAVE_CUDA) */ |
||||
|
||||
namespace cv { namespace gpu |
||||
{ |
||||
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); |
||||
void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream); |
||||
void setTo(GpuMat& src, Scalar s, cudaStream_t stream); |
||||
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream); |
||||
}} |

struct Stream::Impl
{
    static cudaStream_t getStream(const Impl* impl)
    {
        return impl ? impl->stream : 0;
    }

    cudaStream_t stream;
    int ref_counter;
};

cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
    return Stream::Impl::getStream(stream.impl);
}

cv::gpu::Stream::Stream() : impl(0)
{
    create();
}

cv::gpu::Stream::~Stream()
{
    release();
}

cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
{
    if (impl)
        CV_XADD(&impl->ref_counter, 1);
}

Stream& cv::gpu::Stream::operator =(const Stream& stream)
{
    if (this != &stream)
    {
        release();
        impl = stream.impl;
        if (impl)
            CV_XADD(&impl->ref_counter, 1);
    }

    return *this;
}

bool cv::gpu::Stream::queryIfComplete()
{
    cudaStream_t stream = Impl::getStream(impl);
    cudaError_t err = cudaStreamQuery(stream);

    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;

    cudaSafeCall(err);
    return false;
}

void cv::gpu::Stream::waitForCompletion()
{
    cudaStream_t stream = Impl::getStream(impl);
    cudaSafeCall( cudaStreamSynchronize(stream) );
}

void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
    // dst must be allocated beforehand: otherwise the allocation would happen here,
    // and dst would not point to page-locked memory, so the copy could not be truly asynchronous
    CV_Assert( src.size() == dst.size() && src.type() == dst.type() );

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}

void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
{
    dst.create(src.size(), src.type(), CudaMem::ALLOC_PAGE_LOCKED);

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}

void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
{
    dst.create(src.size(), src.type());

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}

void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
{
    dst.create(src.size(), src.type());

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}

void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
{
    dst.create(src.size(), src.type());

    cudaStream_t stream = Impl::getStream(impl);
    size_t bwidth = src.cols * src.elemSize();
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
}

void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
{
    const int sdepth = src.depth();

    if (sdepth == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    cudaStream_t stream = Impl::getStream(impl);

    if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
        return;
    }

    if (sdepth == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
        {
            int ival = saturate_cast<uchar>(val[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
            return;
        }
    }

    setTo(src, val, stream);
}

void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
    const int sdepth = src.depth();

    if (sdepth == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    CV_Assert(mask.type() == CV_8UC1);

    cudaStream_t stream = Impl::getStream(impl);

    setTo(src, val, mask, stream);
}

void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta)
{
    if (dtype < 0)
        dtype = src.type();
    else
        dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels());

    const int sdepth = src.depth();
    const int ddepth = CV_MAT_DEPTH(dtype);

    if (sdepth == CV_64F || ddepth == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                && fabs(beta) < std::numeric_limits<double>::epsilon();

    if (sdepth == ddepth && noScale)
    {
        enqueueCopy(src, dst);
        return;
    }

    dst.create(src.size(), dtype);

    cudaStream_t stream = Impl::getStream(impl);
    convertTo(src, dst, alpha, beta, stream);
}

#if CUDART_VERSION >= 5000

namespace
{
    struct CallbackData
    {
        cv::gpu::Stream::StreamCallback callback;
        void* userData;
        Stream stream;
    };

    void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
    {
        CallbackData* data = reinterpret_cast<CallbackData*>(userData);
        data->callback(data->stream, static_cast<int>(status), data->userData);
        delete data;
    }
}

#endif

void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
#if CUDART_VERSION >= 5000
    CallbackData* data = new CallbackData;
    data->callback = callback;
    data->userData = userData;
    data->stream = *this;

    cudaStream_t stream = Impl::getStream(impl);

    cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
#else
    (void) callback;
    (void) userData;
    CV_Error(CV_StsNotImplemented, "This function requires CUDA 5.0");
#endif
}

cv::gpu::Stream& cv::gpu::Stream::Null()
{
    static Stream s((Impl*) 0);
    return s;
}

cv::gpu::Stream::operator bool() const
{
    return impl && impl->stream;
}

cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_)
{
}

void cv::gpu::Stream::create()
{
    if (impl)
        release();

    cudaStream_t stream;
    cudaSafeCall( cudaStreamCreate( &stream ) );

    impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));

    impl->stream = stream;
    impl->ref_counter = 1;
}

void cv::gpu::Stream::release()
{
    if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
    {
        cudaSafeCall( cudaStreamDestroy(impl->stream) );
        cv::fastFree(impl);
    }
}

#endif /* !defined (HAVE_CUDA) */
@ -0,0 +1,215 @@ |
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
namespace |
||||
{ |
||||
size_t alignUpStep(size_t what, size_t alignment) |
||||
{ |
||||
size_t alignMask = alignment - 1; |
||||
size_t inverseAlignMask = ~alignMask; |
||||
size_t res = (what + alignMask) & inverseAlignMask; |
||||
return res; |
||||
} |
||||
} |
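
// Worked example of the round-up above, assuming a power-of-two alignment (which
// CUDA texture alignment is): alignUpStep(100, 32) = (100 + 31) & ~31 = 131 & ~31
// = 128, the smallest multiple of 32 not less than 100. The mask trick relies on
// alignment - 1 having all low bits set, so it does not hold for non-power-of-two alignments.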

void cv::gpu::CudaMem::create(int rows_, int cols_, int type_)
{
#ifndef HAVE_CUDA
    (void) rows_;
    (void) cols_;
    (void) type_;
    throw_no_cuda();
#else
    if (alloc_type == SHARED)
    {
        DeviceInfo devInfo;
        CV_Assert( devInfo.canMapHostMemory() );
    }

    type_ &= Mat::TYPE_MASK;

    if (rows == rows_ && cols == cols_ && type() == type_ && data)
        return;

    if (data)
        release();

    CV_DbgAssert( rows_ >= 0 && cols_ >= 0 );

    if (rows_ > 0 && cols_ > 0)
    {
        flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_;
        rows = rows_;
        cols = cols_;
        step = elemSize() * cols;

        if (alloc_type == SHARED)
        {
            DeviceInfo devInfo;
            step = alignUpStep(step, devInfo.textureAlignment());
        }

        int64 _nettosize = (int64)step*rows;
        size_t nettosize = (size_t)_nettosize;

        if (_nettosize != (int64)nettosize)
            CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated");

        size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));

        void* ptr = 0;

        switch (alloc_type)
        {
        case PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break;
        case SHARED:         cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break;
        case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break;
        default:             CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
        }

        datastart = data = (uchar*)ptr;
        dataend = data + nettosize;

        refcount = (int*)cv::fastMalloc(sizeof(*refcount));
        *refcount = 1;
    }
#endif
}

CudaMem cv::gpu::CudaMem::reshape(int new_cn, int new_rows) const
{
    CudaMem hdr = *this;

    int cn = channels();
    if (new_cn == 0)
        new_cn = cn;

    int total_width = cols * cn;

    if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
        new_rows = rows * total_width / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        int total_size = total_width * rows;

        if (!isContinuous())
            CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)total_size)
            CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows");

        total_width = total_size / new_rows;

        if (total_width * new_rows != total_size)
            CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        hdr.rows = new_rows;
        hdr.step = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if (new_width * new_cn != total_width)
        CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels");

    hdr.cols = new_width;
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);

    return hdr;
}
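
// A small illustration of the header arithmetic above (hypothetical values): a
// continuous 4x6 CV_8UC1 buffer viewed as 3-channel gives total_width = 6*1 = 6
// and new_width = 6/3 = 2, i.e. a 4x2 CV_8UC3 header over the same data -- no
// copy takes place.
//
//     CudaMem m(4, 6, CV_8UC1);    // constructor and default alloc_type assumed
//     CudaMem v = m.reshape(3);    // 4x2, CV_8UC3, same underlying buffer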

void cv::gpu::CudaMem::release()
{
#ifdef HAVE_CUDA
    if (refcount && CV_XADD(refcount, -1) == 1)
    {
        cudaFreeHost(datastart);
        fastFree(refcount);
    }

    data = datastart = dataend = 0;
    step = rows = cols = 0;
    refcount = 0;
#endif
}

GpuMat cv::gpu::CudaMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return GpuMat();
#else
    CV_Assert( alloc_type == SHARED );

    void* pdev;
    cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) );

    return GpuMat(rows, cols, type(), pdev, step);
#endif
}

void cv::gpu::registerPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
    (void) m;
    throw_no_cuda();
#else
    CV_Assert( m.isContinuous() );
    cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) );
#endif
}

void cv::gpu::unregisterPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
    (void) m;
#else
    cudaSafeCall( cudaHostUnregister(m.data) );
#endif
}
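
// A brief usage sketch (assumed from the declarations above): pinning an existing
// continuous Mat speeds up repeated host<->device transfers from its buffer;
// unregister it before the Mat is destroyed.
//
//     cv::Mat frame(480, 640, CV_8UC3);
//     cv::gpu::registerPageLocked(frame);
//     // ... repeated uploads/downloads using frame ...
//     cv::gpu::unregisterPageLocked(frame);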
@ -0,0 +1,308 @@ |
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Stream
|
||||
|
||||
#ifndef HAVE_CUDA |
||||
|
||||
class cv::gpu::Stream::Impl |
||||
{ |
||||
public: |
||||
Impl(void* ptr = 0) |
||||
{ |
||||
(void) ptr; |
||||
throw_no_cuda(); |
||||
} |
||||
}; |
||||
|
||||
#else |
||||
|
||||
class cv::gpu::Stream::Impl |
||||
{ |
||||
public: |
||||
cudaStream_t stream; |
||||
|
||||
Impl(); |
||||
Impl(cudaStream_t stream); |
||||
|
||||
~Impl(); |
||||
}; |
||||
|
||||
cv::gpu::Stream::Impl::Impl() : stream(0) |
||||
{ |
||||
cudaSafeCall( cudaStreamCreate(&stream) ); |
||||
} |
||||
|
||||
cv::gpu::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_) |
||||
{ |
||||
} |
||||
|
||||
cv::gpu::Stream::Impl::~Impl() |
||||
{ |
||||
if (stream) |
||||
cudaStreamDestroy(stream); |
||||
} |
||||
|
||||
cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) |
||||
{ |
||||
return stream.impl_->stream; |
||||
} |
||||
|
||||
#endif |

cv::gpu::Stream::Stream()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
#else
    impl_ = new Impl;
#endif
}

bool cv::gpu::Stream::queryIfComplete() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    cudaError_t err = cudaStreamQuery(impl_->stream);

    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;

    cudaSafeCall(err);
    return false;
#endif
}

void cv::gpu::Stream::waitForCompletion()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
#else
    cudaSafeCall( cudaStreamSynchronize(impl_->stream) );
#endif
}

void cv::gpu::Stream::waitEvent(const Event& event)
{
#ifndef HAVE_CUDA
    (void) event;
    throw_no_cuda();
#else
    cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) );
#endif
}

#if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)

namespace
{
    struct CallbackData
    {
        Stream::StreamCallback callback;
        void* userData;

        CallbackData(Stream::StreamCallback callback_, void* userData_) : callback(callback_), userData(userData_) {}
    };

    void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
    {
        CallbackData* data = reinterpret_cast<CallbackData*>(userData);
        data->callback(static_cast<int>(status), data->userData);
        delete data;
    }
}

#endif

void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
#ifndef HAVE_CUDA
    (void) callback;
    (void) userData;
    throw_no_cuda();
#else
    #if CUDART_VERSION < 5000
        (void) callback;
        (void) userData;
        CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA 5.0");
    #else
        CallbackData* data = new CallbackData(callback, userData);

        cudaSafeCall( cudaStreamAddCallback(impl_->stream, cudaStreamCallback, data, 0) );
    #endif
#endif
}
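
// A hypothetical usage sketch (CUDA >= 5.0), assuming StreamCallback is a plain
// `void (*)(int status, void* userData)` as the dispatch above suggests. The
// callback runs on a driver thread once all previously enqueued work on the
// stream completes, and must not call back into the CUDA API.
//
//     void onStreamDone(int /*status*/, void* userData)
//     {
//         // e.g. set a flag or notify a condition variable for the host side
//     }
//
//     stream.enqueueHostCallback(onStreamDone, &myState);  // myState is illustrative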

Stream& cv::gpu::Stream::Null()
{
    static Stream s(new Impl(0));
    return s;
}

cv::gpu::Stream::operator bool_type() const
{
#ifndef HAVE_CUDA
    return 0;
#else
    return (impl_->stream != 0) ? &Stream::this_type_does_not_support_comparisons : 0;
#endif
}

template <> void cv::Ptr<Stream::Impl>::delete_obj()
{
    if (obj) delete obj;
}

////////////////////////////////////////////////////////////////
// Event

#ifndef HAVE_CUDA

class cv::gpu::Event::Impl
{
public:
    Impl(unsigned int)
    {
        throw_no_cuda();
    }
};

#else

class cv::gpu::Event::Impl
{
public:
    cudaEvent_t event;

    Impl(unsigned int flags);
    ~Impl();
};

cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
{
    cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
}

cv::gpu::Event::Impl::~Impl()
{
    if (event)
        cudaEventDestroy(event);
}

cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
{
    return event.impl_->event;
}

#endif

cv::gpu::Event::Event(CreateFlags flags)
{
#ifndef HAVE_CUDA
    (void) flags;
    throw_no_cuda();
#else
    impl_ = new Impl(flags);
#endif
}

void cv::gpu::Event::record(Stream& stream)
{
#ifndef HAVE_CUDA
    (void) stream;
    throw_no_cuda();
#else
    cudaSafeCall( cudaEventRecord(impl_->event, StreamAccessor::getStream(stream)) );
#endif
}

bool cv::gpu::Event::queryIfComplete() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    cudaError_t err = cudaEventQuery(impl_->event);

    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;

    cudaSafeCall(err);
    return false;
#endif
}

void cv::gpu::Event::waitForCompletion()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
#else
    cudaSafeCall( cudaEventSynchronize(impl_->event) );
#endif
}

float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
{
#ifndef HAVE_CUDA
    (void) start;
    (void) end;
    throw_no_cuda();
    return 0.0f;
#else
    float ms;
    cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) );
    return ms;
#endif
}
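
// A minimal timing sketch built from the Event API above (assuming elapsedTime
// can be called via the class, as its two-Event signature suggests):
//
//     cv::gpu::Event start, stop;
//     start.record(stream);
//     // ... enqueue asynchronous work on `stream` ...
//     stop.record(stream);
//     stop.waitForCompletion();
//     float ms = cv::gpu::Event::elapsedTime(start, stop);  // milliseconds between events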

template <> void cv::Ptr<Event::Impl>::delete_obj()
{
    if (obj) delete obj;
}
@ -1,294 +0,0 @@ |
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
cv::gpu::CudaMem::CudaMem() |
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) |
||||
{ |
||||
} |
||||
|
||||
cv::gpu::CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type) |
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) |
||||
{ |
||||
if( _rows > 0 && _cols > 0 ) |
||||
create( _rows, _cols, _type, _alloc_type); |
||||
} |
||||
|
||||
cv::gpu::CudaMem::CudaMem(Size _size, int _type, int _alloc_type) |
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) |
||||
{ |
||||
if( _size.height > 0 && _size.width > 0 ) |
||||
create( _size.height, _size.width, _type, _alloc_type); |
||||
} |
||||
|
||||
cv::gpu::CudaMem::CudaMem(const CudaMem& m) |
||||
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type) |
||||
{ |
||||
if( refcount ) |
||||
CV_XADD(refcount, 1); |
||||
} |
||||
|
||||
cv::gpu::CudaMem::CudaMem(const Mat& m, int _alloc_type) |
||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) |
||||
{ |
||||
if( m.rows > 0 && m.cols > 0 ) |
||||
create( m.size(), m.type(), _alloc_type); |
||||
|
||||
Mat tmp = createMatHeader(); |
||||
m.copyTo(tmp); |
||||
} |
||||
|
||||
cv::gpu::CudaMem::~CudaMem() |
||||
{ |
||||
release(); |
||||
} |

CudaMem& cv::gpu::CudaMem::operator = (const CudaMem& m)
{
    if( this != &m )
    {
        if( m.refcount )
            CV_XADD(m.refcount, 1);
        release();
        flags = m.flags;
        rows = m.rows; cols = m.cols;
        step = m.step; data = m.data;
        datastart = m.datastart;
        dataend = m.dataend;
        refcount = m.refcount;
        alloc_type = m.alloc_type;
    }
    return *this;
}

CudaMem cv::gpu::CudaMem::clone() const
{
    CudaMem m(size(), type(), alloc_type);
    Mat to = m;
    Mat from = *this;
    from.copyTo(to);
    return m;
}

void cv::gpu::CudaMem::create(Size _size, int _type, int _alloc_type)
{
    create(_size.height, _size.width, _type, _alloc_type);
}

Mat cv::gpu::CudaMem::createMatHeader() const
{
    return Mat(size(), type(), data, step);
}

cv::gpu::CudaMem::operator Mat() const
{
    return createMatHeader();
}

cv::gpu::CudaMem::operator GpuMat() const
{
    return createGpuMatHeader();
}

bool cv::gpu::CudaMem::isContinuous() const
{
    return (flags & Mat::CONTINUOUS_FLAG) != 0;
}

size_t cv::gpu::CudaMem::elemSize() const
{
    return CV_ELEM_SIZE(flags);
}

size_t cv::gpu::CudaMem::elemSize1() const
{
    return CV_ELEM_SIZE1(flags);
}

int cv::gpu::CudaMem::type() const
{
    return CV_MAT_TYPE(flags);
}

int cv::gpu::CudaMem::depth() const
{
    return CV_MAT_DEPTH(flags);
}

int cv::gpu::CudaMem::channels() const
{
    return CV_MAT_CN(flags);
}

size_t cv::gpu::CudaMem::step1() const
{
    return step / elemSize1();
}

Size cv::gpu::CudaMem::size() const
{
    return Size(cols, rows);
}

bool cv::gpu::CudaMem::empty() const
{
    return data == 0;
}

#if !defined (HAVE_CUDA)

void cv::gpu::registerPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::CudaMem::create(int, int, int, int) { throw_no_cuda(); }
bool cv::gpu::CudaMem::canMapHostMemory() { throw_no_cuda(); return false; }
void cv::gpu::CudaMem::release() { throw_no_cuda(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader() const { throw_no_cuda(); return GpuMat(); }

#else /* !defined (HAVE_CUDA) */

void cv::gpu::registerPageLocked(Mat& m)
{
    cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
}

void cv::gpu::unregisterPageLocked(Mat& m)
{
    cudaSafeCall( cudaHostUnregister(m.ptr()) );
}

bool cv::gpu::CudaMem::canMapHostMemory()
{
    cudaDeviceProp prop;
    cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
    return (prop.canMapHostMemory != 0) ? true : false;
}

namespace
{
    size_t alignUpStep(size_t what, size_t alignment)
    {
        size_t alignMask = alignment - 1;
        size_t inverseAlignMask = ~alignMask;
        size_t res = (what + alignMask) & inverseAlignMask;
        return res;
    }
}

void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
{
    if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())
        CV_Error(cv::Error::GpuApiCallError, "ZeroCopy is not supported by current device");

    _type &= Mat::TYPE_MASK;
    if( rows == _rows && cols == _cols && type() == _type && data )
        return;
    if( data )
        release();
    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
    if( _rows > 0 && _cols > 0 )
    {
        flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
        rows = _rows;
        cols = _cols;
        step = elemSize()*cols;
        if (_alloc_type == ALLOC_ZEROCOPY)
        {
            cudaDeviceProp prop;
            cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
            step = alignUpStep(step, prop.textureAlignment);
        }
        int64 _nettosize = (int64)step*rows;
        size_t nettosize = (size_t)_nettosize;
        if( _nettosize != (int64)nettosize )
            CV_Error(CV_StsNoMem, "Too big buffer is allocated");
        size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));

        //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
        alloc_type = _alloc_type;
        void* ptr = 0;

        switch (alloc_type)
        {
        case ALLOC_PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
        case ALLOC_ZEROCOPY:       cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
        case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
        default:                   CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
        }

        datastart = data = (uchar*)ptr;
        dataend = data + nettosize;

        refcount = (int*)cv::fastMalloc(sizeof(*refcount));
        *refcount = 1;
    }
}

GpuMat cv::gpu::CudaMem::createGpuMatHeader() const
{
    CV_Assert( alloc_type == ALLOC_ZEROCOPY );

    GpuMat res;

    void* pdev;
    cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
    res = GpuMat(rows, cols, type(), pdev, step);

    return res;
}

void cv::gpu::CudaMem::release()
{
    if( refcount && CV_XADD(refcount, -1) == 1 )
    {
        cudaSafeCall( cudaFreeHost(datastart) );
        fastFree(refcount);
    }
    data = datastart = dataend = 0;
    step = rows = cols = 0;
    refcount = 0;
}

#endif /* !defined (HAVE_CUDA) */