Merge pull request #974 from jet47:gpu-core-refactoring

Roman Donchenko 12 years ago committed by OpenCV Buildbot
commit 81c6b46fc6
96 changed files (per-file change count in parentheses):
  1. doc/check_docs2.py (6)
  2. modules/core/include/opencv2/core/base.hpp (3)
  3. modules/core/include/opencv2/core/cuda/common.hpp (2)
  4. modules/core/include/opencv2/core/gpu.hpp (691)
  5. modules/core/include/opencv2/core/gpu.inl.hpp (641)
  6. modules/core/include/opencv2/core/gpu_stream_accessor.hpp (20)
  7. modules/core/include/opencv2/core/gpu_types.hpp (47)
  8. modules/core/include/opencv2/core/gpumat.hpp (722)
  9. modules/core/include/opencv2/core/mat.hpp (11)
  10. modules/core/include/opencv2/core/opengl.hpp (213)
  11. modules/core/include/opencv2/core/private.gpu.hpp (8)
  12. modules/core/src/cuda/matrix_operations.cu (200)
  13. modules/core/src/cuda/matrix_operations.hpp (57)
  14. modules/core/src/cudastream.cpp (348)
  15. modules/core/src/gpu_cuda_mem.cpp (215)
  16. modules/core/src/gpu_info.cpp (1262)
  17. modules/core/src/gpu_mat.cpp (1126)
  18. modules/core/src/gpu_stream.cpp (308)
  19. modules/core/src/gpumat.cpp (1722)
  20. modules/core/src/matrix.cpp (134)
  21. modules/core/src/matrix_operations.cpp (294)
  22. modules/core/src/opengl.cpp (280)
  23. modules/core/src/precomp.hpp (4)
  24. modules/gpu/doc/data_structures.rst (259)
  25. modules/gpu/doc/initalization_and_information.rst (199)
  26. modules/gpu/include/opencv2/gpu.hpp (2)
  27. modules/gpu/src/precomp.hpp (2)
  28. modules/gpu/test/test_opengl.cpp (63)
  29. modules/gpuarithm/include/opencv2/gpuarithm.hpp (2)
  30. modules/gpuarithm/src/arithm.cpp (26)
  31. modules/gpuarithm/src/precomp.hpp (2)
  32. modules/gpubgsegm/include/opencv2/gpubgsegm.hpp (2)
  33. modules/gpubgsegm/src/cuda/fgd.hpp (2)
  34. modules/gpubgsegm/src/gmg.cpp (5)
  35. modules/gpubgsegm/src/precomp.hpp (2)
  36. modules/gpucodec/include/opencv2/gpucodec.hpp (2)
  37. modules/gpucodec/src/cuvid_video_source.h (2)
  38. modules/gpucodec/src/frame_queue.h (2)
  39. modules/gpucodec/src/precomp.hpp (2)
  40. modules/gpucodec/src/video_decoder.h (2)
  41. modules/gpucodec/src/video_parser.h (2)
  42. modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp (2)
  43. modules/gpufeatures2d/src/brute_force_matcher.cpp (20)
  44. modules/gpufeatures2d/src/precomp.hpp (2)
  45. modules/gpufilters/include/opencv2/gpufilters.hpp (2)
  46. modules/gpufilters/src/filtering.cpp (24)
  47. modules/gpufilters/src/precomp.hpp (2)
  48. modules/gpuimgproc/include/opencv2/gpuimgproc.hpp (2)
  49. modules/gpuimgproc/src/match_template.cpp (25)
  50. modules/gpuimgproc/src/precomp.hpp (2)
  51. modules/gpulegacy/include/opencv2/gpulegacy/private.hpp (2)
  52. modules/gpulegacy/src/precomp.hpp (2)
  53. modules/gpulegacy/test/test_precomp.hpp (4)
  54. modules/gpuoptflow/include/opencv2/gpuoptflow.hpp (2)
  55. modules/gpuoptflow/src/farneback.cpp (18)
  56. modules/gpuoptflow/src/precomp.hpp (2)
  57. modules/gpuoptflow/test/test_optflow.cpp (2)
  58. modules/gpustereo/include/opencv2/gpustereo.hpp (2)
  59. modules/gpustereo/src/disparity_bilateral_filter.cpp (5)
  60. modules/gpustereo/src/precomp.hpp (2)
  61. modules/gpustereo/src/stereobm.cpp (2)
  62. modules/gpustereo/src/stereobp.cpp (48)
  63. modules/gpustereo/src/stereocsbp.cpp (52)
  64. modules/gpuwarping/include/opencv2/gpuwarping.hpp (2)
  65. modules/gpuwarping/src/precomp.hpp (2)
  66. modules/gpuwarping/src/pyramids.cpp (10)
  67. modules/gpuwarping/src/resize.cpp (5)
  68. modules/highgui/include/opencv2/highgui.hpp (2)
  69. modules/highgui/src/window.cpp (71)
  70. modules/nonfree/include/opencv2/nonfree/gpu.hpp (2)
  71. modules/nonfree/src/precomp.hpp (2)
  72. modules/photo/include/opencv2/photo/gpu.hpp (2)
  73. modules/photo/src/denoising_gpu.cpp (2)
  74. modules/softcascade/include/opencv2/softcascade.hpp (7)
  75. modules/softcascade/src/cuda/channels.cu (2)
  76. modules/softcascade/src/cuda_invoker.hpp (2)
  77. modules/softcascade/src/detector_cuda.cpp (37)
  78. modules/softcascade/src/precomp.hpp (4)
  79. modules/softcascade/test/test_cuda_softcascade.cpp (2)
  80. modules/softcascade/test/utility.hpp (2)
  81. modules/stitching/include/opencv2/stitching/detail/warpers.hpp (2)
  82. modules/superres/perf/perf_precomp.hpp (2)
  83. modules/superres/src/input_array_utility.cpp (34)
  84. modules/superres/src/input_array_utility.hpp (2)
  85. modules/superres/src/precomp.hpp (4)
  86. modules/ts/include/opencv2/ts/gpu_test.hpp (2)
  87. modules/ts/src/gpu_perf.cpp (6)
  88. modules/ts/src/ts_perf.cpp (10)
  89. modules/videostab/include/opencv2/videostab/wobble_suppression.hpp (2)
  90. samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp (16)
  91. samples/gpu/driver_api_multi.cpp (4)
  92. samples/gpu/driver_api_stereo_multi.cpp (4)
  93. samples/gpu/multi.cpp (4)
  94. samples/gpu/opengl.cpp (2)
  95. samples/gpu/performance/performance.cpp (2)
  96. samples/gpu/stereo_multi.cpp (4)

@@ -201,9 +201,9 @@ def process_module(module, path):
hdrlist.append(os.path.join(root, filename))
if module == "gpu":
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_devptrs.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpumat.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "stream_accessor.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu_types.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu_stream_accessor.hpp"))
decls = []
for hname in hdrlist:

@@ -493,6 +493,9 @@ namespace ogl
namespace gpu
{
class CV_EXPORTS GpuMat;
class CV_EXPORTS CudaMem;
class CV_EXPORTS Stream;
class CV_EXPORTS Event;
}
} // cv
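Editor's note: these forward declarations let other core headers refer to the GPU types by reference or pointer without including the full gpu.hpp. A minimal, hypothetical illustration (the class and header below are made up for this sketch):

    // example_consumer.hpp (hypothetical): the forward declarations above are sufficient here
    namespace cv
    {
        class CV_EXPORTS ExampleConsumer
        {
        public:
            // Only references to gpu::GpuMat and gpu::Stream appear in the interface,
            // so the complete definitions from opencv2/core/gpu.hpp are not needed.
            virtual void process(const gpu::GpuMat& frame, gpu::Stream& stream) = 0;
            virtual ~ExampleConsumer();
        };
    }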

@@ -44,7 +44,7 @@
#define __OPENCV_GPU_COMMON_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/core/gpu_types.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"

@@ -0,0 +1,691 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_GPU_HPP__
#define __OPENCV_CORE_GPU_HPP__
#ifndef __cplusplus
# error gpu.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
#include "opencv2/core/gpu_types.hpp"
namespace cv { namespace gpu {
//////////////////////////////// GpuMat ///////////////////////////////
// Smart pointer for GPU memory with reference counting.
// Its interface is mostly similar to cv::Mat's.
class CV_EXPORTS GpuMat
{
public:
//! default constructor
GpuMat();
//! constructs GpuMat of the specified size and type
GpuMat(int rows, int cols, int type);
GpuMat(Size size, int type);
//! constructs GpuMat and fills it with the specified value s
GpuMat(int rows, int cols, int type, Scalar s);
GpuMat(Size size, int type, Scalar s);
//! copy constructor
GpuMat(const GpuMat& m);
//! constructor for GpuMat headers pointing to user-allocated data
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
//! creates a GpuMat header for a part of the bigger matrix
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
GpuMat(const GpuMat& m, Rect roi);
//! builds GpuMat from host memory (Blocking call)
explicit GpuMat(InputArray arr);
//! destructor - calls release()
~GpuMat();
//! assignment operators
GpuMat& operator =(const GpuMat& m);
//! allocates new GpuMat data unless the GpuMat already has specified size and type
void create(int rows, int cols, int type);
void create(Size size, int type);
//! decreases the reference counter; deallocates the data when the reference counter reaches 0
void release();
//! swaps with other smart pointer
void swap(GpuMat& mat);
//! performs data upload to GpuMat (Blocking call)
void upload(InputArray arr);
//! performs data upload to GpuMat (Non-Blocking call)
void upload(InputArray arr, Stream& stream);
//! performs data download from device to host memory (Blocking call)
void download(OutputArray dst) const;
//! performs data download from device to host memory (Non-Blocking call)
void download(OutputArray dst, Stream& stream) const;
//! returns deep copy of the GpuMat, i.e. the data is copied
GpuMat clone() const;
//! copies the GpuMat content to device memory (Blocking call)
void copyTo(OutputArray dst) const;
//! copies the GpuMat content to device memory (Non-Blocking call)
void copyTo(OutputArray dst, Stream& stream) const;
//! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
void copyTo(OutputArray dst, InputArray mask) const;
//! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
//! sets some of the GpuMat elements to s (Blocking call)
GpuMat& setTo(Scalar s);
//! sets some of the GpuMat elements to s (Non-Blocking call)
GpuMat& setTo(Scalar s, Stream& stream);
//! sets some of the GpuMat elements to s, according to the mask (Blocking call)
GpuMat& setTo(Scalar s, InputArray mask);
//! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
//! converts GpuMat to another datatype (Blocking call)
void convertTo(OutputArray dst, int rtype) const;
//! converts GpuMat to another datatype (Non-Blocking call)
void convertTo(OutputArray dst, int rtype, Stream& stream) const;
//! converts GpuMat to another datatype with scaling (Blocking call)
void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
//! converts GpuMat to another datatype with scaling (Non-Blocking call)
void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
void assignTo(GpuMat& m, int type=-1) const;
//! returns pointer to y-th row
uchar* ptr(int y = 0);
const uchar* ptr(int y = 0) const;
//! template version of the above method
template<typename _Tp> _Tp* ptr(int y = 0);
template<typename _Tp> const _Tp* ptr(int y = 0) const;
template <typename _Tp> operator PtrStepSz<_Tp>() const;
template <typename _Tp> operator PtrStep<_Tp>() const;
//! returns a new GpuMat header for the specified row
GpuMat row(int y) const;
//! returns a new GpuMat header for the specified column
GpuMat col(int x) const;
//! ... for the specified row span
GpuMat rowRange(int startrow, int endrow) const;
GpuMat rowRange(Range r) const;
//! ... for the specified column span
GpuMat colRange(int startcol, int endcol) const;
GpuMat colRange(Range r) const;
//! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
GpuMat operator ()(Range rowRange, Range colRange) const;
GpuMat operator ()(Rect roi) const;
//! creates alternative GpuMat header for the same data, with different
//! number of channels and/or different number of rows
GpuMat reshape(int cn, int rows = 0) const;
//! locates GpuMat header within a parent GpuMat
void locateROI(Size& wholeSize, Point& ofs) const;
//! moves/resizes the current GpuMat ROI inside the parent GpuMat
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
//! returns true iff the GpuMat data is continuous
//! (i.e. when there are no gaps between successive rows)
bool isContinuous() const;
//! returns element size in bytes
size_t elemSize() const;
//! returns the size of element channel in bytes
size_t elemSize1() const;
//! returns element type
int type() const;
//! returns element depth
int depth() const;
//! returns number of channels
int channels() const;
//! returns step/elemSize1()
size_t step1() const;
//! returns GpuMat size : width == number of columns, height == number of rows
Size size() const;
//! returns true if GpuMat data is NULL
bool empty() const;
/*! includes several bit-fields:
- the magic signature
- continuity flag
- depth
- number of channels
*/
int flags;
//! the number of rows and columns
int rows, cols;
//! a distance between successive rows in bytes; includes the gap if any
size_t step;
//! pointer to the data
uchar* data;
//! pointer to the reference counter;
//! when GpuMat points to user-allocated data, the pointer is NULL
int* refcount;
//! helper fields used in locateROI and adjustROI
uchar* datastart;
uchar* dataend;
};
//! creates continuous matrix
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
//! ensures that the size of the given matrix is not less than (rows, cols)
//! and that its type matches the specified one
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is a limited cv::Mat with page-locked memory allocation.
// Page-locked memory is only needed for asynchronous and faster copying to the GPU.
// It is convertible to a cv::Mat header without reference counting,
// so you can use it with other OpenCV functions.
class CV_EXPORTS CudaMem
{
public:
enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
explicit CudaMem(AllocType alloc_type = PAGE_LOCKED);
CudaMem(const CudaMem& m);
CudaMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
CudaMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
//! creates from host memory, copying the data
explicit CudaMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
~CudaMem();
CudaMem& operator =(const CudaMem& m);
//! swaps with other smart pointer
void swap(CudaMem& b);
//! returns deep copy of the matrix, i.e. the data is copied
CudaMem clone() const;
//! allocates new matrix data unless the matrix already has specified size and type.
void create(int rows, int cols, int type);
void create(Size size, int type);
//! creates alternative CudaMem header for the same data, with different
//! number of channels and/or different number of rows
CudaMem reshape(int cn, int rows = 0) const;
//! decrements the reference counter and releases the memory if needed.
void release();
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;
//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
GpuMat createGpuMatHeader() const;
// Please see cv::Mat for descriptions
bool isContinuous() const;
size_t elemSize() const;
size_t elemSize1() const;
int type() const;
int depth() const;
int channels() const;
size_t step1() const;
Size size() const;
bool empty() const;
// Please see cv::Mat for descriptions
int flags;
int rows, cols;
size_t step;
uchar* data;
int* refcount;
uchar* datastart;
uchar* dataend;
AllocType alloc_type;
};
//! page-locks the matrix m memory and maps it for the device(s)
CV_EXPORTS void registerPageLocked(Mat& m);
//! unmaps the memory of matrix m, and makes it pageable again
CV_EXPORTS void unregisterPageLocked(Mat& m);
///////////////////////////////// Stream //////////////////////////////////
// Encapsulates a CUDA stream. Provides an interface for asynchronous copying.
// Passed to each function that supports async kernel execution.
// Reference counting is enabled.
class CV_EXPORTS Stream
{
typedef void (Stream::*bool_type)() const;
void this_type_does_not_support_comparisons() const {}
public:
typedef void (*StreamCallback)(int status, void* userData);
//! creates a new asynchronous stream
Stream();
//! queries an asynchronous stream for completion status
bool queryIfComplete() const;
//! waits for stream tasks to complete
void waitForCompletion();
//! makes a compute stream wait on an event
void waitEvent(const Event& event);
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
void enqueueHostCallback(StreamCallback callback, void* userData);
//! returns Stream object for the default CUDA stream
static Stream& Null();
//! returns true if stream object is not default (!= 0)
operator bool_type() const;
// obsolete methods
void enqueueDownload(const GpuMat& src, OutputArray dst);
void enqueueUpload(InputArray src, GpuMat& dst);
void enqueueCopy(const GpuMat& src, OutputArray dst);
void enqueueMemSet(GpuMat& src, Scalar val);
void enqueueMemSet(GpuMat& src, Scalar val, InputArray mask);
void enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha = 1.0, double beta = 0.0);
class Impl;
private:
Ptr<Impl> impl_;
Stream(const Ptr<Impl>& impl);
friend struct StreamAccessor;
};
class CV_EXPORTS Event
{
public:
enum CreateFlags
{
DEFAULT = 0x00, /**< Default event flag */
BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
DISABLE_TIMING = 0x02, /**< Event will not record timing data */
INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
};
explicit Event(CreateFlags flags = DEFAULT);
//! records an event
void record(Stream& stream = Stream::Null());
//! queries an event's status
bool queryIfComplete() const;
//! waits for an event to complete
void waitForCompletion();
//! computes the elapsed time between events
static float elapsedTime(const Event& start, const Event& end);
class Impl;
private:
Ptr<Impl> impl_;
friend struct EventAccessor;
};
//////////////////////////////// Initialization & Info ////////////////////////
//! this is the only function that does not throw exceptions if the library is compiled without CUDA
CV_EXPORTS int getCudaEnabledDeviceCount();
//! set device to be used for GPU executions for the calling host thread
CV_EXPORTS void setDevice(int device);
//! returns which device is currently being used for the calling host thread
CV_EXPORTS int getDevice();
//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
CV_EXPORTS void resetDevice();
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
FEATURE_SET_COMPUTE_11 = 11,
FEATURE_SET_COMPUTE_12 = 12,
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30,
FEATURE_SET_COMPUTE_35 = 35,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
//! information about what GPU archs this OpenCV GPU module was compiled for
class CV_EXPORTS TargetArchs
{
public:
static bool builtWith(FeatureSet feature_set);
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
static bool hasEqualOrLessPtx(int major, int minor);
static bool hasEqualOrGreater(int major, int minor);
static bool hasEqualOrGreaterPtx(int major, int minor);
static bool hasEqualOrGreaterBin(int major, int minor);
};
//! information about the given GPU.
class CV_EXPORTS DeviceInfo
{
public:
//! creates DeviceInfo object for the current GPU
DeviceInfo();
//! creates DeviceInfo object for the given GPU
DeviceInfo(int device_id);
//! device number.
int deviceID() const;
//! ASCII string identifying device
const char* name() const;
//! global memory available on device in bytes
size_t totalGlobalMem() const;
//! shared memory available per block in bytes
size_t sharedMemPerBlock() const;
//! 32-bit registers available per block
int regsPerBlock() const;
//! warp size in threads
int warpSize() const;
//! maximum pitch in bytes allowed by memory copies
size_t memPitch() const;
//! maximum number of threads per block
int maxThreadsPerBlock() const;
//! maximum size of each dimension of a block
Vec3i maxThreadsDim() const;
//! maximum size of each dimension of a grid
Vec3i maxGridSize() const;
//! clock frequency in kilohertz
int clockRate() const;
//! constant memory available on device in bytes
size_t totalConstMem() const;
//! major compute capability
int major() const;
//! minor compute capability
int minor() const;
//! alignment requirement for textures
size_t textureAlignment() const;
//! pitch alignment requirement for texture references bound to pitched memory
size_t texturePitchAlignment() const;
//! number of multiprocessors on device
int multiProcessorCount() const;
//! specifies whether there is a run-time limit on kernels
bool kernelExecTimeoutEnabled() const;
//! device is integrated as opposed to discrete
bool integrated() const;
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool canMapHostMemory() const;
enum ComputeMode
{
ComputeModeDefault, /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
ComputeModeProhibited, /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
};
//! compute mode
ComputeMode computeMode() const;
//! maximum 1D texture size
int maxTexture1D() const;
//! maximum 1D mipmapped texture size
int maxTexture1DMipmap() const;
//! maximum size for 1D textures bound to linear memory
int maxTexture1DLinear() const;
//! maximum 2D texture dimensions
Vec2i maxTexture2D() const;
//! maximum 2D mipmapped texture dimensions
Vec2i maxTexture2DMipmap() const;
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
Vec3i maxTexture2DLinear() const;
//! maximum 2D texture dimensions if texture gather operations have to be performed
Vec2i maxTexture2DGather() const;
//! maximum 3D texture dimensions
Vec3i maxTexture3D() const;
//! maximum Cubemap texture dimensions
int maxTextureCubemap() const;
//! maximum 1D layered texture dimensions
Vec2i maxTexture1DLayered() const;
//! maximum 2D layered texture dimensions
Vec3i maxTexture2DLayered() const;
//! maximum Cubemap layered texture dimensions
Vec2i maxTextureCubemapLayered() const;
//! maximum 1D surface size
int maxSurface1D() const;
//! maximum 2D surface dimensions
Vec2i maxSurface2D() const;
//! maximum 3D surface dimensions
Vec3i maxSurface3D() const;
//! maximum 1D layered surface dimensions
Vec2i maxSurface1DLayered() const;
//! maximum 2D layered surface dimensions
Vec3i maxSurface2DLayered() const;
//! maximum Cubemap surface dimensions
int maxSurfaceCubemap() const;
//! maximum Cubemap layered surface dimensions
Vec2i maxSurfaceCubemapLayered() const;
//! alignment requirements for surfaces
size_t surfaceAlignment() const;
//! device can possibly execute multiple kernels concurrently
bool concurrentKernels() const;
//! device has ECC support enabled
bool ECCEnabled() const;
//! PCI bus ID of the device
int pciBusID() const;
//! PCI device ID of the device
int pciDeviceID() const;
//! PCI domain ID of the device
int pciDomainID() const;
//! true if device is a Tesla device using TCC driver, false otherwise
bool tccDriver() const;
//! number of asynchronous engines
int asyncEngineCount() const;
//! device shares a unified address space with the host
bool unifiedAddressing() const;
//! peak memory clock frequency in kilohertz
int memoryClockRate() const;
//! global memory bus width in bits
int memoryBusWidth() const;
//! size of L2 cache in bytes
int l2CacheSize() const;
//! maximum resident threads per multiprocessor
int maxThreadsPerMultiProcessor() const;
//! gets free and total device memory
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
//! checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
//! checks whether the GPU module can be run on the given device
bool isCompatible() const;
private:
int device_id_;
};
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
}} // namespace cv { namespace gpu {
namespace cv {
template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();
}
#include "opencv2/core/gpu.inl.hpp"
#endif /* __OPENCV_CORE_GPU_HPP__ */
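Editor's note: a minimal usage sketch of the refactored cv::gpu API declared in this header (GpuMat, CudaMem, Stream, Event). It is illustrative and not part of the commit; it assumes that Mat and GpuMat are accepted as InputArray/OutputArray, as the signatures above imply.

    #include "opencv2/core.hpp"
    #include "opencv2/core/gpu.hpp"

    using namespace cv;

    void sketch(const Mat& host)
    {
        // Blocking round trip: upload, convert on the device, download.
        gpu::GpuMat d_src, d_dst;
        d_src.upload(host);
        d_src.convertTo(d_dst, CV_32F);

        Mat result;
        d_dst.download(result);

        // Asynchronous variant: page-locked host buffers plus a non-default stream.
        gpu::CudaMem pinnedIn(host.size(), host.type(), gpu::CudaMem::PAGE_LOCKED);
        gpu::CudaMem pinnedOut(host.size(), CV_32FC(host.channels()), gpu::CudaMem::PAGE_LOCKED);
        Mat in  = pinnedIn.createMatHeader();    // Mat headers over the pinned buffers
        Mat out = pinnedOut.createMatHeader();
        host.copyTo(in);                         // stage the input in pinned memory

        gpu::Stream stream;
        gpu::Event start, stop;

        start.record(stream);
        d_src.upload(in, stream);                // non-blocking upload
        d_src.convertTo(d_dst, CV_32F, stream);  // non-blocking conversion
        d_dst.download(out, stream);             // non-blocking download
        stop.record(stream);

        stream.waitForCompletion();              // host waits for the queued work
        float gpu_ms = gpu::Event::elapsedTime(start, stop);
        (void)gpu_ms;
    }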

@@ -0,0 +1,641 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_GPUINL_HPP__
#define __OPENCV_CORE_GPUINL_HPP__
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {
//////////////////////////////// GpuMat ///////////////////////////////
inline
GpuMat::GpuMat()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
GpuMat::GpuMat(Size size_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
{
create(rows_, cols_, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
{
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(const GpuMat& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
{
if (refcount)
CV_XADD(refcount, 1);
}
inline
GpuMat::GpuMat(InputArray arr) :
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
upload(arr);
}
inline
GpuMat::~GpuMat()
{
release();
}
inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
if (this != &m)
{
GpuMat temp(m);
swap(temp);
}
return *this;
}
inline
void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
void GpuMat::swap(GpuMat& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
}
inline
GpuMat GpuMat::clone() const
{
GpuMat m;
copyTo(m);
return m;
}
inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
copyTo(dst, mask, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s)
{
return setTo(s, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
return setTo(s, mask, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
convertTo(dst, rtype, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
convertTo(dst, rtype, alpha, beta, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
convertTo(dst, rtype, alpha, 0.0, stream);
}
inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
if (_type < 0)
m = *this;
else
convertTo(m, _type);
}
inline
uchar* GpuMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
inline
const uchar* GpuMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
return (_Tp*)ptr(y);
}
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
return (const _Tp*)ptr(y);
}
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
return PtrStepSz<T>(rows, cols, (T*)data, step);
}
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
return PtrStep<T>((T*)data, step);
}
inline
GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline
GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline
GpuMat GpuMat::rowRange(Range r) const
{
return GpuMat(*this, r, Range::all());
}
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline
GpuMat GpuMat::colRange(Range r) const
{
return GpuMat(*this, Range::all(), r);
}
inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
return GpuMat(*this, rowRange_, colRange_);
}
inline
GpuMat GpuMat::operator ()(Rect roi) const
{
return GpuMat(*this, roi);
}
inline
bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline
Size GpuMat::size() const
{
return Size(cols, rows);
}
inline
bool GpuMat::empty() const
{
return data == 0;
}
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
GpuMat m;
createContinuous(rows, cols, type, m);
return m;
}
static inline
void createContinuous(Size size, int type, OutputArray arr)
{
createContinuous(size.height, size.width, type, arr);
}
static inline
GpuMat createContinuous(Size size, int type)
{
GpuMat m;
createContinuous(size, type, m);
return m;
}
static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
ensureSizeIsEnough(size.height, size.width, type, arr);
}
static inline
void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
//////////////////////////////// CudaMem ////////////////////////////////
inline
CudaMem::CudaMem(AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
}
inline
CudaMem::CudaMem(const CudaMem& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
inline
CudaMem::CudaMem(int rows_, int cols_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
CudaMem::CudaMem(Size size_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
CudaMem::CudaMem(InputArray arr, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
arr.getMat().copyTo(*this);
}
inline
CudaMem::~CudaMem()
{
release();
}
inline
CudaMem& CudaMem::operator =(const CudaMem& m)
{
if (this != &m)
{
CudaMem temp(m);
swap(temp);
}
return *this;
}
inline
void CudaMem::swap(CudaMem& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
std::swap(alloc_type, b.alloc_type);
}
inline
CudaMem CudaMem::clone() const
{
CudaMem m(size(), type(), alloc_type);
createMatHeader().copyTo(m);
return m;
}
inline
void CudaMem::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
Mat CudaMem::createMatHeader() const
{
return Mat(size(), type(), data, step);
}
inline
bool CudaMem::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t CudaMem::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t CudaMem::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int CudaMem::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int CudaMem::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int CudaMem::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t CudaMem::step1() const
{
return step / elemSize1();
}
inline
Size CudaMem::size() const
{
return Size(cols, rows);
}
inline
bool CudaMem::empty() const
{
return data == 0;
}
static inline
void swap(CudaMem& a, CudaMem& b)
{
a.swap(b);
}
//////////////////////////////// Stream ///////////////////////////////
inline
void Stream::enqueueDownload(const GpuMat& src, OutputArray dst)
{
src.download(dst, *this);
}
inline
void Stream::enqueueUpload(InputArray src, GpuMat& dst)
{
dst.upload(src, *this);
}
inline
void Stream::enqueueCopy(const GpuMat& src, OutputArray dst)
{
src.copyTo(dst, *this);
}
inline
void Stream::enqueueMemSet(GpuMat& src, Scalar val)
{
src.setTo(val, *this);
}
inline
void Stream::enqueueMemSet(GpuMat& src, Scalar val, InputArray mask)
{
src.setTo(val, mask, *this);
}
inline
void Stream::enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha, double beta)
{
src.convertTo(dst, dtype, alpha, beta, *this);
}
inline
Stream::Stream(const Ptr<Impl>& impl)
: impl_(impl)
{
}
//////////////////////////////// Initialization & Info ////////////////////////
inline
bool TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
}
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}
inline
DeviceInfo::DeviceInfo()
{
device_id_ = getDevice();
}
inline
DeviceInfo::DeviceInfo(int device_id)
{
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
device_id_ = device_id;
}
inline
int DeviceInfo::deviceID() const
{
return device_id_;
}
inline
size_t DeviceInfo::freeMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
}
inline
size_t DeviceInfo::totalMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
}
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
int version = major() * 10 + minor();
return version >= feature_set;
}
}} // namespace cv { namespace gpu {
//////////////////////////////// Mat ////////////////////////////////
namespace cv {
inline
Mat::Mat(const gpu::GpuMat& m)
: flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
{
m.download(*this);
}
}
#endif // __OPENCV_CORE_GPUINL_HPP__
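Editor's note: as the inline definitions above show, the obsolete Stream::enqueue* methods are now thin forwarders to the new GpuMat methods. A small equivalence sketch (variable names are hypothetical, and the matrices are assumed to hold valid data):

    cv::gpu::Stream stream;
    cv::gpu::GpuMat d_src, d_dst;
    cv::Mat host_in, host_out;

    // Obsolete style, kept for source compatibility...
    stream.enqueueUpload(host_in, d_src);
    stream.enqueueCopy(d_src, d_dst);
    stream.enqueueDownload(d_dst, host_out);

    // ...forwards to exactly these calls on the new API:
    d_src.upload(host_in, stream);
    d_src.copyTo(d_dst, stream);
    d_dst.download(host_out, stream);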

@@ -40,28 +40,38 @@
//
//M*/
#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#ifndef __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
#ifndef __cplusplus
# error gpu_stream_accessor.hpp header must be compiled as C++
#endif
// This is the only header file that depends on CUDA. All other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you can get the CUDA stream instance using the class below.
// In that case you have to install the CUDA Toolkit.
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
namespace cv
{
namespace gpu
{
class Stream;
class Event;
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
};
struct EventAccessor
{
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
};
}
}
#endif /* __OPENCV_CUDA_STREAM_ACCESSOR_HPP__ */
#endif /* __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__ */
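Editor's note: StreamAccessor and EventAccessor are the interoperability escape hatch described in the comments above. A hypothetical sketch of queueing custom CUDA work on the same stream as OpenCV (my_kernel_launcher is a user-supplied function, not part of OpenCV):

    #include <cuda_runtime.h>
    #include "opencv2/core/gpu.hpp"
    #include "opencv2/core/gpu_stream_accessor.hpp"

    // Hypothetical user-provided launcher for a custom CUDA kernel.
    void my_kernel_launcher(unsigned char* data, size_t step, int rows, int cols, cudaStream_t s);

    void run_custom_kernel(cv::gpu::GpuMat& d_img, cv::gpu::Stream& stream)
    {
        // Obtain the raw cudaStream_t so the custom kernel is queued behind
        // whatever OpenCV has already enqueued on this stream.
        cudaStream_t raw = cv::gpu::StreamAccessor::getStream(stream);

        my_kernel_launcher(d_img.data, d_img.step, d_img.rows, d_img.cols, raw);

        stream.waitForCompletion();   // synchronize through the OpenCV wrapper
    }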

@@ -40,10 +40,12 @@
//
//M*/
#ifndef __OPENCV_CORE_DEVPTRS_HPP__
#define __OPENCV_CORE_DEVPTRS_HPP__
#ifndef __OPENCV_CORE_GPU_TYPES_HPP__
#define __OPENCV_CORE_GPU_TYPES_HPP__
#ifdef __cplusplus
#ifndef __cplusplus
# error gpu_types.hpp header must be compiled as C++
#endif
#ifdef __CUDACC__
#define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
@@ -58,7 +60,7 @@ namespace cv
// Simple lightweight structures that encapsulate information about an image on the device.
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.
template<typename T> struct DevPtr
template <typename T> struct DevPtr
{
typedef T elem_type;
typedef int index_type;
@@ -75,7 +77,7 @@ namespace cv
__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
};
template<typename T> struct PtrSz : public DevPtr<T>
template <typename T> struct PtrSz : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
@@ -83,12 +85,12 @@ namespace cv
size_t size;
};
template<typename T> struct PtrStep : public DevPtr<T>
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
//! stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!!
size_t step;
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
@@ -118,36 +120,7 @@ namespace cv
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
#if defined __GNUC__
#define __CV_GPU_DEPR_BEFORE__
#define __CV_GPU_DEPR_AFTER__ __attribute__ ((deprecated))
#elif defined(__MSVC__) //|| defined(__CUDACC__)
#pragma deprecated(DevMem2D_)
#define __CV_GPU_DEPR_BEFORE__ __declspec(deprecated)
#define __CV_GPU_DEPR_AFTER__
#else
#define __CV_GPU_DEPR_BEFORE__
#define __CV_GPU_DEPR_AFTER__
#endif
template <typename T> struct __CV_GPU_DEPR_BEFORE__ DevMem2D_ : public PtrStepSz<T>
{
DevMem2D_() {}
DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
template <typename U>
explicit __CV_GPU_DEPR_BEFORE__ DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
} __CV_GPU_DEPR_AFTER__ ;
typedef DevMem2D_<unsigned char> DevMem2Db;
typedef DevMem2Db DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
}
}
#endif // __cplusplus
#endif /* __OPENCV_CORE_DEVPTRS_HPP__ */
#endif /* __OPENCV_CORE_GPU_TYPES_HPP__ */
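Editor's note: gpu_types.hpp exists so that nvcc-compiled code can receive image descriptors without pulling in headers nvcc cannot compile (see the comment above about GpuMat). A hypothetical kernel sketch using PtrStepSz<T>; on the host side a GpuMat converts to PtrStepSz<unsigned char> implicitly through the conversion operator declared in gpu.hpp:

    // threshold.cu (hypothetical user code compiled with nvcc)
    #include "opencv2/core/gpu_types.hpp"

    __global__ void threshold_kernel(cv::gpu::PtrStepSz<unsigned char> img, unsigned char thresh)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < img.cols && y < img.rows)
            img.ptr(y)[x] = img.ptr(y)[x] > thresh ? 255 : 0;   // step-aware row access
    }

    void threshold_caller(cv::gpu::PtrStepSz<unsigned char> img, unsigned char thresh, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid((img.cols + block.x - 1) / block.x,
                        (img.rows + block.y - 1) / block.y);

        threshold_kernel<<<grid, block, 0, stream>>>(img, thresh);
    }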

@@ -1,722 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPUMAT_HPP__
#define __OPENCV_GPUMAT_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/cuda_devptrs.hpp"
namespace cv { namespace gpu
{
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is a limited cv::Mat with page-locked memory allocation.
// Page-locked memory is only needed for asynchronous and faster copying to the GPU.
// It is convertible to a cv::Mat header without reference counting,
// so you can use it with other OpenCV functions.
// Page-locks the matrix m memory and maps it for the device(s)
CV_EXPORTS void registerPageLocked(Mat& m);
// Unmaps the memory of matrix m, and makes it pageable again.
CV_EXPORTS void unregisterPageLocked(Mat& m);
class CV_EXPORTS CudaMem
{
public:
enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };
CudaMem();
CudaMem(const CudaMem& m);
CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
//! creates from cv::Mat, copying the data
explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
~CudaMem();
CudaMem& operator = (const CudaMem& m);
//! returns deep copy of the matrix, i.e. the data is copied
CudaMem clone() const;
//! allocates new matrix data unless the matrix already has specified size and type.
void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
//! decrements the reference counter and releases the memory if needed.
void release();
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;
operator Mat() const;
//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
GpuMat createGpuMatHeader() const;
operator GpuMat() const;
//! returns whether host memory can be mapped to the GPU address space
static bool canMapHostMemory();
// Please see cv::Mat for descriptions
bool isContinuous() const;
size_t elemSize() const;
size_t elemSize1() const;
int type() const;
int depth() const;
int channels() const;
size_t step1() const;
Size size() const;
bool empty() const;
// Please see cv::Mat for descriptions
int flags;
int rows, cols;
size_t step;
uchar* data;
int* refcount;
uchar* datastart;
uchar* dataend;
int alloc_type;
};
//////////////////////////////// CudaStream ////////////////////////////////
// Encapsulates a CUDA stream. Provides an interface for asynchronous copying.
// Passed to each function that supports async kernel execution.
// Reference counting is enabled
class CV_EXPORTS Stream
{
public:
Stream();
~Stream();
Stream(const Stream&);
Stream& operator =(const Stream&);
bool queryIfComplete();
void waitForCompletion();
//! downloads asynchronously
// Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
void enqueueDownload(const GpuMat& src, CudaMem& dst);
void enqueueDownload(const GpuMat& src, Mat& dst);
//! uploads asynchronously
// Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
void enqueueUpload(const CudaMem& src, GpuMat& dst);
void enqueueUpload(const Mat& src, GpuMat& dst);
//! copy asynchronously
void enqueueCopy(const GpuMat& src, GpuMat& dst);
//! memory set asynchronously
void enqueueMemSet(GpuMat& src, Scalar val);
void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);
//! converts matrix type, e.g. from float to uchar, depending on type
void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
void enqueueHostCallback(StreamCallback callback, void* userData);
static Stream& Null();
operator bool() const;
private:
struct Impl;
explicit Stream(Impl* impl);
void create();
void release();
Impl *impl;
friend struct StreamAccessor;
};
//////////////////////////////// Initialization & Info ////////////////////////
//! This is the only function that does not throw exceptions if the library is compiled without CUDA.
CV_EXPORTS int getCudaEnabledDeviceCount();
//! Functions below throw cv::Exception if the library is compiled without CUDA.
CV_EXPORTS void setDevice(int device);
CV_EXPORTS int getDevice();
//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
//! Any subsequent API call to this device will reinitialize the device.
CV_EXPORTS void resetDevice();
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
FEATURE_SET_COMPUTE_11 = 11,
FEATURE_SET_COMPUTE_12 = 12,
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
FEATURE_SET_COMPUTE_30 = 30,
FEATURE_SET_COMPUTE_35 = 35,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
// Checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
// Gives information about what GPU archs this OpenCV GPU module was
// compiled for
class CV_EXPORTS TargetArchs
{
public:
static bool builtWith(FeatureSet feature_set);
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
static bool hasEqualOrLessPtx(int major, int minor);
static bool hasEqualOrGreater(int major, int minor);
static bool hasEqualOrGreaterPtx(int major, int minor);
static bool hasEqualOrGreaterBin(int major, int minor);
private:
TargetArchs();
};
// Gives information about the given GPU
class CV_EXPORTS DeviceInfo
{
public:
// Creates DeviceInfo object for the current GPU
DeviceInfo() : device_id_(getDevice()) { query(); }
// Creates DeviceInfo object for the given GPU
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
String name() const { return name_; }
// Return compute capability versions
int majorVersion() const { return majorVersion_; }
int minorVersion() const { return minorVersion_; }
int multiProcessorCount() const { return multi_processor_count_; }
size_t sharedMemPerBlock() const;
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
// Checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
// Checks whether the GPU module can be run on the given device
bool isCompatible() const;
int deviceID() const { return device_id_; }
private:
void query();
int device_id_;
String name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
//////////////////////////////// GpuMat ///////////////////////////////
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
class CV_EXPORTS GpuMat
{
public:
//! default constructor
GpuMat();
//! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
GpuMat(int rows, int cols, int type);
GpuMat(Size size, int type);
//! constructs GpuMatrix and fills it with the specified value _s.
GpuMat(int rows, int cols, int type, Scalar s);
GpuMat(Size size, int type, Scalar s);
//! copy constructor
GpuMat(const GpuMat& m);
//! constructor for GpuMatrix headers pointing to user-allocated data
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
//! creates a matrix header for a part of the bigger matrix
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
GpuMat(const GpuMat& m, Rect roi);
//! builds GpuMat from Mat. Performs blocking upload to device.
explicit GpuMat(const Mat& m);
//! destructor - calls release()
~GpuMat();
//! assignment operators
GpuMat& operator = (const GpuMat& m);
//! performs blocking upload of data to GpuMat.
void upload(const Mat& m);
//! downloads data from device to host memory. Blocking call.
void download(Mat& m) const;
//! returns a new GpuMatrix header for the specified row
GpuMat row(int y) const;
//! returns a new GpuMatrix header for the specified column
GpuMat col(int x) const;
//! ... for the specified row span
GpuMat rowRange(int startrow, int endrow) const;
GpuMat rowRange(Range r) const;
//! ... for the specified column span
GpuMat colRange(int startcol, int endcol) const;
GpuMat colRange(Range r) const;
//! returns deep copy of the GpuMatrix, i.e. the data is copied
GpuMat clone() const;
//! copies the GpuMatrix content to "m".
// It calls m.create(this->size(), this->type()).
void copyTo(GpuMat& m) const;
//! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
void copyTo(GpuMat& m, const GpuMat& mask) const;
//! converts GpuMatrix to another datatype with optional scaling. See cvConvertScale.
void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;
void assignTo(GpuMat& m, int type=-1) const;
//! sets every GpuMatrix element to s
GpuMat& operator = (Scalar s);
//! sets some of the GpuMatrix elements to s, according to the mask
GpuMat& setTo(Scalar s, const GpuMat& mask = GpuMat());
//! creates alternative GpuMatrix header for the same data, with different
// number of channels and/or different number of rows. see cvReshape.
GpuMat reshape(int cn, int rows = 0) const;
//! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type.
// previous data is unreferenced if needed.
void create(int rows, int cols, int type);
void create(Size size, int type);
//! decreases reference counter;
// deallocate the data when reference counter reaches 0.
void release();
//! swaps with other smart pointer
void swap(GpuMat& mat);
//! locates GpuMatrix header within a parent GpuMatrix. See below
void locateROI(Size& wholeSize, Point& ofs) const;
//! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
//! extracts a rectangular sub-GpuMatrix
// (this is a generalized form of row, rowRange etc.)
GpuMat operator()(Range rowRange, Range colRange) const;
GpuMat operator()(Rect roi) const;
//! returns true iff the GpuMatrix data is continuous
// (i.e. when there are no gaps between successive rows).
// similar to CV_IS_GpuMat_CONT(cvGpuMat->type)
bool isContinuous() const;
//! returns element size in bytes,
// similar to CV_ELEM_SIZE(cvMat->type)
size_t elemSize() const;
//! returns the size of element channel in bytes.
size_t elemSize1() const;
//! returns element type, similar to CV_MAT_TYPE(cvMat->type)
int type() const;
//! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
int depth() const;
//! returns element type, similar to CV_MAT_CN(cvMat->type)
int channels() const;
//! returns step/elemSize1()
size_t step1() const;
//! returns GpuMatrix size:
// width == number of columns, height == number of rows
Size size() const;
//! returns true if GpuMatrix data is NULL
bool empty() const;
//! returns pointer to y-th row
uchar* ptr(int y = 0);
const uchar* ptr(int y = 0) const;
//! template version of the above method
template<typename _Tp> _Tp* ptr(int y = 0);
template<typename _Tp> const _Tp* ptr(int y = 0) const;
template <typename _Tp> operator PtrStepSz<_Tp>() const;
template <typename _Tp> operator PtrStep<_Tp>() const;
// Deprecated function
__CV_GPU_DEPR_BEFORE__ template <typename _Tp> operator DevMem2D_<_Tp>() const __CV_GPU_DEPR_AFTER__;
#undef __CV_GPU_DEPR_BEFORE__
#undef __CV_GPU_DEPR_AFTER__
/*! includes several bit-fields:
- the magic signature
- continuity flag
- depth
- number of channels
*/
int flags;
//! the number of rows and columns
int rows, cols;
//! a distance between successive rows in bytes; includes the gap if any
size_t step;
//! pointer to the data
uchar* data;
//! pointer to the reference counter;
// when GpuMatrix points to user-allocated data, the pointer is NULL
int* refcount;
//! helper fields used in locateROI and adjustROI
uchar* datastart;
uchar* dataend;
};
//! Creates continuous GPU matrix
CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);
//! Ensures that the size of the given matrix is not less than (rows, cols)
//! and that the matrix type matches the specified one
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat &mat);
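For orientation, a minimal usage sketch of the interface declared above (illustrative only; it assumes a CUDA-capable device and an OpenCV build with CUDA support):

// illustrative sketch, not part of the header
#include "opencv2/core/gpumat.hpp"

void gpumat_usage_sketch(const cv::Mat& host)
{
    cv::gpu::GpuMat d_img;
    d_img.upload(host);                                      // blocking host -> device copy

    cv::gpu::GpuMat d_roi = d_img(cv::Rect(0, 0, 16, 16));   // header only, no data copied
    d_roi.setTo(cv::Scalar::all(0));                         // zero the ROI in place

    cv::Mat result;
    d_img.download(result);                                  // blocking device -> host copy
}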
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline
GpuMat::GpuMat()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
GpuMat::GpuMat(Size size_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
{
create(rows_, cols_, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
{
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline
GpuMat::~GpuMat()
{
release();
}
inline
GpuMat GpuMat::clone() const
{
GpuMat m;
copyTo(m);
return m;
}
inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
if (_type < 0)
m = *this;
else
convertTo(m, _type);
}
inline
size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline
bool GpuMat::empty() const
{
return data == 0;
}
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
return (_Tp*)ptr(y);
}
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
return (const _Tp*)ptr(y);
}
inline
GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline
GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline
GpuMat GpuMat::rowRange(Range r) const
{
return GpuMat(*this, r, Range::all());
}
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline
GpuMat GpuMat::colRange(Range r) const
{
return GpuMat(*this, Range::all(), r);
}
inline
void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
GpuMat GpuMat::operator()(Range _rowRange, Range _colRange) const
{
return GpuMat(*this, _rowRange, _colRange);
}
inline
GpuMat GpuMat::operator()(Rect roi) const
{
return GpuMat(*this, roi);
}
inline
bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline
Size GpuMat::size() const
{
return Size(cols, rows);
}
inline
uchar* GpuMat::ptr(int y)
{
CV_DbgAssert((unsigned)y < (unsigned)rows);
return data + step * y;
}
inline
const uchar* GpuMat::ptr(int y) const
{
CV_DbgAssert((unsigned)y < (unsigned)rows);
return data + step * y;
}
inline
GpuMat& GpuMat::operator = (Scalar s)
{
setTo(s);
return *this;
}
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
return PtrStepSz<T>(rows, cols, (T*)data, step);
}
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
return PtrStep<T>((T*)data, step);
}
template <class T> inline
GpuMat::operator DevMem2D_<T>() const
{
return DevMem2D_<T>(rows, cols, (T*)data, step);
}
static inline
void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
GpuMat m;
createContinuous(rows, cols, type, m);
return m;
}
static inline
void createContinuous(Size size, int type, GpuMat& m)
{
createContinuous(size.height, size.width, type, m);
}
static inline
GpuMat createContinuous(Size size, int type)
{
GpuMat m;
createContinuous(size, type, m);
return m;
}
static inline
void ensureSizeIsEnough(Size size, int type, GpuMat& m)
{
ensureSizeIsEnough(size.height, size.width, type, m);
}
}} // cv::gpu
#endif // __OPENCV_GPUMAT_HPP__
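The allocation helpers above are typically used to pre-allocate reusable device buffers; a hedged sketch, assuming the opencv2/core/gpumat.hpp header is included (the exact reuse behaviour of ensureSizeIsEnough depends on the current buffer):

// illustrative sketch only
void buffer_reuse_sketch()
{
    cv::gpu::GpuMat buf;

    // allocate a 480x640 CV_8UC1 matrix with no gaps between rows
    cv::gpu::createContinuous(480, 640, CV_8UC1, buf);

    // reuses the existing allocation when it is already large enough,
    // otherwise reallocates
    cv::gpu::ensureSizeIsEnough(240, 320, CV_8UC1, buf);
}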

@ -77,7 +77,7 @@ public:
STD_VECTOR_MAT = 5 << KIND_SHIFT,
EXPR = 6 << KIND_SHIFT,
OPENGL_BUFFER = 7 << KIND_SHIFT,
OPENGL_TEXTURE = 8 << KIND_SHIFT,
CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT
};
@ -94,13 +94,12 @@ public:
_InputArray(const double& val);
_InputArray(const gpu::GpuMat& d_mat);
_InputArray(const ogl::Buffer& buf);
_InputArray(const ogl::Texture2D& tex);
_InputArray(const gpu::CudaMem& cuda_mem);
virtual Mat getMat(int i=-1) const;
virtual void getMatVector(std::vector<Mat>& mv) const;
virtual gpu::GpuMat getGpuMat() const;
virtual ogl::Buffer getOGlBuffer() const;
virtual ogl::Texture2D getOGlTexture2D() const;
virtual int kind() const;
virtual Size size(int i=-1) const;
@ -143,7 +142,7 @@ public:
_OutputArray(std::vector<Mat>& vec);
_OutputArray(gpu::GpuMat& d_mat);
_OutputArray(ogl::Buffer& buf);
_OutputArray(ogl::Texture2D& tex);
_OutputArray(gpu::CudaMem& cuda_mem);
template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
@ -155,7 +154,7 @@ public:
_OutputArray(const std::vector<Mat>& vec);
_OutputArray(const gpu::GpuMat& d_mat);
_OutputArray(const ogl::Buffer& buf);
_OutputArray(const ogl::Texture2D& tex);
_OutputArray(const gpu::CudaMem& cuda_mem);
template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
@ -169,7 +168,7 @@ public:
virtual Mat& getMatRef(int i=-1) const;
virtual gpu::GpuMat& getGpuMatRef() const;
virtual ogl::Buffer& getOGlBufferRef() const;
virtual ogl::Texture2D& getOGlTexture2DRef() const;
virtual gpu::CudaMem& getCudaMemRef() const;
virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;

@ -40,8 +40,12 @@
//
//M*/
#ifndef __OPENCV_OPENGL_INTEROP_HPP__
#define __OPENCV_OPENGL_INTEROP_HPP__
#ifndef __OPENCV_CORE_OPENGL_HPP__
#define __OPENCV_CORE_OPENGL_HPP__
#ifndef __cplusplus
# error opengl.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
@ -84,7 +88,7 @@ public:
//! create buffer
void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false) { create(asize.height, asize.width, atype, target, autoRelease); }
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
//! release memory and delete buffer object
void release();
@ -92,11 +96,15 @@ public:
//! set auto release mode (if true, release will be called in object's destructor)
void setAutoRelease(bool flag);
//! copy from host/device memory
//! copy from host/device memory (blocking)
void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
//! copy from device memory (non blocking)
void copyFrom(InputArray arr, gpu::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
//! copy to host/device memory
void copyTo(OutputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false) const;
//! copy to host/device memory (blocking)
void copyTo(OutputArray arr) const;
//! copy to device memory (non blocking)
void copyTo(OutputArray arr, gpu::Stream& stream) const;
//! create copy of current buffer
Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
@ -111,21 +119,26 @@ public:
Mat mapHost(Access access);
void unmapHost();
//! map to device memory
//! map to device memory (blocking)
gpu::GpuMat mapDevice();
void unmapDevice();
int rows() const { return rows_; }
int cols() const { return cols_; }
Size size() const { return Size(cols_, rows_); }
bool empty() const { return rows_ == 0 || cols_ == 0; }
//! map to device memory (non blocking)
gpu::GpuMat mapDevice(gpu::Stream& stream);
void unmapDevice(gpu::Stream& stream);
int type() const { return type_; }
int depth() const { return CV_MAT_DEPTH(type_); }
int channels() const { return CV_MAT_CN(type_); }
int elemSize() const { return CV_ELEM_SIZE(type_); }
int elemSize1() const { return CV_ELEM_SIZE1(type_); }
int rows() const;
int cols() const;
Size size() const;
bool empty() const;
int type() const;
int depth() const;
int channels() const;
int elemSize() const;
int elemSize1() const;
//! get OpenGL object id
unsigned int bufId() const;
class Impl;
@ -165,7 +178,7 @@ public:
//! create texture
void create(int arows, int acols, Format aformat, bool autoRelease = false);
void create(Size asize, Format aformat, bool autoRelease = false) { create(asize.height, asize.width, aformat, autoRelease); }
void create(Size asize, Format aformat, bool autoRelease = false);
//! release memory and delete texture object
void release();
@ -182,13 +195,14 @@ public:
//! bind texture to current active texture unit for GL_TEXTURE_2D target
void bind() const;
int rows() const { return rows_; }
int cols() const { return cols_; }
Size size() const { return Size(cols_, rows_); }
bool empty() const { return rows_ == 0 || cols_ == 0; }
int rows() const;
int cols() const;
Size size() const;
bool empty() const;
Format format() const { return format_; }
Format format() const;
//! get OpenGL object id
unsigned int texId() const;
class Impl;
@ -224,8 +238,8 @@ public:
void bind() const;
int size() const { return size_; }
bool empty() const { return size_ == 0; }
int size() const;
bool empty() const;
private:
int size_;
@ -260,14 +274,14 @@ enum {
CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
}} // namespace cv::gl
}} // namespace cv::ogl
namespace cv { namespace gpu {
//! set a CUDA device to use OpenGL interoperability
CV_EXPORTS void setGlDevice(int device = 0);
}} // cv::gpu
}}
namespace cv {
@ -276,4 +290,149 @@ template <> CV_EXPORTS void Ptr<cv::ogl::Texture2D::Impl>::delete_obj();
}
#endif // __OPENCV_OPENGL_INTEROP_HPP__
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline
cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(arows, acols, atype, target, autoRelease);
}
inline
cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(asize, atype, target, autoRelease);
}
inline
void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
{
create(asize.height, asize.width, atype, target, autoRelease);
}
inline
int cv::ogl::Buffer::rows() const
{
return rows_;
}
inline
int cv::ogl::Buffer::cols() const
{
return cols_;
}
inline
cv::Size cv::ogl::Buffer::size() const
{
return Size(cols_, rows_);
}
inline
bool cv::ogl::Buffer::empty() const
{
return rows_ == 0 || cols_ == 0;
}
inline
int cv::ogl::Buffer::type() const
{
return type_;
}
inline
int cv::ogl::Buffer::depth() const
{
return CV_MAT_DEPTH(type_);
}
inline
int cv::ogl::Buffer::channels() const
{
return CV_MAT_CN(type_);
}
inline
int cv::ogl::Buffer::elemSize() const
{
return CV_ELEM_SIZE(type_);
}
inline
int cv::ogl::Buffer::elemSize1() const
{
return CV_ELEM_SIZE1(type_);
}
///////
inline
cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(arows, acols, aformat, autoRelease);
}
inline
cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(asize, aformat, autoRelease);
}
inline
void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
{
create(asize.height, asize.width, aformat, autoRelease);
}
inline
int cv::ogl::Texture2D::rows() const
{
return rows_;
}
inline
int cv::ogl::Texture2D::cols() const
{
return cols_;
}
inline
cv::Size cv::ogl::Texture2D::size() const
{
return Size(cols_, rows_);
}
inline
bool cv::ogl::Texture2D::empty() const
{
return rows_ == 0 || cols_ == 0;
}
inline
cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
{
return format_;
}
///////
inline
cv::ogl::Arrays::Arrays() : size_(0)
{
}
inline
int cv::ogl::Arrays::size() const
{
return size_;
}
inline
bool cv::ogl::Arrays::empty() const
{
return size_ == 0;
}
#endif /* __OPENCV_CORE_OPENGL_HPP__ */
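A hedged sketch of the stream-aware ogl::Buffer interface declared above, assuming the relevant opencv2/core headers are included, an OpenGL context is current, and CUDA/OpenGL interop has been enabled with cv::gpu::setGlDevice:

// illustrative sketch only
void ogl_buffer_sketch(const cv::gpu::GpuMat& d_img, cv::gpu::Stream& stream)
{
    cv::ogl::Buffer buf;
    buf.copyFrom(d_img, stream);                        // non-blocking device -> buffer copy

    cv::gpu::GpuMat d_mapped = buf.mapDevice(stream);   // view the buffer as a GpuMat
    // ... enqueue asynchronous work on d_mapped here ...
    buf.unmapDevice(stream);

    stream.waitForCompletion();                         // synchronize before rendering from buf
}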

@ -41,8 +41,8 @@
//
//M*/
#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
#define __OPENCV_CORE_GPU_PRIVATE_HPP__
#ifndef __OPENCV_CORE_PRIVATE_GPU_HPP__
#define __OPENCV_CORE_PRIVATE_GPU_HPP__
#ifndef __OPENCV_BUILD
# error this is a private header which should not be used from outside of the OpenCV library
@ -53,11 +53,13 @@
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#include "opencv2/core/gpu.hpp"
#ifdef HAVE_CUDA
# include <cuda.h>
# include <cuda_runtime.h>
# include <npp.h>
# include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/gpu_stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
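// for example, NPP 5.0.1 yields NPP_VERSION = 5*1000 + 0*100 + 1 = 5001,
// so minimum-version requirements can be checked with a single integer comparison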

@ -44,188 +44,113 @@
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/type_traits.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
namespace cv { namespace gpu { namespace cudev
{
void writeScalar(const uchar*);
void writeScalar(const schar*);
void writeScalar(const ushort*);
void writeScalar(const short int*);
void writeScalar(const int*);
void writeScalar(const float*);
void writeScalar(const double*);
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t);
}}}
#include "matrix_operations.hpp"
namespace cv { namespace gpu { namespace cudev
{
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// copyWithMask
template <typename T> void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
template <typename T>
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
if (colorMask)
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMask(mask), stream);
if (multiChannelMask)
cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMask(mask), stream);
else
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
}
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
static func_t tab[] =
static const func_t tab[] =
{
0,
copyToWithMask<unsigned char>,
copyToWithMask<unsigned short>,
copyWithMask<uchar>,
copyWithMask<ushort>,
0,
copyToWithMask<int>,
copyWithMask<int>,
0,
0,
0,
copyToWithMask<double>
copyWithMask<double>
};
tab[elemSize1](src, dst, cn, mask, colorMask, stream);
const func_t func = tab[elemSize1];
CV_DbgAssert( func != 0 );
func(src, dst, cn, mask, multiChannelMask, stream);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
__constant__ uchar scalar_8u[4];
__constant__ schar scalar_8s[4];
__constant__ ushort scalar_16u[4];
__constant__ short scalar_16s[4];
__constant__ int scalar_32s[4];
__constant__ float scalar_32f[4];
__constant__ double scalar_64f[4];
template <typename T> __device__ __forceinline__ T readScalar(int i);
template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}
void writeScalar(const uchar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
void writeScalar(const schar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
void writeScalar(const ushort* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
void writeScalar(const short* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
void writeScalar(const int* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
void writeScalar(const float* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
void writeScalar(const double* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}
// set
template<typename T>
__global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
template<typename T, class Mask>
__global__ void set(PtrStepSz<T> mat, const Mask mask, const int channels, const typename TypeVec<T, 4>::vec_type value)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
}
if (x >= mat.cols * channels || y >= mat.rows)
return;
template<typename T>
__global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
const T scalar[4] = {value.x, value.y, value.z, value.w};
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
if (mask(y, x / channels))
mat(y, x) = scalar[x % channels];
}
template <typename T>
void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream)
{
writeScalar(scalar);
typedef typename TypeVec<T, 4>::vec_type scalar_t;
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
dim3 block(32, 8);
dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
set<T><<<grid, block, 0, stream>>>(mat, WithOutMask(), channels, VecTraits<scalar_t>::make(scalar));
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, int channels, cudaStream_t stream);
template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set<short >(PtrStepSz<short > mat, const short* scalar, int channels, cudaStream_t stream);
template void set<int >(PtrStepSz<int > mat, const int* scalar, int channels, cudaStream_t stream);
template void set<float >(PtrStepSz<float > mat, const float* scalar, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, int channels, cudaStream_t stream);
template <typename T>
void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
{
writeScalar(scalar);
typedef typename TypeVec<T, 4>::vec_type scalar_t;
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
dim3 block(32, 8);
dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
set<T><<<grid, block, 0, stream>>>(mat, SingleMask(mask), channels, VecTraits<scalar_t>::make(scalar));
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream);
template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<short >(PtrStepSz<short > mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<int >(PtrStepSz<int > mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<float >(PtrStepSz<float > mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// convert
template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
{
@ -290,18 +215,11 @@ namespace cv { namespace gpu { namespace cudev
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}
#if defined __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wmissing-declarations"
#endif
void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
@ -372,11 +290,7 @@ namespace cv { namespace gpu { namespace cudev
}
};
caller_t func = tab[sdepth][ddepth];
const caller_t func = tab[sdepth][ddepth];
func(src, dst, alpha, beta, stream);
}
#if defined __clang__
# pragma clang diagnostic pop
#endif
}}} // namespace cv { namespace gpu { namespace cudev

@ -0,0 +1,57 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda/common.hpp"
namespace cv { namespace gpu { namespace cudev
{
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream);
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
}}}
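A hedged sketch of how host code can invoke the kernels declared above, assuming matrix_operations.hpp and the core GPU headers are included; the dispatch actually used by GpuMat (in gpu_mat.cpp, whose diff is suppressed below) is presumably more general, switching on depth and channel count. The GpuMat-to-PtrStepSz conversion comes from gpu.inl.hpp.

// illustrative sketch only: fill a single-channel float matrix with a constant
void fill_with_value_sketch(cv::gpu::GpuMat& mat, float value, cudaStream_t stream)
{
    CV_Assert( mat.type() == CV_32FC1 );

    const float scalar[4] = { value, 0.f, 0.f, 0.f };

    // GpuMat converts implicitly to PtrStepSz<float>; the float instantiation
    // of set() is exported from matrix_operations.cu
    cv::gpu::cudev::set<float>(mat, scalar, 1, stream);
}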

@ -1,348 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA)
cv::gpu::Stream::Stream() { throw_no_cuda(); }
cv::gpu::Stream::~Stream() {}
cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); }
Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); }
void cv::gpu::Stream::create() { throw_no_cuda(); }
void cv::gpu::Stream::release() { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);
void setTo(GpuMat& src, Scalar s, cudaStream_t stream);
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
}}
struct Stream::Impl
{
static cudaStream_t getStream(const Impl* impl)
{
return impl ? impl->stream : 0;
}
cudaStream_t stream;
int ref_counter;
};
cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
return Stream::Impl::getStream(stream.impl);
}
cv::gpu::Stream::Stream() : impl(0)
{
create();
}
cv::gpu::Stream::~Stream()
{
release();
}
cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
{
if (impl)
CV_XADD(&impl->ref_counter, 1);
}
Stream& cv::gpu::Stream::operator =(const Stream& stream)
{
if (this != &stream)
{
release();
impl = stream.impl;
if (impl)
CV_XADD(&impl->ref_counter, 1);
}
return *this;
}
bool cv::gpu::Stream::queryIfComplete()
{
cudaStream_t stream = Impl::getStream(impl);
cudaError_t err = cudaStreamQuery(stream);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
}
void cv::gpu::Stream::waitForCompletion()
{
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamSynchronize(stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// dst should already point to page-locked memory for the copy to be fully asynchronous;
// if not, allocation will be done, but dst will not point to page-locked memory
CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
{
dst.create(src.size(), src.type(), CudaMem::ALLOC_PAGE_LOCKED);
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
{
dst.create(src.size(), src.type());
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
{
dst.create(src.size(), src.type());
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
{
dst.create(src.size(), src.type());
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
{
const int sdepth = src.depth();
if (sdepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
cudaStream_t stream = Impl::getStream(impl);
if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
return;
}
if (sdepth == CV_8U)
{
int cn = src.channels();
if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
{
int ival = saturate_cast<uchar>(val[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
return;
}
}
setTo(src, val, stream);
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
const int sdepth = src.depth();
if (sdepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
CV_Assert(mask.type() == CV_8UC1);
cudaStream_t stream = Impl::getStream(impl);
setTo(src, val, mask, stream);
}
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta)
{
if (dtype < 0)
dtype = src.type();
else
dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels());
const int sdepth = src.depth();
const int ddepth = CV_MAT_DEPTH(dtype);
if (sdepth == CV_64F || ddepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
&& fabs(beta) < std::numeric_limits<double>::epsilon();
if (sdepth == ddepth && noScale)
{
enqueueCopy(src, dst);
return;
}
dst.create(src.size(), dtype);
cudaStream_t stream = Impl::getStream(impl);
convertTo(src, dst, alpha, beta, stream);
}
#if CUDART_VERSION >= 5000
namespace
{
struct CallbackData
{
cv::gpu::Stream::StreamCallback callback;
void* userData;
Stream stream;
};
void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
{
CallbackData* data = reinterpret_cast<CallbackData*>(userData);
data->callback(data->stream, static_cast<int>(status), data->userData);
delete data;
}
}
#endif
void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
#if CUDART_VERSION >= 5000
CallbackData* data = new CallbackData;
data->callback = callback;
data->userData = userData;
data->stream = *this;
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
#else
(void) callback;
(void) userData;
CV_Error(CV_StsNotImplemented, "This function requires CUDA 5.0");
#endif
}
cv::gpu::Stream& cv::gpu::Stream::Null()
{
static Stream s((Impl*) 0);
return s;
}
cv::gpu::Stream::operator bool() const
{
return impl && impl->stream;
}
cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_)
{
}
void cv::gpu::Stream::create()
{
if (impl)
release();
cudaStream_t stream;
cudaSafeCall( cudaStreamCreate( &stream ) );
impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));
impl->stream = stream;
impl->ref_counter = 1;
}
void cv::gpu::Stream::release()
{
if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
{
cudaSafeCall( cudaStreamDestroy(impl->stream) );
cv::fastFree(impl);
}
}
#endif /* !defined (HAVE_CUDA) */

@ -0,0 +1,215 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
namespace
{
size_t alignUpStep(size_t what, size_t alignment)
{
size_t alignMask = alignment - 1;
size_t inverseAlignMask = ~alignMask;
size_t res = (what + alignMask) & inverseAlignMask;
return res;
}
}
void cv::gpu::CudaMem::create(int rows_, int cols_, int type_)
{
#ifndef HAVE_CUDA
(void) rows_;
(void) cols_;
(void) type_;
throw_no_cuda();
#else
if (alloc_type == SHARED)
{
DeviceInfo devInfo;
CV_Assert( devInfo.canMapHostMemory() );
}
type_ &= Mat::TYPE_MASK;
if (rows == rows_ && cols == cols_ && type() == type_ && data)
return;
if (data)
release();
CV_DbgAssert( rows_ >= 0 && cols_ >= 0 );
if (rows_ > 0 && cols_ > 0)
{
flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_;
rows = rows_;
cols = cols_;
step = elemSize() * cols;
if (alloc_type == SHARED)
{
DeviceInfo devInfo;
step = alignUpStep(step, devInfo.textureAlignment());
}
int64 _nettosize = (int64)step*rows;
size_t nettosize = (size_t)_nettosize;
if (_nettosize != (int64)nettosize)
CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated");
size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));
void* ptr = 0;
switch (alloc_type)
{
case PAGE_LOCKED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break;
case SHARED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break;
case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
datastart = data = (uchar*)ptr;
dataend = data + nettosize;
refcount = (int*)cv::fastMalloc(sizeof(*refcount));
*refcount = 1;
}
#endif
}
CudaMem cv::gpu::CudaMem::reshape(int new_cn, int new_rows) const
{
CudaMem hdr = *this;
int cn = channels();
if (new_cn == 0)
new_cn = cn;
int total_width = cols * cn;
if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
new_rows = rows * total_width / new_cn;
if (new_rows != 0 && new_rows != rows)
{
int total_size = total_width * rows;
if (!isContinuous())
CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
if ((unsigned)new_rows > (unsigned)total_size)
CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows");
total_width = total_size / new_rows;
if (total_width * new_rows != total_size)
CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
hdr.rows = new_rows;
hdr.step = total_width * elemSize1();
}
int new_width = total_width / new_cn;
if (new_width * new_cn != total_width)
CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels");
hdr.cols = new_width;
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
return hdr;
}
void cv::gpu::CudaMem::release()
{
#ifdef HAVE_CUDA
if (refcount && CV_XADD(refcount, -1) == 1)
{
cudaFreeHost(datastart);
fastFree(refcount);
}
data = datastart = dataend = 0;
step = rows = cols = 0;
refcount = 0;
#endif
}
GpuMat cv::gpu::CudaMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return GpuMat();
#else
CV_Assert( alloc_type == SHARED );
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) );
return GpuMat(rows, cols, type(), pdev, step);
#endif
}
void cv::gpu::registerPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
(void) m;
throw_no_cuda();
#else
CV_Assert( m.isContinuous() );
cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) );
#endif
}
void cv::gpu::unregisterPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
(void) m;
#else
cudaSafeCall( cudaHostUnregister(m.data) );
#endif
}
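A minimal sketch of the page-locking helpers above (assumes HAVE_CUDA and that the core GPU headers are included); pinned host memory is what makes later asynchronous copies from the buffer truly non-blocking:

// illustrative sketch only
void pinning_sketch()
{
    cv::Mat host(480, 640, CV_8UC1);          // a continuous host buffer

    cv::gpu::registerPageLocked(host);        // page-lock the existing allocation
    // ... enqueue asynchronous uploads from 'host' here ...
    cv::gpu::unregisterPageLocked(host);      // unpin before the buffer is released
}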

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,308 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
////////////////////////////////////////////////////////////////
// Stream
#ifndef HAVE_CUDA
class cv::gpu::Stream::Impl
{
public:
Impl(void* ptr = 0)
{
(void) ptr;
throw_no_cuda();
}
};
#else
class cv::gpu::Stream::Impl
{
public:
cudaStream_t stream;
Impl();
Impl(cudaStream_t stream);
~Impl();
};
cv::gpu::Stream::Impl::Impl() : stream(0)
{
cudaSafeCall( cudaStreamCreate(&stream) );
}
cv::gpu::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
{
}
cv::gpu::Stream::Impl::~Impl()
{
if (stream)
cudaStreamDestroy(stream);
}
cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
return stream.impl_->stream;
}
#endif
cv::gpu::Stream::Stream()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
impl_ = new Impl;
#endif
}
bool cv::gpu::Stream::queryIfComplete() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
cudaError_t err = cudaStreamQuery(impl_->stream);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
#endif
}
void cv::gpu::Stream::waitForCompletion()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
cudaSafeCall( cudaStreamSynchronize(impl_->stream) );
#endif
}
void cv::gpu::Stream::waitEvent(const Event& event)
{
#ifndef HAVE_CUDA
(void) event;
throw_no_cuda();
#else
cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) );
#endif
}
#if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
namespace
{
struct CallbackData
{
Stream::StreamCallback callback;
void* userData;
CallbackData(Stream::StreamCallback callback_, void* userData_) : callback(callback_), userData(userData_) {}
};
void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
{
CallbackData* data = reinterpret_cast<CallbackData*>(userData);
data->callback(static_cast<int>(status), data->userData);
delete data;
}
}
#endif
void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
#ifndef HAVE_CUDA
(void) callback;
(void) userData;
throw_no_cuda();
#else
#if CUDART_VERSION < 5000
(void) callback;
(void) userData;
CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA 5.0");
#else
CallbackData* data = new CallbackData(callback, userData);
cudaSafeCall( cudaStreamAddCallback(impl_->stream, cudaStreamCallback, data, 0) );
#endif
#endif
}
Stream& cv::gpu::Stream::Null()
{
static Stream s(new Impl(0));
return s;
}
cv::gpu::Stream::operator bool_type() const
{
#ifndef HAVE_CUDA
return 0;
#else
return (impl_->stream != 0) ? &Stream::this_type_does_not_support_comparisons : 0;
#endif
}
template <> void cv::Ptr<Stream::Impl>::delete_obj()
{
if (obj) delete obj;
}
////////////////////////////////////////////////////////////////
// Stream
#ifndef HAVE_CUDA
class cv::gpu::Event::Impl
{
public:
Impl(unsigned int)
{
throw_no_cuda();
}
};
#else
class cv::gpu::Event::Impl
{
public:
cudaEvent_t event;
Impl(unsigned int flags);
~Impl();
};
cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
{
cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
}
cv::gpu::Event::Impl::~Impl()
{
if (event)
cudaEventDestroy(event);
}
cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
{
return event.impl_->event;
}
#endif
cv::gpu::Event::Event(CreateFlags flags)
{
#ifndef HAVE_CUDA
(void) flags;
throw_no_cuda();
#else
impl_ = new Impl(flags);
#endif
}
void cv::gpu::Event::record(Stream& stream)
{
#ifndef HAVE_CUDA
(void) stream;
throw_no_cuda();
#else
cudaSafeCall( cudaEventRecord(impl_->event, StreamAccessor::getStream(stream)) );
#endif
}
bool cv::gpu::Event::queryIfComplete() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
cudaError_t err = cudaEventQuery(impl_->event);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
#endif
}
void cv::gpu::Event::waitForCompletion()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
cudaSafeCall( cudaEventSynchronize(impl_->event) );
#endif
}
float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
{
#ifndef HAVE_CUDA
(void) start;
(void) end;
throw_no_cuda();
return 0.0f;
#else
float ms;
cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) );
return ms;
#endif
}
template <> void cv::Ptr<Event::Impl>::delete_obj()
{
if (obj) delete obj;
}
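A hedged sketch of timing asynchronous work with the Stream and Event classes implemented above (assumes HAVE_CUDA, that the core GPU headers are included, and that Event's creation flags default to the plain flag declared in gpu.hpp):

// illustrative sketch only
void timing_sketch()
{
    cv::gpu::Stream stream;          // creates a new CUDA stream (see Stream::Stream above)
    cv::gpu::Event start, stop;

    start.record(stream);
    // ... enqueue asynchronous GPU work on 'stream' here ...
    stop.record(stream);

    stop.waitForCompletion();        // host blocks until the 'stop' event has been reached
    float ms = cv::gpu::Event::elapsedTime(start, stop);
    (void) ms;                       // elapsed GPU time between the two events, in milliseconds
}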

File diff suppressed because it is too large

@ -41,8 +41,6 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl.hpp"
/****************************************************************************************\
* [scaled] Identity matrix initialization *
@ -941,14 +939,15 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
\*************************************************************************************************/
_InputArray::_InputArray() : flags(0), obj(0) {}
_InputArray::~_InputArray() {}
_InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
_InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
_InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
_InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
_InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
_InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
_InputArray::_InputArray(const ogl::Texture2D& tex) : flags(OPENGL_TEXTURE), obj((void*)&tex) {}
_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
_InputArray::~_InputArray() {}
Mat _InputArray::getMat(int i) const
{
@ -996,14 +995,37 @@ Mat _InputArray::getMat(int i) const
return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
}
CV_Assert( k == STD_VECTOR_MAT );
//if( k == STD_VECTOR_MAT )
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
CV_Assert( 0 <= i && i < (int)v.size() );
return v[i];
}
if( k == OPENGL_BUFFER )
{
CV_Assert( i < 0 );
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapHost/unmapHost methods for ogl::Buffer object");
return Mat();
}
if( k == GPU_MAT )
{
CV_Assert( i < 0 );
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object");
return Mat();
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
{
CV_Assert( i < 0 );
const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
return cuda_mem->createMatHeader();
}
}
@ -1092,10 +1114,29 @@ gpu::GpuMat _InputArray::getGpuMat() const
{
int k = kind();
CV_Assert(k == GPU_MAT);
if (k == GPU_MAT)
{
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return *d_mat;
}
if (k == CUDA_MEM)
{
const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
return cuda_mem->createGpuMatHeader();
}
if (k == OPENGL_BUFFER)
{
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object");
return gpu::GpuMat();
}
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return *d_mat;
if (k == NONE)
return gpu::GpuMat();
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem");
return gpu::GpuMat();
}
ogl::Buffer _InputArray::getOGlBuffer() const
@ -1108,16 +1149,6 @@ ogl::Buffer _InputArray::getOGlBuffer() const
return *gl_buf;
}
ogl::Texture2D _InputArray::getOGlTexture2D() const
{
int k = kind();
CV_Assert(k == OPENGL_TEXTURE);
const ogl::Texture2D* gl_tex = (const ogl::Texture2D*)obj;
return *gl_tex;
}
int _InputArray::kind() const
{
return flags & KIND_MASK;
@ -1186,19 +1217,19 @@ Size _InputArray::size(int i) const
return buf->size();
}
if( k == OPENGL_TEXTURE )
if( k == GPU_MAT )
{
CV_Assert( i < 0 );
const ogl::Texture2D* tex = (const ogl::Texture2D*)obj;
return tex->size();
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return d_mat->size();
}
CV_Assert( k == GPU_MAT );
//if( k == GPU_MAT )
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
{
CV_Assert( i < 0 );
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return d_mat->size();
const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
return cuda_mem->size();
}
}
@ -1252,9 +1283,12 @@ int _InputArray::type(int i) const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->type();
CV_Assert( k == GPU_MAT );
//if( k == GPU_MAT )
if( k == GPU_MAT )
return ((const gpu::GpuMat*)obj)->type();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const gpu::CudaMem*)obj)->type();
}
int _InputArray::depth(int i) const
@ -1304,29 +1338,29 @@ bool _InputArray::empty() const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->empty();
if( k == OPENGL_TEXTURE )
return ((const ogl::Texture2D*)obj)->empty();
CV_Assert( k == GPU_MAT );
//if( k == GPU_MAT )
if( k == GPU_MAT )
return ((const gpu::GpuMat*)obj)->empty();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const gpu::CudaMem*)obj)->empty();
}
_OutputArray::_OutputArray() {}
_OutputArray::~_OutputArray() {}
_OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
_OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
_OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {}
_OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
_OutputArray::_OutputArray(ogl::Texture2D& tex) : _InputArray(tex) {}
_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}
_OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
_OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const ogl::Texture2D& tex) : _InputArray(tex) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::~_OutputArray() {}
bool _OutputArray::fixedSize() const
{
@ -1362,6 +1396,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
((ogl::Buffer*)obj)->create(_sz, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
((gpu::CudaMem*)obj)->create(_sz, mtype);
return;
}
int sizes[] = {_sz.height, _sz.width};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
@ -1390,6 +1431,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
((ogl::Buffer*)obj)->create(rows, cols, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows));
CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
((gpu::CudaMem*)obj)->create(rows, cols, mtype);
return;
}
int sizes[] = {rows, cols};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
@ -1609,15 +1657,15 @@ void _OutputArray::release() const
return;
}
if( k == OPENGL_BUFFER )
if( k == CUDA_MEM )
{
((ogl::Buffer*)obj)->release();
((gpu::CudaMem*)obj)->release();
return;
}
if( k == OPENGL_TEXTURE )
if( k == OPENGL_BUFFER )
{
((ogl::Texture2D*)obj)->release();
((ogl::Buffer*)obj)->release();
return;
}
@ -1693,11 +1741,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
return *(ogl::Buffer*)obj;
}
ogl::Texture2D& _OutputArray::getOGlTexture2DRef() const
gpu::CudaMem& _OutputArray::getCudaMemRef() const
{
int k = kind();
CV_Assert( k == OPENGL_TEXTURE );
return *(ogl::Texture2D*)obj;
CV_Assert( k == CUDA_MEM );
return *(gpu::CudaMem*)obj;
}
static _OutputArray _none;

@ -1,294 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
cv::gpu::CudaMem::CudaMem()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
}
cv::gpu::CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type, _alloc_type);
}
cv::gpu::CudaMem::CudaMem(Size _size, int _type, int _alloc_type)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type, _alloc_type);
}
cv::gpu::CudaMem::CudaMem(const CudaMem& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
cv::gpu::CudaMem::CudaMem(const Mat& m, int _alloc_type)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( m.rows > 0 && m.cols > 0 )
create( m.size(), m.type(), _alloc_type);
Mat tmp = createMatHeader();
m.copyTo(tmp);
}
cv::gpu::CudaMem::~CudaMem()
{
release();
}
CudaMem& cv::gpu::CudaMem::operator = (const CudaMem& m)
{
if( this != &m )
{
if( m.refcount )
CV_XADD(m.refcount, 1);
release();
flags = m.flags;
rows = m.rows; cols = m.cols;
step = m.step; data = m.data;
datastart = m.datastart;
dataend = m.dataend;
refcount = m.refcount;
alloc_type = m.alloc_type;
}
return *this;
}
CudaMem cv::gpu::CudaMem::clone() const
{
CudaMem m(size(), type(), alloc_type);
Mat to = m;
Mat from = *this;
from.copyTo(to);
return m;
}
void cv::gpu::CudaMem::create(Size _size, int _type, int _alloc_type)
{
create(_size.height, _size.width, _type, _alloc_type);
}
Mat cv::gpu::CudaMem::createMatHeader() const
{
return Mat(size(), type(), data, step);
}
cv::gpu::CudaMem::operator Mat() const
{
return createMatHeader();
}
cv::gpu::CudaMem::operator GpuMat() const
{
return createGpuMatHeader();
}
bool cv::gpu::CudaMem::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
size_t cv::gpu::CudaMem::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
size_t cv::gpu::CudaMem::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
int cv::gpu::CudaMem::type() const
{
return CV_MAT_TYPE(flags);
}
int cv::gpu::CudaMem::depth() const
{
return CV_MAT_DEPTH(flags);
}
int cv::gpu::CudaMem::channels() const
{
return CV_MAT_CN(flags);
}
size_t cv::gpu::CudaMem::step1() const
{
return step/elemSize1();
}
Size cv::gpu::CudaMem::size() const
{
return Size(cols, rows);
}
bool cv::gpu::CudaMem::empty() const
{
return data == 0;
}
#if !defined (HAVE_CUDA)
void cv::gpu::registerPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::CudaMem::create(int, int, int, int) { throw_no_cuda(); }
bool cv::gpu::CudaMem::canMapHostMemory() { throw_no_cuda(); return false; }
void cv::gpu::CudaMem::release() { throw_no_cuda(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return GpuMat(); }
#else /* !defined (HAVE_CUDA) */
void cv::gpu::registerPageLocked(Mat& m)
{
cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
}
void cv::gpu::unregisterPageLocked(Mat& m)
{
cudaSafeCall( cudaHostUnregister(m.ptr()) );
}
bool cv::gpu::CudaMem::canMapHostMemory()
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
return (prop.canMapHostMemory != 0) ? true : false;
}
namespace
{
size_t alignUpStep(size_t what, size_t alignment)
{
size_t alignMask = alignment-1;
size_t inverseAlignMask = ~alignMask;
size_t res = (what + alignMask) & inverseAlignMask;
return res;
}
}
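// Worked example (illustrative only, not part of the original sources): with a
// 256-byte texture alignment, a 300-byte row step is rounded up to the next
// multiple of the alignment, while an already-aligned step is left unchanged.
static void alignUpStep_example()
{
    CV_DbgAssert( alignUpStep(300, 256) == 512 );   // (300 + 255) & ~255 == 512
    CV_DbgAssert( alignUpStep(512, 256) == 512 );   // already a multiple of 256
}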
void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
{
if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())
CV_Error(cv::Error::GpuApiCallError, "ZeroCopy is not supported by current device");
_type &= Mat::TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data )
return;
if( data )
release();
CV_DbgAssert( _rows >= 0 && _cols >= 0 );
if( _rows > 0 && _cols > 0 )
{
flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
rows = _rows;
cols = _cols;
step = elemSize()*cols;
if (_alloc_type == ALLOC_ZEROCOPY)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
step = alignUpStep(step, prop.textureAlignment);
}
int64 _nettosize = (int64)step*rows;
size_t nettosize = (size_t)_nettosize;
if( _nettosize != (int64)nettosize )
CV_Error(CV_StsNoMem, "Too big buffer is allocated");
size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));
//datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
alloc_type = _alloc_type;
void *ptr = 0;
switch (alloc_type)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
datastart = data = (uchar*)ptr;
dataend = data + nettosize;
refcount = (int*)cv::fastMalloc(sizeof(*refcount));
*refcount = 1;
}
}
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
{
CV_Assert( alloc_type == ALLOC_ZEROCOPY );
GpuMat res;
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
res = GpuMat(rows, cols, type(), pdev, step);
return res;
}
void cv::gpu::CudaMem::release()
{
if( refcount && CV_XADD(refcount, -1) == 1 )
{
cudaSafeCall( cudaFreeHost(datastart ) );
fastFree(refcount);
}
data = datastart = dataend = 0;
step = rows = cols = 0;
refcount = 0;
}
#endif /* !defined (HAVE_CUDA) */

@ -55,62 +55,61 @@ using namespace cv::gpu;
namespace
{
#ifndef HAVE_OPENGL
void throw_no_ogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
inline void throw_no_ogl() { CV_Error(cv::Error::OpenGlNotSupported, "The library is compiled without OpenGL support"); }
#else
void throw_no_ogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
inline void throw_no_ogl() { CV_Error(cv::Error::OpenGlApiCallError, "OpenGL context doesn't exist"); }
#endif
bool checkError(const char* file, const int line, const char* func = 0)
{
#ifndef HAVE_OPENGL
(void) file;
(void) line;
(void) func;
return true;
#else
GLenum err = gl::GetError();
if (err != gl::NO_ERROR_)
bool checkError(const char* file, const int line, const char* func = 0)
{
const char* msg;
#ifndef HAVE_OPENGL
(void) file;
(void) line;
(void) func;
return true;
#else
GLenum err = gl::GetError();
switch (err)
if (err != gl::NO_ERROR_)
{
case gl::INVALID_ENUM:
msg = "An unacceptable value is specified for an enumerated argument";
break;
const char* msg;
case gl::INVALID_VALUE:
msg = "A numeric argument is out of range";
break;
switch (err)
{
case gl::INVALID_ENUM:
msg = "An unacceptable value is specified for an enumerated argument";
break;
case gl::INVALID_OPERATION:
msg = "The specified operation is not allowed in the current state";
break;
case gl::INVALID_VALUE:
msg = "A numeric argument is out of range";
break;
case gl::OUT_OF_MEMORY:
msg = "There is not enough memory left to execute the command";
break;
case gl::INVALID_OPERATION:
msg = "The specified operation is not allowed in the current state";
break;
default:
msg = "Unknown error";
};
case gl::OUT_OF_MEMORY:
msg = "There is not enough memory left to execute the command";
break;
cvError(CV_OpenGlApiCallError, func, msg, file, line);
default:
msg = "Unknown error";
};
return false;
}
cvError(CV_OpenGlApiCallError, func, msg, file, line);
return true;
#endif
}
return false;
}
#if defined(__GNUC__)
#define CV_CheckGlError() CV_DbgAssert( (checkError(__FILE__, __LINE__, __func__)) )
#else
#define CV_CheckGlError() CV_DbgAssert( (checkError(__FILE__, __LINE__)) )
#endif
return true;
#endif
}
#if defined(__GNUC__)
#define CV_CheckGlError() CV_DbgAssert( (checkError(__FILE__, __LINE__, __func__)) )
#else
#define CV_CheckGlError() CV_DbgAssert( (checkError(__FILE__, __LINE__)) )
#endif
} // namespace
#ifdef HAVE_OPENGL
@ -129,7 +128,7 @@ void cv::gpu::setGlDevice(int device)
(void) device;
throw_no_ogl();
#else
#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
(void) device;
throw_no_cuda();
#else
@ -141,7 +140,7 @@ void cv::gpu::setGlDevice(int device)
////////////////////////////////////////////////////////////////////////
// CudaResource
#if defined(HAVE_OPENGL) && defined(HAVE_CUDA) && !defined(CUDA_DISABLER)
#if defined(HAVE_OPENGL) && defined(HAVE_CUDA)
namespace
{
@ -353,12 +352,13 @@ const Ptr<cv::ogl::Buffer::Impl>& cv::ogl::Buffer::Impl::empty()
return p;
}
cv::ogl::Buffer::Impl::Impl() : bufId_(0), autoRelease_(true)
cv::ogl::Buffer::Impl::Impl() : bufId_(0), autoRelease_(false)
{
}
cv::ogl::Buffer::Impl::Impl(GLuint abufId, bool autoRelease) : bufId_(abufId), autoRelease_(autoRelease)
{
CV_Assert( gl::IsBuffer(abufId) == gl::TRUE_ );
}
cv::ogl::Buffer::Impl::Impl(GLsizeiptr size, const GLvoid* data, GLenum target, bool autoRelease) : bufId_(0), autoRelease_(autoRelease)
@ -437,29 +437,31 @@ void cv::ogl::Buffer::Impl::unmapHost()
}
#ifdef HAVE_CUDA
void cv::ogl::Buffer::Impl::copyFrom(const void* src, size_t spitch, size_t width, size_t height, cudaStream_t stream)
{
cudaResource_.registerBuffer(bufId_);
cudaResource_.copyFrom(src, spitch, width, height, stream);
}
void cv::ogl::Buffer::Impl::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream) const
{
cudaResource_.registerBuffer(bufId_);
cudaResource_.copyTo(dst, dpitch, width, height, stream);
}
void cv::ogl::Buffer::Impl::copyFrom(const void* src, size_t spitch, size_t width, size_t height, cudaStream_t stream)
{
cudaResource_.registerBuffer(bufId_);
cudaResource_.copyFrom(src, spitch, width, height, stream);
}
void* cv::ogl::Buffer::Impl::mapDevice(cudaStream_t stream)
{
cudaResource_.registerBuffer(bufId_);
return cudaResource_.map(stream);
}
void cv::ogl::Buffer::Impl::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream) const
{
cudaResource_.registerBuffer(bufId_);
cudaResource_.copyTo(dst, dpitch, width, height, stream);
}
void cv::ogl::Buffer::Impl::unmapDevice(cudaStream_t stream)
{
cudaResource_.unmap(stream);
}
#endif
void* cv::ogl::Buffer::Impl::mapDevice(cudaStream_t stream)
{
cudaResource_.registerBuffer(bufId_);
return cudaResource_.map(stream);
}
void cv::ogl::Buffer::Impl::unmapDevice(cudaStream_t stream)
{
cudaResource_.unmap(stream);
}
#endif // HAVE_CUDA
#endif // HAVE_OPENGL
@ -505,16 +507,6 @@ cv::ogl::Buffer::Buffer(Size asize, int atype, unsigned int abufId, bool autoRel
#endif
}
cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(arows, acols, atype, target, autoRelease);
}
cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(asize, atype, target, autoRelease);
}
cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
#ifndef HAVE_OPENGL
@ -528,22 +520,9 @@ cv::ogl::Buffer::Buffer(InputArray arr, Target target, bool autoRelease) : rows_
switch (kind)
{
case _InputArray::OPENGL_BUFFER:
{
copyFrom(arr, target, autoRelease);
break;
}
case _InputArray::OPENGL_TEXTURE:
{
copyFrom(arr, target, autoRelease);
break;
}
case _InputArray::GPU_MAT:
{
copyFrom(arr, target, autoRelease);
break;
}
copyFrom(arr, target, autoRelease);
break;
default:
{
@ -613,14 +592,6 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
#else
const int kind = arr.kind();
if (kind == _InputArray::OPENGL_TEXTURE)
{
ogl::Texture2D tex = arr.getOGlTexture2D();
tex.copyTo(*this);
setAutoRelease(autoRelease);
return;
}
const Size asize = arr.size();
const int atype = arr.type();
create(asize, atype, target, autoRelease);
@ -636,7 +607,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
@ -656,13 +627,36 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
#endif
}
void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) const
void cv::ogl::Buffer::copyFrom(InputArray arr, gpu::Stream& stream, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
(void) arr;
(void) stream;
(void) target;
(void) autoRelease;
throw_no_ogl();
#else
#ifndef HAVE_CUDA
(void) arr;
(void) stream;
(void) target;
(void) autoRelease;
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
create(dmat.size(), dmat.type(), target, autoRelease);
impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, gpu::StreamAccessor::getStream(stream));
#endif
#endif
}
void cv::ogl::Buffer::copyTo(OutputArray arr) const
{
#ifndef HAVE_OPENGL
(void) arr;
throw_no_ogl();
#else
const int kind = arr.kind();
@ -670,19 +664,13 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
{
case _InputArray::OPENGL_BUFFER:
{
arr.getOGlBufferRef().copyFrom(*this, target, autoRelease);
break;
}
case _InputArray::OPENGL_TEXTURE:
{
arr.getOGlTexture2DRef().copyFrom(*this, autoRelease);
arr.getOGlBufferRef().copyFrom(*this);
break;
}
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
#else
GpuMat& dmat = arr.getGpuMatRef();
@ -704,6 +692,25 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, Target target, bool autoRelease) c
#endif
}
void cv::ogl::Buffer::copyTo(OutputArray arr, gpu::Stream& stream) const
{
#ifndef HAVE_OPENGL
(void) arr;
(void) stream;
throw_no_ogl();
#else
#ifndef HAVE_CUDA
(void) arr;
(void) stream;
throw_no_cuda();
#else
arr.create(rows_, cols_, type_);
GpuMat dmat = arr.getGpuMat();
impl_->copyTo(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, gpu::StreamAccessor::getStream(stream));
#endif
#endif
}
cv::ogl::Buffer cv::ogl::Buffer::clone(Target target, bool autoRelease) const
{
#ifndef HAVE_OPENGL
@ -765,7 +772,7 @@ GpuMat cv::ogl::Buffer::mapDevice()
throw_no_ogl();
return GpuMat();
#else
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
return GpuMat();
#else
@ -779,7 +786,7 @@ void cv::ogl::Buffer::unmapDevice()
#ifndef HAVE_OPENGL
throw_no_ogl();
#else
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
#else
impl_->unmapDevice();
@ -787,6 +794,38 @@ void cv::ogl::Buffer::unmapDevice()
#endif
}
gpu::GpuMat cv::ogl::Buffer::mapDevice(gpu::Stream& stream)
{
#ifndef HAVE_OPENGL
(void) stream;
throw_no_ogl();
return GpuMat();
#else
#ifndef HAVE_CUDA
(void) stream;
throw_no_cuda();
return GpuMat();
#else
return GpuMat(rows_, cols_, type_, impl_->mapDevice(gpu::StreamAccessor::getStream(stream)));
#endif
#endif
}
void cv::ogl::Buffer::unmapDevice(gpu::Stream& stream)
{
#ifndef HAVE_OPENGL
(void) stream;
throw_no_ogl();
#else
#ifndef HAVE_CUDA
(void) stream;
throw_no_cuda();
#else
impl_->unmapDevice(gpu::StreamAccessor::getStream(stream));
#endif
#endif
}
unsigned int cv::ogl::Buffer::bufId() const
{
#ifndef HAVE_OPENGL
@ -844,12 +883,13 @@ const Ptr<cv::ogl::Texture2D::Impl> cv::ogl::Texture2D::Impl::empty()
return p;
}
cv::ogl::Texture2D::Impl::Impl() : texId_(0), autoRelease_(true)
cv::ogl::Texture2D::Impl::Impl() : texId_(0), autoRelease_(false)
{
}
cv::ogl::Texture2D::Impl::Impl(GLuint atexId, bool autoRelease) : texId_(atexId), autoRelease_(autoRelease)
{
CV_Assert( gl::IsTexture(atexId) == gl::TRUE_ );
}
cv::ogl::Texture2D::Impl::Impl(GLint internalFormat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid* pixels, bool autoRelease) : texId_(0), autoRelease_(autoRelease)
@ -955,16 +995,6 @@ cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, unsigned int atexId, b
#endif
}
cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(arows, acols, aformat, autoRelease);
}
cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(asize, aformat, autoRelease);
}
cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
#ifndef HAVE_OPENGL
@ -1005,7 +1035,7 @@ cv::ogl::Texture2D::Texture2D(InputArray arr, bool autoRelease) : rows_(0), cols
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
@ -1118,7 +1148,7 @@ void cv::ogl::Texture2D::copyFrom(InputArray arr, bool autoRelease)
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
#else
GpuMat dmat = arr.getGpuMat();
@ -1169,7 +1199,7 @@ void cv::ogl::Texture2D::copyTo(OutputArray arr, int ddepth, bool autoRelease) c
case _InputArray::GPU_MAT:
{
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
#ifndef HAVE_CUDA
throw_no_cuda();
#else
ogl::Buffer buf(rows_, cols_, CV_MAKE_TYPE(ddepth, cn), ogl::Buffer::PIXEL_PACK_BUFFER);
@ -1221,10 +1251,6 @@ template <> void cv::Ptr<cv::ogl::Texture2D::Impl>::delete_obj()
////////////////////////////////////////////////////////////////////////
// ogl::Arrays
cv::ogl::Arrays::Arrays() : size_(0)
{
}
void cv::ogl::Arrays::setVertexArray(InputArray vertex)
{
const int cn = vertex.channels();

@ -45,11 +45,11 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include <assert.h>
#include <ctype.h>

@ -6,32 +6,22 @@ Data Structures
gpu::PtrStepSz
---------------
--------------
.. ocv:class:: gpu::PtrStepSz
Lightweight class that encapsulates pitched memory on a GPU and is passed to nvcc-compiled code (CUDA kernels). Typically, it is used internally by OpenCV and by users who write device code. You can call its members from both host and device code. ::
template <typename T> struct PtrStepSz
template <typename T> struct PtrStepSz : public PtrStep<T>
{
int cols;
int rows;
T* data;
size_t step;
PtrStepSz() : cols(0), rows(0), data(0), step(0){};
PtrStepSz(int rows, int cols, T *data, size_t step);
__CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
__CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
template <typename U>
explicit PtrStepSz(const PtrStepSz<U>& d);
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
__CV_GPU_HOST_DEVICE__ size_t elemSize() const;
/* returns pointer to the beginning of the given image row */
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
int cols;
int rows;
};
typedef PtrStepSz<unsigned char> PtrStepSzb;
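A minimal host-side sketch (assuming a ``GpuMat`` named ``d_img`` and the implicit conversion operators listed in the ``GpuMat`` section below); the ``data`` pointer refers to device memory and must not be dereferenced on the CPU: ::

    cv::gpu::GpuMat d_img(480, 640, CV_8UC1);
    cv::gpu::PtrStepSzb view = d_img;            // implicit conversion to PtrStepSz<uchar>
    CV_Assert(view.rows == 480 && view.cols == 640);
    CV_Assert(view.step >= (size_t)view.cols);   // step is the row stride in bytes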
@ -41,32 +31,32 @@ Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compi
gpu::PtrStep
--------------
------------
.. ocv:class:: gpu::PtrStep
Structure similar to :ocv:class:`gpu::PtrStepSz` but containing only a pointer and row step. Width and height fields are excluded for performance reasons. The structure is intended for internal use or for users who write device code. ::
template<typename T> struct PtrStep
template <typename T> struct PtrStep : public DevPtr<T>
{
T* data;
size_t step;
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
PtrStep();
PtrStep(const PtrStepSz<T>& mem);
//! stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!!
size_t step;
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
__CV_GPU_HOST_DEVICE__ size_t elemSize() const;
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
typedef PtrStep<unsigned char> PtrStep;
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
gpu::GpuMat
-----------
.. ocv:class:: gpu::GpuMat
@ -89,28 +79,31 @@ Beware that the latter limitation may lead to overloaded matrix operators that c
//! default constructor
GpuMat();
//! constructs GpuMat of the specified size and type
GpuMat(int rows, int cols, int type);
GpuMat(Size size, int type);
.....
//! builds GpuMat from Mat. Blocks uploading to device.
explicit GpuMat (const Mat& m);
//! builds GpuMat from host memory (Blocking call)
explicit GpuMat(InputArray arr);
//! returns lightweight PtrStepSz structure for passing
//! to nvcc-compiled code; contains size, data pointer and step
template <class T> operator PtrStepSz<T>() const;
template <class T> operator PtrStep<T>() const;
//! blocks uploading data to GpuMat.
void upload(const cv::Mat& m);
void upload(const CudaMem& m, Stream& stream);
//! performs data upload to GpuMat (Blocking call)
void upload(InputArray arr);
//! downloads data from device to host memory. Blocking calls.
void download(cv::Mat& m) const;
//! performs data upload to GpuMat (Non-Blocking call)
void upload(InputArray arr, Stream& stream);
//! download async
void download(CudaMem& m, Stream& stream) const;
//! performs data download from device to host memory (Blocking call)
void download(OutputArray dst) const;
//! performs data download from device to host memory (Non-Blocking call)
void download(OutputArray dst, Stream& stream) const;
};
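A short upload/download sketch under this interface (the ``pinned`` staging buffer and the synthetic ``host`` matrix are illustrative assumptions): ::

    cv::Mat host(480, 640, CV_8UC1, cv::Scalar(0));
    cv::gpu::GpuMat d_img;
    d_img.upload(host);                          // blocking upload

    cv::gpu::Stream stream;
    cv::gpu::CudaMem pinned(host.size(), host.type());
    cv::Mat h_pinned = pinned.createMatHeader(); // header over the page-locked memory
    host.copyTo(h_pinned);
    d_img.upload(pinned, stream);                // non-blocking upload from page-locked memory

    cv::Mat result;
    d_img.download(result);                      // blocking download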
@ -121,16 +114,10 @@ Beware that the latter limitation may lead to overloaded matrix operators that c
gpu::createContinuous
-------------------------
Creates a continuous matrix in the GPU memory.
---------------------
Creates a continuous matrix.
.. ocv:function:: void gpu::createContinuous(int rows, int cols, int type, GpuMat& m)
.. ocv:function:: GpuMat gpu::createContinuous(int rows, int cols, int type)
.. ocv:function:: void gpu::createContinuous(Size size, int type, GpuMat& m)
.. ocv:function:: GpuMat gpu::createContinuous(Size size, int type)
.. ocv:function:: void gpu::createContinuous(int rows, int cols, int type, OutputArray arr)
:param rows: Row count.
@ -138,64 +125,39 @@ Creates a continuous matrix in the GPU memory.
:param type: Type of the matrix.
:param m: Destination matrix. This parameter changes only if it has a proper type and area ( :math:`\texttt{rows} \times \texttt{cols}` ).
:param arr: Destination matrix. This parameter changes only if it has a proper type and area ( :math:`\texttt{rows} \times \texttt{cols}` ).
A matrix is called continuous if its elements are stored continuously, that is, without gaps at the end of each row.
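For example (a hedged sketch, assuming ``GpuMat`` exposes ``isContinuous()`` like its ``Mat`` counterpart): ::

    cv::gpu::GpuMat m;
    cv::gpu::createContinuous(480, 640, CV_32FC1, m);
    CV_Assert( m.isContinuous() );
    CV_Assert( m.step == m.cols * m.elemSize() );   // no padding at the end of rows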
gpu::ensureSizeIsEnough
---------------------------
-----------------------
Ensures that the size of a matrix is big enough and the matrix has a proper type.
.. ocv:function:: void gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
.. ocv:function:: void gpu::ensureSizeIsEnough(Size size, int type, GpuMat& m)
.. ocv:function:: void gpu::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
:param rows: Minimum desired number of rows.
:param cols: Minimum desired number of columns.
:param size: Rows and columns passed as a structure.
:param type: Desired matrix type.
:param m: Destination matrix.
:param arr: Destination matrix.
The function does not reallocate memory if the matrix has proper attributes already.
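A sketch of the reuse behaviour (assuming the destination keeps its original allocation when it is already big enough): ::

    cv::gpu::GpuMat buf(1024, 1024, CV_8UC1);
    const uchar* before = buf.data;
    cv::gpu::ensureSizeIsEnough(480, 640, CV_8UC1, buf);   // fits inside the existing buffer
    CV_Assert( buf.data == before );                        // no reallocation happened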
gpu::registerPageLocked
-------------------------------
Page-locks the memory of matrix and maps it for the device(s).
.. ocv:function:: void gpu::registerPageLocked(Mat& m)
:param m: Input matrix.
gpu::unregisterPageLocked
-------------------------------
Unmaps the memory of matrix and makes it pageable again.
.. ocv:function:: void gpu::unregisterPageLocked(Mat& m)
:param m: Input matrix.
gpu::CudaMem
------------
.. ocv:class:: gpu::CudaMem
Class with reference counting wrapping special memory type allocation functions from CUDA. Its interface is also
:ocv:func:`Mat`-like but with additional memory type parameters.
Reference-counted class that wraps the special memory-type allocation functions from CUDA. Its interface is :ocv:func:`Mat`-like but with additional memory type parameters.
* **ALLOC_PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous uploading/downloading data from/to GPU.
* **ALLOC_ZEROCOPY** specifies a zero copy memory allocation that enables mapping the host memory to GPU address space, if supported.
* **ALLOC_WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache utilization.
* **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous uploading/downloading data from/to GPU.
* **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU address space, if supported.
* **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache utilization.
.. note:: Allocation size of such memory types is usually limited. For more details, see the *CUDA 2.2 Pinned Memory APIs* document or the *CUDA C Programming Guide*.
@ -204,36 +166,33 @@ Class with reference counting wrapping special memory type allocation functions
class CV_EXPORTS CudaMem
{
public:
enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2,
ALLOC_WRITE_COMBINED = 4 };
enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
explicit CudaMem(AllocType alloc_type = PAGE_LOCKED);
//! creates from cv::Mat with coping data
explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
CudaMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
CudaMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
......
//! creates from host memory, copying the data
explicit CudaMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
......
//! returns matrix header with disabled ref. counting for CudaMem data.
Mat createMatHeader() const;
operator Mat() const;
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;
//! maps host memory into device address space
GpuMat createGpuMatHeader() const;
operator GpuMat() const;
//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
GpuMat createGpuMatHeader() const;
//if host memory can be mapped to gpu address space;
static bool canMapHostMemory();
......
int alloc_type;
AllocType alloc_type;
};
gpu::CudaMem::createMatHeader
---------------------------------
-----------------------------
Creates a header without reference counting to :ocv:class:`gpu::CudaMem` data.
.. ocv:function:: Mat gpu::CudaMem::createMatHeader() const
@ -241,20 +200,32 @@ Creates a header without reference counting to :ocv:class:`gpu::CudaMem` data.
gpu::CudaMem::createGpuMatHeader
------------------------------------
--------------------------------
Maps CPU memory to GPU address space and creates the :ocv:class:`gpu::GpuMat` header without reference counting for it.
.. ocv:function:: GpuMat gpu::CudaMem::createGpuMatHeader() const
This can be done only if memory was allocated with the ``ALLOC_ZEROCOPY`` flag and if it is supported by the hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which eliminates an extra copy.
This can be done only if memory was allocated with the ``SHARED`` flag and if it is supported by the hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which eliminates an extra copy.
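A hedged zero-copy sketch (``SHARED`` replaces the former ``ALLOC_ZEROCOPY`` flag; hardware support for host-memory mapping is assumed): ::

    cv::gpu::CudaMem shared(480, 640, CV_8UC1, cv::gpu::CudaMem::SHARED);
    cv::Mat h_view = shared.createMatHeader();            // CPU view of the page-locked memory
    cv::gpu::GpuMat d_view = shared.createGpuMatHeader(); // GPU view of the same bytes, no copy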
gpu::registerPageLocked
-----------------------
Page-locks the memory of matrix and maps it for the device(s).
.. ocv:function:: void gpu::registerPageLocked(Mat& m)
:param m: Input matrix.
gpu::unregisterPageLocked
-------------------------
Unmaps the memory of matrix and makes it pageable again.
gpu::CudaMem::canMapHostMemory
----------------------------------
Returns ``true`` if the current hardware supports address space mapping and ``ALLOC_ZEROCOPY`` memory allocation.
.. ocv:function:: void gpu::unregisterPageLocked(Mat& m)
.. ocv:function:: static bool gpu::CudaMem::canMapHostMemory()
:param m: Input matrix.
@ -262,7 +233,7 @@ gpu::Stream
-----------
.. ocv:class:: gpu::Stream
This class encapsulates a queue of asynchronous calls. Some functions have overloads with the additional ``gpu::Stream`` parameter. The overloads do initialization work (allocate output buffers, upload constants, and so on), start the GPU kernel, and return before results are ready. You can check whether all operations are complete via :ocv:func:`gpu::Stream::queryIfComplete`. You can asynchronously upload/download data from/to page-locked buffers, using the :ocv:class:`gpu::CudaMem` or :ocv:class:`Mat` header that points to a region of :ocv:class:`gpu::CudaMem`.
This class encapsulates a queue of asynchronous calls.
.. note:: Currently, you may face problems if an operation is enqueued twice with different data. Some functions use constant GPU memory, and the next call may update that memory before the previous call has finished. However, calling different operations asynchronously is safe because each operation has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are also safe.
@ -272,30 +243,24 @@ This class encapsulates a queue of asynchronous calls. Some functions have overl
{
public:
Stream();
~Stream();
Stream(const Stream&);
Stream& operator=(const Stream&);
//! queries an asynchronous stream for completion status
bool queryIfComplete() const;
bool queryIfComplete();
//! waits for stream tasks to complete
void waitForCompletion();
void enqueueDownload(const GpuMat& src, CudaMem& dst);
void enqueueDownload(const GpuMat& src, Mat& dst);
void enqueueUpload(const CudaMem& src, GpuMat& dst);
void enqueueUpload(const Mat& src, GpuMat& dst);
//! makes a compute stream wait on an event
void waitEvent(const Event& event);
void enqueueCopy(const GpuMat& src, GpuMat& dst);
void enqueueMemSet(const GpuMat& src, Scalar val);
void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
void enqueueHostCallback(StreamCallback callback, void* userData);
void enqueueConvert(const GpuMat& src, GpuMat& dst, int type,
double a = 1, double b = 0);
//! returns the Stream object for the default CUDA stream
static Stream& Null();
typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
void enqueueHostCallback(StreamCallback callback, void* userData);
//! returns true if stream object is not default (!= 0)
operator bool_type() const;
};
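A minimal asynchronous-pipeline sketch built on this interface (``pinnedSrc``/``pinnedDst`` are hypothetical ``CudaMem`` staging buffers; ``gpu::resize`` comes from the gpuwarping module): ::

    cv::gpu::Stream stream;
    cv::gpu::CudaMem pinnedSrc(480, 640, CV_8UC1), pinnedDst(240, 320, CV_8UC1);
    cv::gpu::GpuMat d_src, d_dst;

    d_src.upload(pinnedSrc, stream);                                            // async H->D
    cv::gpu::resize(d_src, d_dst, cv::Size(), 0.5, 0.5, cv::INTER_LINEAR, stream);
    d_dst.download(pinnedDst, stream);                                          // async D->H

    if (!stream.queryIfComplete())
        stream.waitForCompletion();   // block until everything enqueued above has finished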
@ -316,53 +281,11 @@ Blocks the current CPU thread until all operations in the stream are complete.
gpu::Stream::enqueueDownload
----------------------------
Copies data from device to host.
.. ocv:function:: void gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
.. ocv:function:: void gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
.. note:: ``cv::Mat`` must point to page locked memory (i.e. to ``CudaMem`` data or to its subMat) or must be registered with :ocv:func:`gpu::registerPageLocked` .
gpu::Stream::enqueueUpload
--------------------------
Copies data from host to device.
.. ocv:function:: void gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
.. ocv:function:: void gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
.. note:: ``cv::Mat`` must point to page locked memory (i.e. to ``CudaMem`` data or to its subMat) or must be registered with :ocv:func:`gpu::registerPageLocked` .
gpu::Stream::enqueueCopy
------------------------
Copies data from device to device.
.. ocv:function:: void gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
gpu::Stream::enqueueMemSet
--------------------------
Initializes or sets device memory to a value.
.. ocv:function:: void gpu::Stream::enqueueMemSet( GpuMat& src, Scalar val )
.. ocv:function:: void gpu::Stream::enqueueMemSet( GpuMat& src, Scalar val, const GpuMat& mask )
gpu::Stream::enqueueConvert
---------------------------
Converts matrix type, ex from float to uchar depending on type.
gpu::Stream::waitEvent
----------------------
Makes a compute stream wait on an event.
.. ocv:function:: void gpu::Stream::enqueueConvert( const GpuMat& src, GpuMat& dst, int dtype, double a=1, double b=0 )
.. ocv:function:: void gpu::Stream::waitEvent(const Event& event)

@ -107,23 +107,186 @@ Class providing functionality for querying the specified GPU properties. ::
class CV_EXPORTS DeviceInfo
{
public:
//! creates DeviceInfo object for the current GPU
DeviceInfo();
//! creates DeviceInfo object for the given GPU
DeviceInfo(int device_id);
String name() const;
//! ASCII string identifying device
const char* name() const;
//! global memory available on device in bytes
size_t totalGlobalMem() const;
//! shared memory available per block in bytes
size_t sharedMemPerBlock() const;
//! 32-bit registers available per block
int regsPerBlock() const;
//! warp size in threads
int warpSize() const;
//! maximum pitch in bytes allowed by memory copies
size_t memPitch() const;
//! maximum number of threads per block
int maxThreadsPerBlock() const;
//! maximum size of each dimension of a block
Vec3i maxThreadsDim() const;
int majorVersion() const;
int minorVersion() const;
//! maximum size of each dimension of a grid
Vec3i maxGridSize() const;
//! clock frequency in kilohertz
int clockRate() const;
//! constant memory available on device in bytes
size_t totalConstMem() const;
//! major compute capability
int major() const;
//! minor compute capability
int minor() const;
//! alignment requirement for textures
size_t textureAlignment() const;
//! pitch alignment requirement for texture references bound to pitched memory
size_t texturePitchAlignment() const;
//! number of multiprocessors on device
int multiProcessorCount() const;
//! specifies whether there is a run-time limit on kernels
bool kernelExecTimeoutEnabled() const;
//! device is integrated as opposed to discrete
bool integrated() const;
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool canMapHostMemory() const;
enum ComputeMode
{
ComputeModeDefault, /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
ComputeModeProhibited, /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
};
//! compute mode
ComputeMode computeMode() const;
//! maximum 1D texture size
int maxTexture1D() const;
//! maximum 1D mipmapped texture size
int maxTexture1DMipmap() const;
//! maximum size for 1D textures bound to linear memory
int maxTexture1DLinear() const;
//! maximum 2D texture dimensions
Vec2i maxTexture2D() const;
//! maximum 2D mipmapped texture dimensions
Vec2i maxTexture2DMipmap() const;
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
Vec3i maxTexture2DLinear() const;
//! maximum 2D texture dimensions if texture gather operations have to be performed
Vec2i maxTexture2DGather() const;
//! maximum 3D texture dimensions
Vec3i maxTexture3D() const;
//! maximum Cubemap texture dimensions
int maxTextureCubemap() const;
//! maximum 1D layered texture dimensions
Vec2i maxTexture1DLayered() const;
//! maximum 2D layered texture dimensions
Vec3i maxTexture2DLayered() const;
//! maximum Cubemap layered texture dimensions
Vec2i maxTextureCubemapLayered() const;
//! maximum 1D surface size
int maxSurface1D() const;
//! maximum 2D surface dimensions
Vec2i maxSurface2D() const;
//! maximum 3D surface dimensions
Vec3i maxSurface3D() const;
//! maximum 1D layered surface dimensions
Vec2i maxSurface1DLayered() const;
//! maximum 2D layered surface dimensions
Vec3i maxSurface2DLayered() const;
//! maximum Cubemap surface dimensions
int maxSurfaceCubemap() const;
//! maximum Cubemap layered surface dimensions
Vec2i maxSurfaceCubemapLayered() const;
//! alignment requirements for surfaces
size_t surfaceAlignment() const;
//! device can possibly execute multiple kernels concurrently
bool concurrentKernels() const;
//! device has ECC support enabled
bool ECCEnabled() const;
//! PCI bus ID of the device
int pciBusID() const;
//! PCI device ID of the device
int pciDeviceID() const;
//! PCI domain ID of the device
int pciDomainID() const;
//! true if device is a Tesla device using TCC driver, false otherwise
bool tccDriver() const;
//! number of asynchronous engines
int asyncEngineCount() const;
//! device shares a unified address space with the host
bool unifiedAddressing() const;
//! peak memory clock frequency in kilohertz
int memoryClockRate() const;
//! global memory bus width in bits
int memoryBusWidth() const;
//! size of L2 cache in bytes
int l2CacheSize() const;
//! maximum resident threads per multiprocessor
int maxThreadsPerMultiProcessor() const;
//! gets free and total device memory
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
bool supports(FeatureSet feature) const;
bool isCompatible() const;
//! checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
int deviceID() const;
//! checks whether the GPU module can be run on the given device
bool isCompatible() const;
};
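A short query sketch using the listing above (``GpuNotSupported`` is assumed to be the matching error code in ``cv::Error``; the compute-capability computation mirrors the filtering code later in this diff): ::

    cv::gpu::DeviceInfo info;                         // describes the current device
    const int cc = info.major() * 10 + info.minor();  // compute capability, e.g. 30 for 3.0
    if (!info.isCompatible())
        CV_Error(cv::Error::GpuNotSupported, "device is not compatible with this GPU module build");
    printf("%s: %d SMs, %d MB\n", info.name(), info.multiProcessorCount(),
           (int)(info.totalMemory() >> 20));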
@ -146,31 +309,23 @@ gpu::DeviceInfo::name
---------------------
Returns the device name.
.. ocv:function:: String gpu::DeviceInfo::name() const
.. ocv:function:: const char* gpu::DeviceInfo::name() const
gpu::DeviceInfo::majorVersion
-----------------------------
gpu::DeviceInfo::major
----------------------
Returns the major compute capability version.
.. ocv:function:: int gpu::DeviceInfo::majorVersion()
.. ocv:function:: int gpu::DeviceInfo::major()
gpu::DeviceInfo::minorVersion
-----------------------------
gpu::DeviceInfo::minor
----------------------
Returns the minor compute capability version.
.. ocv:function:: int gpu::DeviceInfo::minorVersion()
gpu::DeviceInfo::multiProcessorCount
------------------------------------
Returns the number of streaming multiprocessors.
.. ocv:function:: int gpu::DeviceInfo::multiProcessorCount()
.. ocv:function:: int gpu::DeviceInfo::minor()
@ -194,7 +349,7 @@ gpu::DeviceInfo::supports
-------------------------
Provides information on GPU feature support.
.. ocv:function:: bool gpu::DeviceInfo::supports( FeatureSet feature_set ) const
.. ocv:function:: bool gpu::DeviceInfo::supports(FeatureSet feature_set) const
:param feature_set: Features to be checked. See :ocv:enum:`gpu::FeatureSet`.

@ -47,7 +47,7 @@
# error gpu.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#if !defined(__OPENCV_BUILD) && !defined(OPENCV_GPU_SKIP_INCLUDE)
#include "opencv2/opencv_modules.hpp"

@ -49,7 +49,7 @@
#include "opencv2/calib3d.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -126,25 +126,6 @@ GPU_TEST_P(Buffer, ConstructorFromBuffer)
EXPECT_EQ(buf_gold.type(), buf.type());
}
GPU_TEST_P(Buffer, ConstructorFromTexture2D)
{
const int depth = CV_MAT_DEPTH(type);
const int cn = CV_MAT_CN(type);
if (depth != CV_32F || cn == 2)
return;
cv::Mat gold = randomMat(size, type, 0, 1.0);
cv::ogl::Texture2D tex_gold(gold, true);
cv::ogl::Buffer buf(tex_gold, cv::ogl::Buffer::PIXEL_PACK_BUFFER, true);
cv::Mat bufData;
buf.copyTo(bufData);
EXPECT_MAT_NEAR(gold, bufData, 1e-2);
}
GPU_TEST_P(Buffer, Create)
{
cv::ogl::Buffer buf;
@ -198,26 +179,6 @@ GPU_TEST_P(Buffer, CopyFromBuffer)
EXPECT_MAT_NEAR(gold, bufData, 0);
}
GPU_TEST_P(Buffer, CopyFromTexture2D)
{
const int depth = CV_MAT_DEPTH(type);
const int cn = CV_MAT_CN(type);
if (depth != CV_32F || cn == 2)
return;
cv::Mat gold = randomMat(size, type, 0, 1.0);
cv::ogl::Texture2D tex_gold(gold, true);
cv::ogl::Buffer buf;
buf.copyFrom(tex_gold, cv::ogl::Buffer::ARRAY_BUFFER, true);
cv::Mat bufData;
buf.copyTo(bufData);
EXPECT_MAT_NEAR(gold, bufData, 1e-2);
}
GPU_TEST_P(Buffer, CopyToGpuMat)
{
cv::Mat gold = randomMat(size, type);
@ -237,7 +198,8 @@ GPU_TEST_P(Buffer, CopyToBuffer)
cv::ogl::Buffer buf(gold, cv::ogl::Buffer::ARRAY_BUFFER, true);
cv::ogl::Buffer dst;
buf.copyTo(dst, cv::ogl::Buffer::ARRAY_BUFFER, true);
buf.copyTo(dst);
dst.setAutoRelease(true);
EXPECT_NE(buf.bufId(), dst.bufId());
@ -247,27 +209,6 @@ GPU_TEST_P(Buffer, CopyToBuffer)
EXPECT_MAT_NEAR(gold, bufData, 0);
}
GPU_TEST_P(Buffer, CopyToTexture2D)
{
const int depth = CV_MAT_DEPTH(type);
const int cn = CV_MAT_CN(type);
if (depth != CV_32F || cn == 2)
return;
cv::Mat gold = randomMat(size, type, 0, 1.0);
cv::ogl::Buffer buf(gold, cv::ogl::Buffer::PIXEL_PACK_BUFFER, true);
cv::ogl::Texture2D tex;
buf.copyTo(tex, cv::ogl::Buffer::PIXEL_PACK_BUFFER, true);
cv::Mat texData;
tex.copyTo(texData);
EXPECT_MAT_NEAR(gold, texData, 1e-2);
}
GPU_TEST_P(Buffer, Clone)
{
cv::Mat gold = randomMat(size, type);

@ -47,7 +47,7 @@
# error gpuarithm.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {

@ -217,10 +217,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
{
if (src3.empty())
{
if (stream)
stream.enqueueMemSet(dst, Scalar::all(0));
else
dst.setTo(Scalar::all(0));
dst.setTo(Scalar::all(0), stream);
}
else
{
@ -230,10 +227,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
}
else
{
if (stream)
stream.enqueueCopy(src3, dst);
else
src3.copyTo(dst);
src3.copyTo(dst, stream);
}
}
}
@ -336,18 +330,13 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
if (s)
s.enqueueMemSet(sum, Scalar::all(0));
else
sum.setTo(Scalar::all(0));
sum.setTo(Scalar::all(0), s);
GpuMat inner = sum(Rect(1, 1, src.cols, src.rows));
GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
if (s)
s.enqueueCopy(res, inner);
else
res.copyTo(inner);
res.copyTo(inner, s);
}
else
{
@ -720,10 +709,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
GpuMat result_block(result_roi_size, result_data.type(),
result_data.ptr(), result_data.step);
if (stream)
stream.enqueueCopy(result_block, result_roi);
else
result_block.copyTo(result_roi);
result_block.copyTo(result_roi, stream);
}
}

@ -50,7 +50,7 @@
#include "opencv2/gpuarithm.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -49,7 +49,7 @@
#include <memory>
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/gpufilters.hpp"
namespace cv { namespace gpu {

@ -43,7 +43,7 @@
#ifndef __FGD_BGFG_COMMON_HPP__
#define __FGD_BGFG_COMMON_HPP__
#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/core/gpu_types.hpp"
namespace bgfg
{

@ -134,10 +134,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat
initialize(frame.size(), 0.0f, frame.depth() == CV_8U ? 255.0f : frame.depth() == CV_16U ? std::numeric_limits<ushort>::max() : 1.0f);
fgmask.create(frameSize_, CV_8UC1);
if (stream)
stream.enqueueMemSet(fgmask, cv::Scalar::all(0));
else
fgmask.setTo(cv::Scalar::all(0));
fgmask.setTo(cv::Scalar::all(0), stream);
funcs[frame.depth()][frame.channels() - 1](frame, fgmask, colors_, weights_, nfeatures_, frameNum_, learningRate, updateBackgroundModel, cv::gpu::StreamAccessor::getStream(stream));

@ -50,6 +50,6 @@
#include "opencv2/gpufilters.hpp"
#include "opencv2/gpuimgproc.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#endif /* __OPENCV_PRECOMP_H__ */

@ -49,7 +49,7 @@
#include <iosfwd>
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {

@ -43,7 +43,7 @@
#ifndef __CUVUD_VIDEO_SOURCE_H__
#define __CUVUD_VIDEO_SOURCE_H__
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/gpucodec.hpp"
#include "thread.h"

@ -44,7 +44,7 @@
#define __FRAME_QUEUE_H__
#include "opencv2/core/utility.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include <nvcuvid.h>

@ -52,7 +52,7 @@
#include "opencv2/gpucodec.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#ifdef HAVE_NVCUVID
#include <nvcuvid.h>

@ -43,7 +43,7 @@
#ifndef __VIDEO_DECODER_H__
#define __VIDEO_DECODER_H__
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/gpucodec.hpp"
#include <nvcuvid.h>

@ -43,7 +43,7 @@
#ifndef __VIDEO_PARSER_H__
#define __VIDEO_PARSER_H__
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/gpucodec.hpp"
#include "frame_queue.h"
#include "video_decoder.h"

@ -47,7 +47,7 @@
# error gpufeatures2d.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/gpufilters.hpp"
namespace cv { namespace gpu {

@ -497,10 +497,7 @@ void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& t
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
}
if (stream)
stream.enqueueMemSet(trainIdx, Scalar::all(-1));
else
trainIdx.setTo(Scalar::all(-1));
trainIdx.setTo(Scalar::all(-1), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
@ -616,10 +613,7 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuM
ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
if (stream)
stream.enqueueMemSet(trainIdx, Scalar::all(-1));
else
trainIdx.setTo(Scalar::all(-1));
trainIdx.setTo(Scalar::all(-1), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
@ -803,10 +797,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat
ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
}
if (stream)
stream.enqueueMemSet(nMatches, Scalar::all(0));
else
nMatches.setTo(Scalar::all(0));
nMatches.setTo(Scalar::all(0), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
@ -931,10 +922,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance);
}
if (stream)
stream.enqueueMemSet(nMatches, Scalar::all(0));
else
nMatches.setTo(Scalar::all(0));
nMatches.setTo(Scalar::all(0), stream);
caller_t func = callers[query.depth()];
CV_Assert(func != 0);

@ -52,6 +52,6 @@
#include "opencv2/gpuwarping.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#endif /* __OPENCV_PRECOMP_H__ */

@ -47,7 +47,7 @@
# error gpufilters.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/core/base.hpp"
namespace cv { namespace gpu {

@ -157,10 +157,7 @@ namespace
if (roi.size() != src_size)
{
if (stream)
stream.enqueueMemSet(dst, Scalar::all(0));
else
dst.setTo(Scalar::all(0));
dst.setTo(Scalar::all(0), stream);
}
normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
@ -221,10 +218,7 @@ namespace
if (roi.size() != src_size)
{
if (stream)
stream.enqueueMemSet(dst, Scalar::all(0));
else
dst.setTo(Scalar::all(0));
dst.setTo(Scalar::all(0), stream);
}
ensureSizeIsEnough(src_size, bufType, *pbuf);
@ -487,10 +481,7 @@ namespace
if (roi.size() != src_size)
{
if (stream)
stream.enqueueMemSet(dst, Scalar::all(0));
else
dst.setTo(Scalar::all(0));
dst.setTo(Scalar::all(0), stream);
}
normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
@ -557,10 +548,7 @@ namespace
if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
{
if (stream)
stream.enqueueCopy(src, dst);
else
src.copyTo(dst);
src.copyTo(dst, stream);
return;
}
@ -890,7 +878,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
}
@ -989,7 +977,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
if (ksize > 16 && cc < 20)
CV_Error(cv::Error::StsNotImplemented, "column linear filter is not implemented for kernel sizes > 16 on devices with compute capability less than 2.0");

@ -48,7 +48,7 @@
#include "opencv2/gpufilters.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -47,7 +47,7 @@
# error gpuimgproc.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/core/base.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/gpufilters.hpp"

@ -196,16 +196,9 @@ namespace
return;
}
if (stream)
{
stream.enqueueConvert(image, buf.imagef, CV_32F);
stream.enqueueConvert(templ, buf.templf, CV_32F);
}
else
{
image.convertTo(buf.imagef, CV_32F);
templ.convertTo(buf.templf, CV_32F);
}
image.convertTo(buf.imagef, CV_32F, stream);
templ.convertTo(buf.templf, CV_32F, stream);
matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);
}
@ -317,16 +310,8 @@ namespace
void matchTemplate_CCOFF_NORMED_8U(
const GpuMat& image, const GpuMat& templ, GpuMat& result, MatchTemplateBuf &buf, Stream& stream)
{
if (stream)
{
stream.enqueueConvert(image, buf.imagef, CV_32F);
stream.enqueueConvert(templ, buf.templf, CV_32F);
}
else
{
image.convertTo(buf.imagef, CV_32F);
templ.convertTo(buf.templf, CV_32F);
}
image.convertTo(buf.imagef, CV_32F, stream);
templ.convertTo(buf.templf, CV_32F, stream);
matchTemplate_CCORR_32F(buf.imagef, buf.templf, result, buf, stream);

@ -47,7 +47,7 @@
#include "opencv2/gpufilters.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -48,7 +48,7 @@
# error this is a private header which should not be used from outside of the OpenCV library
#endif
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#ifndef HAVE_CUDA
# error gpulegacy module requires CUDA

@ -56,7 +56,7 @@
# include "opencv2/objdetect.hpp"
#endif
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/gpulegacy/private.hpp"
#endif /* __OPENCV_PRECOMP_H__ */

@ -68,11 +68,11 @@
#include "opencv2/ts.hpp"
#include "opencv2/ts/gpu_test.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/gpulegacy.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "NCVTest.hpp"
#include "NCVAutoTestLister.hpp"

@ -47,7 +47,7 @@
# error gpuoptflow.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {

@ -235,8 +235,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
break;
}
streams[0].enqueueConvert(frame0, frames_[0], CV_32F);
streams[1].enqueueConvert(frame1, frames_[1], CV_32F);
frame0.convertTo(frames_[0], CV_32F, streams[0]);
frame1.convertTo(frames_[1], CV_32F, streams[1]);
if (fastPyramids)
{
@ -293,21 +293,21 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
{
gpu::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
gpu::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
}
else
{
streams[0].enqueueMemSet(curFlowX, 0);
streams[1].enqueueMemSet(curFlowY, 0);
curFlowX.setTo(0, streams[0]);
curFlowY.setTo(0, streams[1]);
}
}
else
{
gpu::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
gpu::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), 1./pyrScale);
streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), 1./pyrScale);
curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale, streams[0]);
curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale, streams[1]);
}
GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
@ -343,7 +343,7 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
{
cudev::optflow_farneback::gaussianBlurGpu(
frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
gpu::resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR, streams[i]);
gpu::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
cudev::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
}
}
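Besides switching to the stream-aware convertTo/setTo overloads, the Farneback path also corrects a gpu::resize call that previously passed INTER_LINEAR in the fx position; the new call spells out fx = fy = 0.0 so that the interpolation flag and the stream land in the right parameters. A sketch of the corrected call, with illustrative buffer names:

// dsize is explicit, so fx and fy must be passed as 0.0; interpolation and stream follow.
cv::gpu::resize(blurred, level, cv::Size(width, height), 0.0, 0.0, cv::INTER_LINEAR, stream);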

@ -51,7 +51,7 @@
#include "opencv2/gpuimgproc.hpp"
#include "opencv2/video.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
brox(loadMat(frame0), loadMat(frame1), u, v);
std::string fname(cvtest::TS::ptr()->get_data_path());
if (devInfo.majorVersion() >= 2)
if (devInfo.major() >= 2)
fname += "opticalflow/brox_optical_flow_cc20.bin";
else
fname += "opticalflow/brox_optical_flow.bin";

@ -47,7 +47,7 @@
# error gpustereo.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {

@ -113,10 +113,7 @@ namespace
if (&dst != &disp)
{
if (stream)
stream.enqueueCopy(disp, dst);
else
disp.copyTo(dst);
disp.copyTo(dst, stream);
}
disp_bilateral_filter<T>(dst, img, img.channels(), iters, StreamAccessor::getStream(stream));

@ -47,6 +47,6 @@
#include "opencv2/gpustereo.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#endif /* __OPENCV_PRECOMP_H__ */

@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
DeviceInfo device_info;
if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
if (device_info.major() > 1 || device_info.multiProcessorCount() > 16)
return true;
return false;

@ -194,20 +194,10 @@ namespace
if (rthis.levels & 1)
{
//can clear less area
if (stream)
{
stream.enqueueMemSet(u, zero);
stream.enqueueMemSet(d, zero);
stream.enqueueMemSet(l, zero);
stream.enqueueMemSet(r, zero);
}
else
{
u.setTo(zero);
d.setTo(zero);
l.setTo(zero);
r.setTo(zero);
}
u.setTo(zero, stream);
d.setTo(zero, stream);
l.setTo(zero, stream);
r.setTo(zero, stream);
}
if (rthis.levels > 1)
@ -222,20 +212,10 @@ namespace
if ((rthis.levels & 1) == 0)
{
if (stream)
{
stream.enqueueMemSet(u2, zero);
stream.enqueueMemSet(d2, zero);
stream.enqueueMemSet(l2, zero);
stream.enqueueMemSet(r2, zero);
}
else
{
u2.setTo(zero);
d2.setTo(zero);
l2.setTo(zero);
r2.setTo(zero);
}
u2.setTo(zero, stream);
d2.setTo(zero, stream);
l2.setTo(zero, stream);
r2.setTo(zero, stream);
}
}
@ -313,20 +293,12 @@ namespace
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
if (stream)
stream.enqueueMemSet(out, zero);
else
out.setTo(zero);
out.setTo(zero, stream);
output_callers[funcIdx](u, d, l, r, datas.front(), out, cudaStream);
if (disp.type() != CV_16S)
{
if (stream)
stream.enqueueConvert(out, disp, disp.type());
else
out.convertTo(disp, disp.type());
}
out.convertTo(disp, disp.type(), stream);
}
StereoBeliefPropagation& rthis;

@ -213,36 +213,18 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
if (stream)
{
stream.enqueueMemSet(l[0], zero);
stream.enqueueMemSet(d[0], zero);
stream.enqueueMemSet(r[0], zero);
stream.enqueueMemSet(u[0], zero);
stream.enqueueMemSet(l[1], zero);
stream.enqueueMemSet(d[1], zero);
stream.enqueueMemSet(r[1], zero);
stream.enqueueMemSet(u[1], zero);
stream.enqueueMemSet(data_cost, zero);
stream.enqueueMemSet(data_cost_selected, zero);
}
else
{
l[0].setTo(zero);
d[0].setTo(zero);
r[0].setTo(zero);
u[0].setTo(zero);
l[1].setTo(zero);
d[1].setTo(zero);
r[1].setTo(zero);
u[1].setTo(zero);
data_cost.setTo(zero);
data_cost_selected.setTo(zero);
}
l[0].setTo(zero, stream);
d[0].setTo(zero, stream);
r[0].setTo(zero, stream);
u[0].setTo(zero, stream);
l[1].setTo(zero, stream);
d[1].setTo(zero, stream);
r[1].setTo(zero, stream);
u[1].setTo(zero, stream);
data_cost.setTo(zero, stream);
data_cost_selected.setTo(zero, stream);
int cur_idx = 0;
@ -279,20 +261,14 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
if (stream)
stream.enqueueMemSet(out, zero);
else
out.setTo(zero);
out.setTo(zero, stream);
compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step, out, nr_plane_pyr[0], cudaStream);
if (disp.type() != CV_16S)
{
if (stream)
stream.enqueueConvert(out, disp, disp.type());
else
out.convertTo(disp, disp.type());
out.convertTo(disp, disp.type(), stream);
}
}

@ -47,7 +47,7 @@
# error gpuwarping.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/imgproc.hpp"
namespace cv { namespace gpu {

@ -45,7 +45,7 @@
#include "opencv2/gpuwarping.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -184,10 +184,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
{
if (stream)
stream.enqueueCopy(layer0_, outImg);
else
layer0_.copyTo(outImg);
layer0_.copyTo(outImg, stream);
}
float lastScale = 1.0f;
@ -202,10 +199,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
{
if (stream)
stream.enqueueCopy(curLayer, outImg);
else
curLayer.copyTo(outImg);
curLayer.copyTo(outImg, stream);
}
if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)

@ -77,10 +77,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
if (dsize == src.size())
{
if (s)
s.enqueueCopy(src, dst);
else
src.copyTo(dst);
src.copyTo(dst, s);
return;
}

@ -148,6 +148,8 @@ CV_EXPORTS_W void setTrackbarPos(const String& trackbarname, const String& winna
// OpenGL support
CV_EXPORTS void imshow(const String& winname, const ogl::Texture2D& tex);
CV_EXPORTS void setOpenGlDrawCallback(const String& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0);
CV_EXPORTS void setOpenGlContext(const String& winname);

@ -281,39 +281,64 @@ void cv::imshow( const String& winname, InputArray _img )
setOpenGlContext(winname);
if (_img.kind() == _InputArray::OPENGL_TEXTURE)
{
cv::ogl::Texture2D& tex = wndTexs[winname];
cv::ogl::Texture2D& tex = ownWndTexs[winname];
tex = _img.getOGlTexture2D();
if (_img.kind() == _InputArray::GPU_MAT)
{
cv::ogl::Buffer& buf = ownWndBufs[winname];
buf.copyFrom(_img);
buf.setAutoRelease(false);
tex.copyFrom(buf);
tex.setAutoRelease(false);
setOpenGlDrawCallback(winname, glDrawTextureCallback, &tex);
}
else
{
cv::ogl::Texture2D& tex = ownWndTexs[winname];
if (_img.kind() == _InputArray::GPU_MAT)
{
cv::ogl::Buffer& buf = ownWndBufs[winname];
buf.copyFrom(_img);
buf.setAutoRelease(false);
tex.copyFrom(buf);
tex.setAutoRelease(false);
}
else
{
tex.copyFrom(_img);
}
tex.copyFrom(_img);
}
tex.setAutoRelease(false);
tex.setAutoRelease(false);
setOpenGlDrawCallback(winname, glDrawTextureCallback, &tex);
updateWindow(winname);
}
#endif
}
void cv::imshow(const String& winname, const ogl::Texture2D& _tex)
{
#ifndef HAVE_OPENGL
(void) winname;
(void) _tex;
CV_Error(cv::Error::OpenGlNotSupported, "The library is compiled without OpenGL support");
#else
const double useGl = getWindowProperty(winname, WND_PROP_OPENGL);
setOpenGlDrawCallback(winname, glDrawTextureCallback, &tex);
if (useGl <= 0)
{
CV_Error(cv::Error::OpenGlNotSupported, "The window was created without OpenGL context");
}
else
{
const double autoSize = getWindowProperty(winname, WND_PROP_AUTOSIZE);
if (autoSize > 0)
{
Size size = _tex.size();
resizeWindow(winname, size.width, size.height);
}
setOpenGlContext(winname);
cv::ogl::Texture2D& tex = wndTexs[winname];
tex = _tex;
tex.setAutoRelease(false);
setOpenGlDrawCallback(winname, glDrawTextureCallback, &tex);
updateWindow(winname);
}
#endif
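The highgui changes add an imshow overload that takes an ogl::Texture2D directly and renders it through the window's OpenGL draw callback, skipping the download/upload round trip. A minimal usage sketch, assuming an OpenGL-enabled build; window and file names are illustrative:

#include "opencv2/core/opengl.hpp"
#include "opencv2/highgui.hpp"

int main()
{
    cv::namedWindow("tex", cv::WINDOW_OPENGL);   // the window must own an OpenGL context
    cv::Mat img = cv::imread("image.png");
    cv::ogl::Texture2D tex(img);                 // upload the image to an OpenGL texture
    cv::imshow("tex", tex);                      // new overload: draws the texture directly
    cv::waitKey();
    return 0;
}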

@ -43,7 +43,7 @@
#ifndef __OPENCV_NONFREE_GPU_HPP__
#define __OPENCV_NONFREE_GPU_HPP__
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {

@ -50,7 +50,7 @@
#include "opencv2/core/private.hpp"
#include "opencv2/nonfree/gpu.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -43,7 +43,7 @@
#ifndef __OPENCV_PHOTO_GPU_HPP__
#define __OPENCV_PHOTO_GPU_HPP__
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {

@ -43,7 +43,7 @@
#include "precomp.hpp"
#include "opencv2/photo/gpu.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#include "opencv2/opencv_modules.hpp"

@ -43,9 +43,10 @@
#ifndef __OPENCV_SOFTCASCADE_HPP__
#define __OPENCV_SOFTCASCADE_HPP__
#include <iosfwd>
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include <ostream>
#include "opencv2/core/gpu.hpp"
namespace cv { namespace softcascade {
@ -296,4 +297,4 @@ private:
}} // namespace cv { namespace softcascade {
#endif
#endif

@ -40,7 +40,7 @@
//
//M*/
#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/core/gpu_types.hpp"
#include "opencv2/core/cuda/common.hpp"
namespace cv { namespace softcascade { namespace cudev

@ -44,7 +44,7 @@
#ifndef __OPENCV_ICF_HPP__
#define __OPENCV_ICF_HPP__
#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/core/gpu_types.hpp"
#include "cuda_runtime_api.h"
#if defined __CUDACC__

@ -335,10 +335,7 @@ struct cv::softcascade::SCascade::Fields
void detect(cv::gpu::GpuMat& objects, cv::gpu::Stream& s) const
{
if (s)
s.enqueueMemSet(objects, 0);
else
cudaMemset(objects.data, 0, sizeof(Detection));
objects.setTo(Scalar::all(0), s);
cudaSafeCall( cudaGetLastError());
@ -354,16 +351,8 @@ struct cv::softcascade::SCascade::Fields
cv::gpu::GpuMat ndetections = cv::gpu::GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
if (s)
{
s.enqueueMemSet(overlaps, 0);
s.enqueueMemSet(suppressed, 0);
}
else
{
overlaps.setTo(0);
suppressed.setTo(0);
}
overlaps.setTo(0, s);
suppressed.setTo(0, s);
cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
cudev::suppress(objects, overlaps, ndetections, suppressed, stream);
@ -488,18 +477,12 @@ void integral(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& sum, cv::gpu::GpuMat&
cv::softcascade::cudev::shfl_integral(src, buffer, stream);
sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
if (s)
s.enqueueMemSet(sum, cv::Scalar::all(0));
else
sum.setTo(cv::Scalar::all(0));
sum.setTo(cv::Scalar::all(0), s);
cv::gpu::GpuMat inner = sum(cv::Rect(1, 1, src.cols, src.rows));
cv::gpu::GpuMat res = buffer(cv::Rect(0, 0, src.cols, src.rows));
if (s)
s.enqueueCopy(res, inner);
else
res.copyTo(inner);
res.copyTo(inner, s);
}
else {CV_Error(cv::Error::GpuNotSupported, ": CC 3.x required.");}
}
@ -541,10 +524,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
}
else
{
if (s)
s.enqueueCopy(image, flds.hogluv);
else
image.copyTo(flds.hogluv);
image.copyTo(flds.hogluv, s);
}
flds.detect(objects, s);
@ -571,10 +551,7 @@ using cv::gpu::GpuMat;
inline void setZero(cv::gpu::GpuMat& m, cv::gpu::Stream& s)
{
if (s)
s.enqueueMemSet(m, 0);
else
m.setTo(0);
m.setTo(0, s);
}
struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor

@ -43,11 +43,13 @@
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include <iostream>
#include "opencv2/softcascade.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
namespace cv { namespace softcascade { namespace internal
{

@ -41,7 +41,7 @@
//M*/
#include "test_precomp.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#ifdef HAVE_CUDA

@ -43,7 +43,7 @@
#define __OPENCV_SOFTCASCADE_TEST_UTILITY_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/ts.hpp"
//////////////////////////////////////////////////////////////////////

@ -44,7 +44,7 @@
#define __OPENCV_STITCHING_WARPERS_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/opencv_modules.hpp"

@ -52,7 +52,7 @@
#define __OPENCV_PERF_PRECOMP_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/ts.hpp"
#include "opencv2/ts/gpu_perf.hpp"
#include "opencv2/superres.hpp"

@ -57,10 +57,6 @@ Mat cv::superres::arrGetMat(InputArray arr, Mat& buf)
arr.getOGlBuffer().copyTo(buf);
return buf;
case _InputArray::OPENGL_TEXTURE:
arr.getOGlTexture2D().copyTo(buf);
return buf;
default:
return arr.getMat();
}
@ -77,10 +73,6 @@ GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf)
arr.getOGlBuffer().copyTo(buf);
return buf;
case _InputArray::OPENGL_TEXTURE:
arr.getOGlTexture2D().copyTo(buf);
return buf;
default:
buf.upload(arr.getMat());
return buf;
@ -97,10 +89,6 @@ namespace
{
dst.getOGlBufferRef().copyFrom(src);
}
void arr2tex(InputArray src, OutputArray dst)
{
dst.getOGlTexture2D().copyFrom(src);
}
void mat2gpu(InputArray src, OutputArray dst)
{
dst.getGpuMatRef().upload(src.getMat());
@ -109,10 +97,6 @@ namespace
{
src.getOGlBuffer().copyTo(dst);
}
void tex2arr(InputArray src, OutputArray dst)
{
src.getOGlTexture2D().copyTo(dst);
}
void gpu2mat(InputArray src, OutputArray dst)
{
GpuMat d = src.getGpuMat();
@ -132,15 +116,15 @@ void cv::superres::arrCopy(InputArray src, OutputArray dst)
static const func_t funcs[10][10] =
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr},
{0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr},
{0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu}
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu},
{0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 /*buf2arr*/, buf2arr},
{0, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/},
{0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 /*arr2tex*/, gpu2gpu}
};
const int src_kind = src.kind() >> _InputArray::KIND_SHIFT;

@ -44,7 +44,7 @@
#define __OPENCV_SUPERRES_INPUT_ARRAY_UTILITY_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
namespace cv
{

@ -48,14 +48,14 @@
#include "opencv2/opencv_modules.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/video/tracking.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/gpu_private.hpp"
#include "opencv2/core/private.gpu.hpp"
#ifdef HAVE_OPENCV_GPUARITHM
# include "opencv2/gpuarithm.hpp"

@ -45,7 +45,7 @@
#include <stdexcept>
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/ts.hpp"

@ -42,7 +42,7 @@
#include "precomp.hpp"
#include "opencv2/ts/gpu_perf.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
@ -287,8 +287,8 @@ namespace perf
cv::gpu::DeviceInfo info(i);
printf("[----------]\n"), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout);
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout);

@ -1,7 +1,7 @@
#include "precomp.hpp"
#ifdef HAVE_CUDA
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#endif
#ifdef ANDROID
@ -72,10 +72,6 @@ static void setCurrentThreadAffinityMask(int mask)
}
#endif
#ifdef HAVE_CUDA
# include <opencv2/core/gpumat.hpp>
#endif
namespace {
class PerfEnvironment: public ::testing::Environment
@ -686,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[])
cv::gpu::DeviceInfo info(param_cuda_device);
if (!info.isCompatible())
{
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
exit(-1);
}
cv::gpu::setDevice(param_cuda_device);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
}
#endif

@ -45,7 +45,7 @@
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/videostab/global_motion.hpp"
#include "opencv2/videostab/log.hpp"

@ -368,8 +368,8 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
gpu::Stream stream;
stream.enqueueConvert(b.gI1, b.t1, CV_32F);
stream.enqueueConvert(b.gI2, b.t2, CV_32F);
b.gI1.convertTo(b.t1, CV_32F, stream);
b.gI2.convertTo(b.t2, CV_32F, stream);
gpu::split(b.t1, b.vI1, stream);
gpu::split(b.t2, b.vI2, stream);
@ -379,16 +379,16 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
for( int i = 0; i < b.gI1.channels(); ++i )
{
gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, stream); // I2^2
gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, stream); // I1^2
gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, stream); // I1 * I2
gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, 1, -1, stream); // I2^2
gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, 1, -1, stream); // I1^2
gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, 1, -1, stream); // I1 * I2
gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);
gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);
gpu::multiply(b.mu1, b.mu1, b.mu1_2, stream);
gpu::multiply(b.mu2, b.mu2, b.mu2_2, stream);
gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, stream);
gpu::multiply(b.mu1, b.mu1, b.mu1_2, 1, -1, stream);
gpu::multiply(b.mu2, b.mu2, b.mu2_2, 1, -1, stream);
gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, 1, -1, stream);
gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream);
gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, gpu::GpuMat(), -1, stream);
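The similarity sample is updated because the stream-aware gpu::multiply overload takes an explicit scale factor and destination depth before the stream, so the old three-argument-plus-stream form no longer matches. A sketch of the adjusted call, with a, b and c standing in for arbitrary GpuMats (scale 1 and dtype -1 keep the input depth, as in the hunk above):

// c = a .* b; the work is enqueued on `stream` instead of running synchronously.
cv::gpu::multiply(a, b, c, 1, -1, stream);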

@ -82,8 +82,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@ -112,8 +112,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@ -62,8 +62,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@ -25,7 +25,7 @@ int main()
#include "opencv2/core/core.hpp"
#include "opencv2/core/opengl.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace std;

@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
DeviceInfo dev_info(device);
if (!dev_info.isCompatible())
{
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
return -1;
}
setDevice(device);

@ -81,8 +81,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}
