Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA kernels). Typically, it is used internally by OpenCV and by users who write device code. You can call its members from both host and device code. ::
PtrStepSz(int rows, int cols, T *data, size_t step);
template <typename U>
explicit DevMem2D_(const DevMem2D_<U>& d);
explicit PtrStepSz(const PtrStepSz<U>& d);
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
@ -34,25 +34,25 @@ Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compi
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
};
typedef DevMem2D_<unsigned char> DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
typedef PtrStepSz<unsigned char> PtrStepSzb;
typedef PtrStepSz<float> PtrStepSzf;
typedef PtrStepSz<int> PtrStepSzi;
gpu::PtrStep\_
gpu::PtrStep
--------------
..ocv:class:: gpu::PtrStep\_
..ocv:class:: gpu::PtrStep
Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
Structure similar to :ocv:class:`gpu::PtrStepSz` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
template<typename T> struct PtrStep_
template<typename T> struct PtrStep
{
T* data;
size_t step;
PtrStep_();
PtrStep_(const DevMem2D_<T>& mem);
PtrStep();
PtrStep(const PtrStepSz<T>& mem);
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
@ -62,25 +62,9 @@ Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer a
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
};
typedef PtrStep_<unsigned char> PtrStep;
typedef PtrStep_<float> PtrStepf;
typedef PtrStep_<int> PtrStepi;
gpu::PtrElemStep\_
------------------
..ocv:class:: gpu::PtrElemStep\_
Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and a row step in elements. Width and height fields are excluded due to performance reasons. This class can only be constructed if ``sizeof(T)`` is a multiple of 256. The structure is intended for internal use or for users who write device code. ::
template<typename T> struct PtrElemStep_ : public PtrStep_<T>
{
PtrElemStep_(const DevMem2D_<T>& mem);
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
};
typedef PtrStep<unsigned char> PtrStep;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
gpu::GpuMat
@ -93,7 +77,7 @@ Base storage class for GPU memory with reference counting. Its interface matches
* no functions that return references to their data (because references on GPU are not valid for CPU)
* no expression templates technique support
Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::DevMem2D_` and :ocv:class:`gpu::PtrStep_` so it can be passed directly to the kernel.
Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::PtrStepSz` and :ocv:class:`gpu::PtrStep` so it can be passed directly to the kernel.
..note:: In contrast with :ocv:class:`Mat`, in most cases ``GpuMat::isContinuous() == false`` . This means that rows are aligned to a size depending on the hardware. Single-row ``GpuMat`` is always a continuous matrix.
@ -113,10 +97,10 @@ Beware that the latter limitation may lead to overloaded matrix operators that c
//! builds GpuMat from Mat. Blocks uploading to device.
explicit GpuMat (const Mat& m);
//! returns lightweight DevMem2D_ structure for passing
//! returns lightweight PtrStepSz structure for passing
//to nvcc-compiled code. Contains size, data ptr and step.
template void linearColumnFilter_gpu<float , uchar >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream)
{
typedef void (*func_t)(const DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar, uchar>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar4, uchar4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort, ushort>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort4, ushort4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float, float>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float4, float4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result)
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream)
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream)
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn,
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
cudaStream_t stream)
{
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
DevMem2Df result, int cn, cudaStream_t stream)
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
PtrStepSzf result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
template void linearRowFilter_gpu<uchar , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
{add_gpu<unsignedchar,unsignedchar>,0/*add_gpu<unsigned char, signed char>*/,add_gpu<unsignedchar,unsignedshort>,add_gpu<unsignedchar,short>,add_gpu<unsignedchar,int>,add_gpu<unsignedchar,float>,add_gpu<unsignedchar,double>},
{add_gpu<unsignedchar,unsignedchar>,0/*add_gpu<unsigned char, signed char>*/,add_gpu<unsignedchar,unsignedshort>,add_gpu<unsignedchar,short>,add_gpu<unsignedchar,int>,add_gpu<unsignedchar,float>,add_gpu<unsignedchar,double>},
{subtract_gpu<unsignedchar,unsignedchar>,0/*subtract_gpu<unsigned char, signed char>*/,subtract_gpu<unsignedchar,unsignedshort>,subtract_gpu<unsignedchar,short>,subtract_gpu<unsignedchar,int>,subtract_gpu<unsignedchar,float>,subtract_gpu<unsignedchar,double>},
{subtract_gpu<unsignedchar,unsignedchar>,0/*subtract_gpu<unsigned char, signed char>*/,subtract_gpu<unsignedchar,unsignedshort>,subtract_gpu<unsignedchar,short>,subtract_gpu<unsignedchar,int>,subtract_gpu<unsignedchar,float>,subtract_gpu<unsignedchar,double>},
{multiply_gpu<unsignedchar,unsignedchar>,0/*multiply_gpu<unsigned char, signed char>*/,multiply_gpu<unsignedchar,unsignedshort>,multiply_gpu<unsignedchar,short>,multiply_gpu<unsignedchar,int>,multiply_gpu<unsignedchar,float>,multiply_gpu<unsignedchar,double>},
{multiply_gpu<unsignedchar,unsignedchar>,0/*multiply_gpu<unsigned char, signed char>*/,multiply_gpu<unsignedchar,unsignedshort>,multiply_gpu<unsignedchar,short>,multiply_gpu<unsignedchar,int>,multiply_gpu<unsignedchar,float>,multiply_gpu<unsignedchar,double>},
{divide_gpu<unsignedchar,unsignedchar>,0/*divide_gpu<unsigned char, signed char>*/,divide_gpu<unsignedchar,unsignedshort>,divide_gpu<unsignedchar,short>,divide_gpu<unsignedchar,int>,divide_gpu<unsignedchar,float>,divide_gpu<unsignedchar,double>},
{divide_gpu<unsignedchar,unsignedchar>,0/*divide_gpu<unsigned char, signed char>*/,divide_gpu<unsignedchar,unsignedshort>,divide_gpu<unsignedchar,short>,divide_gpu<unsignedchar,int>,divide_gpu<unsignedchar,float>,divide_gpu<unsignedchar,double>},
{divide_gpu<unsignedchar,unsignedchar>,0/*divide_gpu<unsigned char, signed char>*/,divide_gpu<unsignedchar,unsignedshort>,divide_gpu<unsignedchar,short>,divide_gpu<unsignedchar,int>,divide_gpu<unsignedchar,float>,divide_gpu<unsignedchar,double>},
@ -955,10 +955,10 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St