diff --git a/modules/core/include/opencv2/core/devmem2d.hpp b/modules/core/include/opencv2/core/cuda_devptrs.hpp similarity index 77% rename from modules/core/include/opencv2/core/devmem2d.hpp rename to modules/core/include/opencv2/core/cuda_devptrs.hpp index 276aeb2331..257f5960e5 100644 --- a/modules/core/include/opencv2/core/devmem2d.hpp +++ b/modules/core/include/opencv2/core/cuda_devptrs.hpp @@ -1,161 +1,160 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other GpuMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_CORE_DevMem2D_HPP__ -#define __OPENCV_CORE_DevMem2D_HPP__ - -#ifdef __cplusplus - -#ifdef __CUDACC__ - #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__ -#else - #define __CV_GPU_HOST_DEVICE__ -#endif - -namespace cv -{ - namespace gpu - { - // Simple lightweight structures that encapsulates information about an image on device. - // It is intended to pass to nvcc-compiled code. 
GpuMat depends on headers that nvcc can't compile - - template struct StaticAssert; - template <> struct StaticAssert {static __CV_GPU_HOST_DEVICE__ void check(){}}; - - template struct DevPtr - { - typedef T elem_type; - typedef int index_type; - - enum { elem_size = sizeof(elem_type) }; - - T* data; - - __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {} - __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {} - - __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } - __CV_GPU_HOST_DEVICE__ operator T*() { return data; } - __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; } - }; - - template struct PtrSz : public DevPtr - { - __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {} - __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr(data_), size(size_) {} - - size_t size; - }; - - template struct PtrStep : public DevPtr - { - __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {} - __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr(data_), step(step_) {} - - /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */ - size_t step; - - __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr::data + y * step); } - __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr::data + y * step); } - - __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } - __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } - }; - - template struct PtrStepSz : public PtrStep - { - __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {} - __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_) - : PtrStep(data_, step_), cols(cols_), rows(rows_) {} - - int cols; - int rows; - }; - - template struct DevMem2D_ : public PtrStepSz - { - DevMem2D_() {} - DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz(rows_, cols_, data_, step_) {} - - template - explicit DevMem2D_(const DevMem2D_& d) : PtrStepSz(d.rows, d.cols, (T*)d.data, d.step) {} - }; - - template struct PtrElemStep_ : public PtrStep - { - PtrElemStep_(const DevMem2D_& mem) : PtrStep(mem.data, mem.step) - { - StaticAssert<256 % sizeof(T) == 0>::check(); - - PtrStep::step /= PtrStep::elem_size; - } - __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep::data + y * PtrStep::step; } - __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep::data + y * PtrStep::step; } - - __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } - __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } - }; - - template struct PtrStep_ : public PtrStep - { - PtrStep_() {} - PtrStep_(const DevMem2D_& mem) : PtrStep(mem.data, mem.step) {} - }; - - typedef DevMem2D_ DevMem2Db; - typedef DevMem2Db DevMem2D; - typedef DevMem2D_ DevMem2Df; - typedef DevMem2D_ DevMem2Di; - - typedef PtrStep PtrStepb; - typedef PtrStep PtrStepf; - typedef PtrStep PtrStepi; - - typedef PtrElemStep_ PtrElemStep; - typedef PtrElemStep_ PtrElemStepf; - typedef PtrElemStep_ PtrElemStepi; - } -} - -#endif // __cplusplus - -#endif /* __OPENCV_GPU_DevMem2D_HPP__ */ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CORE_DEVPTRS_HPP__
+#define __OPENCV_CORE_DEVPTRS_HPP__
+
+#ifdef __cplusplus
+
+#ifdef __CUDACC__
+    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_GPU_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace gpu
+    {
+        // Simple lightweight structures that encapsulate information about an image on the device.
+        // They are intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile.
+
+        template <bool expr> struct StaticAssert;
+        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
+
+        template <typename T> struct DevPtr
+        {
+            typedef T elem_type;
+            typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) };
+
+            T* data;
+
+            __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
+            __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
+
+            __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_GPU_HOST_DEVICE__ operator       T*()       { return data; }
+            __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template <typename T> struct PtrSz : public DevPtr<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size;
+        };
+
+        template <typename T> struct PtrStep : public DevPtr<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+
+            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
+            size_t step;
+
+            __CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
+            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
+
+            __CV_GPU_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
+            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            template <typename U>
+            explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
+
+            int cols;
+            int rows;
+        };
+
+        typedef PtrStepSz<unsigned char> PtrStepSzb;
+        typedef PtrStepSz<float> PtrStepSzf;
+        typedef PtrStepSz<int> PtrStepSzi;
+
+        typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+
+#if defined __GNUC__
+    #define __CV_GPU_DEPR_BEFORE__
+    #define __CV_GPU_DEPR_AFTER__ __attribute__ ((deprecated))
+#elif defined(_MSC_VER) //|| defined(__CUDACC__)
+    #pragma deprecated(DevMem2D_)
+    #define __CV_GPU_DEPR_BEFORE__ __declspec(deprecated)
+    #define __CV_GPU_DEPR_AFTER__
+#else
+    #define __CV_GPU_DEPR_BEFORE__
+    #define __CV_GPU_DEPR_AFTER__
+#endif
+
+        template <typename T> struct __CV_GPU_DEPR_BEFORE__ DevMem2D_ : public PtrStepSz<T>
+        {
+            DevMem2D_() {}
+            DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
+
+            template <typename U>
+            explicit __CV_GPU_DEPR_BEFORE__ DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
+        } __CV_GPU_DEPR_AFTER__ ;
+
+        typedef DevMem2D_<unsigned char> DevMem2Db;
+        typedef DevMem2Db DevMem2D;
+        typedef DevMem2D_<float> DevMem2Df;
+        typedef DevMem2D_<int> DevMem2Di;
+
+//#undef __CV_GPU_DEPR_BEFORE__
+//#undef __CV_GPU_DEPR_AFTER__
+
+    }
+}
+
+#endif // __cplusplus
+
+#endif /* __OPENCV_CORE_DEVPTRS_HPP__ */
diff --git a/modules/core/include/opencv2/core/gpumat.hpp b/modules/core/include/opencv2/core/gpumat.hpp
index e09f1bc4c3..c0ae17b268 100644
--- a/modules/core/include/opencv2/core/gpumat.hpp
+++ b/modules/core/include/opencv2/core/gpumat.hpp
@@ -46,7 +46,7 @@
 #ifdef __cplusplus
 
 #include "opencv2/core/core.hpp"
-#include "opencv2/core/devmem2d.hpp"
+#include "opencv2/core/cuda_devptrs.hpp"
 
 namespace cv { namespace gpu
 {
@@ -268,10 +268,14 @@ namespace cv { namespace gpu
         template <typename _Tp> _Tp* ptr(int y = 0);
         template <typename _Tp> const _Tp* ptr(int y = 0) const;
 
-        template <typename _Tp> operator DevMem2D_<_Tp>() const;
-        template <typename _Tp> operator PtrStep_<_Tp>() const;
+        template <typename _Tp> operator PtrStepSz<_Tp>() const;
         template <typename _Tp> operator PtrStep<_Tp>() const;
 
+        // Deprecated function
+        __CV_GPU_DEPR_BEFORE__ template <typename _Tp> operator DevMem2D_<_Tp>() const __CV_GPU_DEPR_AFTER__;
+        #undef __CV_GPU_DEPR_BEFORE__
+        #undef __CV_GPU_DEPR_AFTER__
+
         /*!
includes several bit-fields: - the magic signature - continuity flag @@ -502,19 +506,19 @@ namespace cv { namespace gpu return *this; } - template inline GpuMat::operator DevMem2D_() const + template inline GpuMat::operator PtrStepSz() const { - return DevMem2D_(rows, cols, (T*)data, step); + return PtrStepSz(rows, cols, (T*)data, step); } - template inline GpuMat::operator PtrStep_() const + template inline GpuMat::operator PtrStep() const { - return PtrStep_(static_cast< DevMem2D_ >(*this)); + return PtrStep((T*)data, step); } - template inline GpuMat::operator PtrStep() const + template inline GpuMat::operator DevMem2D_() const { - return PtrStep((T*)data, step); + return DevMem2D_(rows, cols, (T*)data, step); } inline GpuMat createContinuous(int rows, int cols, int type) diff --git a/modules/core/src/cuda/matrix_operations.cu b/modules/core/src/cuda/matrix_operations.cu index 671fa5fc31..f475136c87 100644 --- a/modules/core/src/cuda/matrix_operations.cu +++ b/modules/core/src/cuda/matrix_operations.cu @@ -44,6 +44,18 @@ #include "opencv2/gpu/device/transform.hpp" #include "opencv2/gpu/device/functional.hpp" +namespace cv { namespace gpu { namespace device +{ + void writeScalar(const uchar*); + void writeScalar(const schar*); + void writeScalar(const ushort*); + void writeScalar(const short int*); + void writeScalar(const int*); + void writeScalar(const float*); + void writeScalar(const double*); + void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t); +}}} + namespace cv { namespace gpu { namespace device { template struct shift_and_sizeof; @@ -59,17 +71,17 @@ namespace cv { namespace gpu { namespace device ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// - template void copyToWithMask(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream) + template void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream) { if (colorMask) - cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, identity(), SingleMask(mask), stream); + cv::gpu::device::transform((PtrStepSz)src, (PtrStepSz)dst, identity(), SingleMask(mask), stream); else - cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, identity(), SingleMaskChannels(mask, cn), stream); + cv::gpu::device::transform((PtrStepSz)src, (PtrStepSz)dst, identity(), SingleMaskChannels(mask, cn), stream); } - void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, size_t elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream) + void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream) { - typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream); static func_t tab[] = { @@ -164,7 +176,7 @@ namespace cv { namespace gpu { namespace device } } template - void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream) + void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream) { writeScalar(scalar); @@ -178,16 +190,16 @@ namespace cv { namespace gpu { namespace device cudaSafeCall ( cudaDeviceSynchronize() ); } - template void set_to_gpu(DevMem2Db mat, const uchar* scalar, DevMem2Db mask, int 
channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const schar* scalar, DevMem2Db mask, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const ushort* scalar, DevMem2Db mask, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const short* scalar, DevMem2Db mask, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const int* scalar, DevMem2Db mask, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const float* scalar, DevMem2Db mask, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const double* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); template - void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream) + void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream) { writeScalar(scalar); @@ -201,13 +213,13 @@ namespace cv { namespace gpu { namespace device cudaSafeCall ( cudaDeviceSynchronize() ); } - template void set_to_gpu(DevMem2Db mat, const uchar* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const schar* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const ushort* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const short* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const int* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const float* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(DevMem2Db mat, const double* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream); /////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// @@ -274,12 +286,12 @@ namespace cv { namespace gpu { namespace device }; template - void cvt_(DevMem2Db src, DevMem2Db dst, double alpha, double beta, 
cudaStream_t stream) + void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream) { cudaSafeCall( cudaSetDoubleForDevice(&alpha) ); cudaSafeCall( cudaSetDoubleForDevice(&beta) ); Convertor op(alpha, beta); - cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, op, WithOutMask(), stream); + cv::gpu::device::transform((PtrStepSz)src, (PtrStepSz)dst, op, WithOutMask(), stream); } #if defined __clang__ @@ -287,9 +299,9 @@ namespace cv { namespace gpu { namespace device # pragma clang diagnostic ignored "-Wmissing-declarations" #endif - void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream) + void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream) { - typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream); + typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream); static const caller_t tab[8][8] = { diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index 105a5ff15f..07d7363868 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -761,15 +761,15 @@ namespace namespace cv { namespace gpu { namespace device { - void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, size_t elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream); + void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream); template - void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream); + void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream); template - void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream); + void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream); }}} namespace @@ -787,9 +787,22 @@ namespace } } + +namespace cv { namespace gpu +{ + CV_EXPORTS void copyWithMask(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, const cv::gpu::GpuMat&, CUstream_st*); + CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&); + CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, double, double, CUstream_st*); + CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, CUstream_st*); + CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, CUstream_st*); + CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar); + CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&); +}} + + namespace cv { namespace gpu { - CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) + void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) { CV_Assert(src.size() == dst.size() && src.type() == dst.type()); CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels())); @@ -797,17 +810,17 @@ namespace cv { namespace gpu cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream); } - 
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
+    void convertTo(const GpuMat& src, GpuMat& dst)
     {
         cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
     }
 
-    CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
+    void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
     {
         cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
     }
 
-    CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
+    void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
     {
         typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream);
 
@@ -820,7 +833,7 @@ namespace cv { namespace gpu
         callers[src.depth()](src, s, stream);
     }
 
-    CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
+    void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
     {
         typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
 
@@ -833,12 +846,12 @@ namespace cv { namespace gpu
         callers[src.depth()](src, s, mask, stream);
     }
 
-    CV_EXPORTS void setTo(GpuMat& src, Scalar s)
+    void setTo(GpuMat& src, Scalar s)
     {
         setTo(src, s, 0);
    }
 
-    CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
+    void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
    {
        setTo(src, s, mask, 0);
    }
diff --git a/modules/gpu/doc/data_structures.rst b/modules/gpu/doc/data_structures.rst
index 3152c510b4..68e702a793 100644
--- a/modules/gpu/doc/data_structures.rst
+++ b/modules/gpu/doc/data_structures.rst
@@ -5,24 +5,24 @@ Data Structures
 
-gpu::DevMem2D\_
+gpu::PtrStepSz
 ---------------
-.. ocv:class:: gpu::DevMem2D\_
+.. ocv:class:: gpu::PtrStepSz
 
 Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA kernels). Typically, it is used internally by OpenCV and by users who write device code. You can call its members from both host and device code. ::
 
-    template <typename T> struct DevMem2D_
+    template <typename T> struct PtrStepSz
     {
         int cols;
         int rows;
         T* data;
         size_t step;
 
-        DevMem2D_() : cols(0), rows(0), data(0), step(0){};
-        DevMem2D_(int rows, int cols, T *data, size_t step);
+        PtrStepSz() : cols(0), rows(0), data(0), step(0){};
+        PtrStepSz(int rows, int cols, T *data, size_t step);
 
         template <typename U>
-        explicit DevMem2D_(const DevMem2D_<U>& d);
+        explicit PtrStepSz(const PtrStepSz<U>& d);
 
         typedef T elem_type;
         enum { elem_size = sizeof(elem_type) };
 
         __CV_GPU_HOST_DEVICE__ size_t elemSize() const;
         __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
@@ -34,25 +34,25 @@ Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compi
         __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
     };
 
-    typedef DevMem2D_<unsigned char> DevMem2D;
-    typedef DevMem2D_<float> DevMem2Df;
-    typedef DevMem2D_<int> DevMem2Di;
+    typedef PtrStepSz<unsigned char> PtrStepSzb;
+    typedef PtrStepSz<float> PtrStepSzf;
+    typedef PtrStepSz<int> PtrStepSzi;
 
 
-gpu::PtrStep\_
+gpu::PtrStep
 --------------
-.. ocv:class:: gpu::PtrStep\_
+.. ocv:class:: gpu::PtrStep
 
-Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
+Structure similar to :ocv:class:`gpu::PtrStepSz` but containing only a pointer and row step. Width and height fields are excluded for performance reasons. The structure is intended for internal use or for users who write device code. ::
 
-    template <typename T> struct PtrStep_
+    template <typename T> struct PtrStep
     {
         T* data;
         size_t step;
 
-        PtrStep_();
-        PtrStep_(const DevMem2D_<T>& mem);
+        PtrStep();
+        PtrStep(const PtrStepSz<T>& mem);
 
         typedef T elem_type;
         enum { elem_size = sizeof(elem_type) };
 
         __CV_GPU_HOST_DEVICE__ size_t elemSize() const;
         __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
@@ -62,25 +62,9 @@ Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer a
         __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
     };
 
-    typedef PtrStep_<unsigned char> PtrStep;
-    typedef PtrStep_<float> PtrStepf;
-    typedef PtrStep_<int> PtrStepi;
-
-
-
-gpu::PtrElemStep\_
-------------------
-.. ocv:class:: gpu::PtrElemStep\_
-
-Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and a row step in elements. Width and height fields are excluded due to performance reasons. This class can only be constructed if ``sizeof(T)`` is a multiple of 256. The structure is intended for internal use or for users who write device code. ::
-
-    template <typename T> struct PtrElemStep_ : public PtrStep_<T>
-    {
-        PtrElemStep_(const DevMem2D_<T>& mem);
-        __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
-        __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
-    };
-
+    typedef PtrStep<unsigned char> PtrStepb;
+    typedef PtrStep<float> PtrStepf;
+    typedef PtrStep<int> PtrStepi;
 
 
 gpu::GpuMat
@@ -93,7 +77,7 @@ Base storage class for GPU memory with reference counting. Its interface matches
 * no functions that return references to their data (because references on GPU are not valid for CPU)
 * no expression templates technique support
 
-Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::DevMem2D_` and :ocv:class:`gpu::PtrStep_` so it can be passed directly to the kernel.
+Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::PtrStepSz` and :ocv:class:`gpu::PtrStep` so it can be passed directly to the kernel.
 
 .. note:: In contrast with :ocv:class:`Mat`, in most cases ``GpuMat::isContinuous() == false`` . This means that rows are aligned to a size depending on the hardware. Single-row ``GpuMat`` is always a continuous matrix.
 
@@ -113,10 +97,10 @@ Beware that the latter limitation may lead to overloaded matrix operators that c
         //! builds GpuMat from Mat. Blocks uploading to device.
         explicit GpuMat (const Mat& m);
 
-        //! returns lightweight DevMem2D_ structure for passing
+        //! returns lightweight PtrStepSz structure for passing
         //to nvcc-compiled code. Contains size, data ptr and step.
-        template <class T> operator DevMem2D_<T>() const;
-        template <class T> operator PtrStep_<T>() const;
+        template <class T> operator PtrStepSz<T>() const;
+        template <class T> operator PtrStep<T>() const;
 
         //! blocks uploading data to GpuMat.
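+        //! (synchronous: returns only after the host data has been copied to device memory)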
void upload(const cv::Mat& m); diff --git a/modules/gpu/include/opencv2/gpu/devmem2d.hpp b/modules/gpu/include/opencv2/gpu/devmem2d.hpp index 33af66afa1..6d4bc50557 100644 --- a/modules/gpu/include/opencv2/gpu/devmem2d.hpp +++ b/modules/gpu/include/opencv2/gpu/devmem2d.hpp @@ -40,4 +40,4 @@ // //M*/ -#include "opencv2/core/devmem2d.hpp" +#include "opencv2/core/cuda_devptrs.hpp" diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 45196a522b..67f3936f99 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -454,8 +454,8 @@ namespace cv { namespace gpu { namespace device { namespace mathfunc { - void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream); - void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream); + void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream); + void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream); } }}} diff --git a/modules/gpu/src/bgfg_gmg.cpp b/modules/gpu/src/bgfg_gmg.cpp index 6e0ed9e631..f4133f4ff5 100644 --- a/modules/gpu/src/bgfg_gmg.cpp +++ b/modules/gpu/src/bgfg_gmg.cpp @@ -58,7 +58,7 @@ namespace cv { namespace gpu { namespace device { float decisionThreshold, int maxFeatures, int numInitializationFrames); template - void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); } }}} @@ -109,7 +109,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat { using namespace cv::gpu::device::bgfg_gmg; - typedef void (*func_t)(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, + typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); static const func_t funcs[6][4] = { diff --git a/modules/gpu/src/bgfg_mog.cpp b/modules/gpu/src/bgfg_mog.cpp index 7c0e924d0d..94668e817f 100644 --- a/modules/gpu/src/bgfg_mog.cpp +++ b/modules/gpu/src/bgfg_mog.cpp @@ -62,14 +62,14 @@ namespace cv { namespace gpu { namespace device { namespace mog { - void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, + void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream); - void getBackgroundImage_gpu(int cn, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream); + void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream); void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); - void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float 
prune, bool detectShadows, cudaStream_t stream); - void getBackgroundImage2_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream); + void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); } }}} diff --git a/modules/gpu/src/bgfg_vibe.cpp b/modules/gpu/src/bgfg_vibe.cpp index af90c428ed..cf6d3c5340 100644 --- a/modules/gpu/src/bgfg_vibe.cpp +++ b/modules/gpu/src/bgfg_vibe.cpp @@ -57,9 +57,9 @@ namespace cv { namespace gpu { namespace device { void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor); - void init_gpu(DevMem2Db frame, int cn, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream); + void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - void update_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream); + void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); } }}} diff --git a/modules/gpu/src/bilateral_filter.cpp b/modules/gpu/src/bilateral_filter.cpp index d24adee90e..04c312688a 100644 --- a/modules/gpu/src/bilateral_filter.cpp +++ b/modules/gpu/src/bilateral_filter.cpp @@ -59,10 +59,10 @@ namespace cv { namespace gpu { namespace device { namespace bilateral_filter { - void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc); + void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc); - void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream); - void bilateral_filter_gpu(DevMem2D_ disp, DevMem2Db img, int channels, int iters, cudaStream_t stream); + void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream); + void bilateral_filter_gpu(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream); } }}} @@ -120,7 +120,7 @@ namespace disp.copyTo(dst); } - bilateral_filter_gpu((DevMem2D_)dst, img, img.channels(), iters, StreamAccessor::getStream(stream)); + bilateral_filter_gpu((PtrStepSz)dst, img, img.channels(), iters, StreamAccessor::getStream(stream)); } typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp index a1fe066136..5e0ef21afe 100644 --- a/modules/gpu/src/brute_force_matcher.cpp +++ b/modules/gpu/src/brute_force_matcher.cpp @@ -86,72 +86,72 @@ namespace cv { namespace gpu { namespace device { namespace bf_match { - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL2_gpu(const 
PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); } namespace bf_knnmatch { - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); - template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const 
DevMem2Db& distance, + template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); } namespace bf_radius_match { - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); } }}} @@ -200,8 +200,8 @@ 
void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& trai using namespace cv::gpu::device::bf_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); static const caller_t callersL1[] = @@ -301,9 +301,9 @@ void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat& if (masks.empty()) { - Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(DevMem2Db))); + Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); - DevMem2Db* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr) *trainCollectionCPU_ptr = trainDescCollection[i]; @@ -315,10 +315,10 @@ void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat& { CV_Assert(masks.size() == trainDescCollection.size()); - Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(DevMem2Db))); + Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); Mat maskCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb))); - DevMem2Db* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr(); for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr) @@ -346,8 +346,8 @@ void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat& using namespace cv::gpu::device::bf_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); static const caller_t callersL1[] = @@ -460,8 +460,8 @@ void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& t using namespace cv::gpu::device::bf_knnmatch; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); static const caller_t callersL1[] = @@ -592,8 +592,8 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuM using namespace cv::gpu::device::bf_knnmatch; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t 
stream); static const caller_t callersL1[] = @@ -776,8 +776,8 @@ void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat using namespace cv::gpu::device::bf_radius_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); static const caller_t callersL1[] = @@ -911,8 +911,8 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat& using namespace cv::gpu::device::bf_radius_match; - typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); static const caller_t callersL1[] = @@ -964,8 +964,8 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat& caller_t func = callers[query.depth()]; CV_Assert(func != 0); - vector trains_(trainDescCollection.begin(), trainDescCollection.end()); - vector masks_(masks.begin(), masks.end()); + vector trains_(trainDescCollection.begin(), trainDescCollection.end()); + vector masks_(masks.begin(), masks.end()); func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 
0 : &masks_[0], trainIdx, imgIdx, distance, nMatches, cc, StreamAccessor::getStream(stream)); diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp index 8897996269..efbd76eb17 100644 --- a/modules/gpu/src/calib3d.cpp +++ b/modules/gpu/src/calib3d.cpp @@ -60,12 +60,12 @@ namespace cv { namespace gpu { namespace device { namespace transform_points { - void call(const DevMem2D_ src, const float* rot, const float* transl, DevMem2D_ dst, cudaStream_t stream); + void call(const PtrStepSz src, const float* rot, const float* transl, PtrStepSz dst, cudaStream_t stream); } namespace project_points { - void call(const DevMem2D_ src, const float* rot, const float* transl, const float* proj, DevMem2D_ dst, cudaStream_t stream); + void call(const PtrStepSz src, const float* rot, const float* transl, const float* proj, PtrStepSz dst, cudaStream_t stream); } namespace solve_pnp_ransac diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index b5958697c9..d6fb10c98b 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -352,18 +352,18 @@ namespace cv { namespace gpu { namespace device float initalScale, float factor, int total, - const DevMem2Db& mstages, + const PtrStepSzb& mstages, const int nstages, - const DevMem2Di& mnodes, - const DevMem2Df& mleaves, - const DevMem2Di& msubsets, - const DevMem2Db& mfeatures, + const PtrStepSzi& mnodes, + const PtrStepSzf& mleaves, + const PtrStepSzi& msubsets, + const PtrStepSzb& mfeatures, const int subsetSize, - DevMem2D_ objects, + PtrStepSz objects, unsigned int* classified, - DevMem2Di integral); + PtrStepSzi integral); - void connectedConmonents(DevMem2D_ candidates, int ncandidates, DevMem2D_ objects,int groupThreshold, float grouping_eps, unsigned int* nclasses); + void connectedConmonents(PtrStepSz candidates, int ncandidates, PtrStepSz objects,int groupThreshold, float grouping_eps, unsigned int* nclasses); } }}} diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp index cb2ae33d71..b8c5942d82 100644 --- a/modules/gpu/src/color.cpp +++ b/modules/gpu/src/color.cpp @@ -59,9 +59,9 @@ namespace cv { namespace gpu { namespace device { template - void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); template - void Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); } }} @@ -69,7 +69,7 @@ using namespace ::cv::gpu::device; namespace { - typedef void (*gpu_func_t)(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream); + typedef void (*gpu_func_t)(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) { @@ -1336,7 +1336,7 @@ namespace void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream) { - typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); static const func_t funcs[3][4] = { {0,0,Bayer2BGR_8u_gpu<3>, Bayer2BGR_8u_gpu<4>}, diff --git a/modules/gpu/src/cuda/NV12ToARGB.cu 
b/modules/gpu/src/cuda/NV12ToARGB.cu index 6430e2dc62..32d5749099 100644 --- a/modules/gpu/src/cuda/NV12ToARGB.cu +++ b/modules/gpu/src/cuda/NV12ToARGB.cu @@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device { dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha); } - void NV12ToARGB_gpu(const PtrStepb decodedFrame, DevMem2D_ interopFrame, cudaStream_t stream) + void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz interopFrame, cudaStream_t stream) { dim3 block(32, 8); dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y)); diff --git a/modules/gpu/src/cuda/bf_knnmatch.cu b/modules/gpu/src/cuda/bf_knnmatch.cu index 8ee3c26c71..b545cb5ab2 100644 --- a/modules/gpu/src/cuda/bf_knnmatch.cu +++ b/modules/gpu/src/cuda/bf_knnmatch.cu @@ -209,7 +209,7 @@ namespace cv { namespace gpu { namespace device // Match Unrolled Cached template - __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_& query, U* s_query) + __device__ void loadQueryToSmem(int queryIdx, const PtrStepSz& query, U* s_query) { #pragma unroll for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i) @@ -220,7 +220,7 @@ namespace cv { namespace gpu { namespace device } template - __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, + __device__ void loopUnrolledCached(int queryIdx, const PtrStepSz& query, int imgIdx, const PtrStepSz& train, const Mask& mask, typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance1, float& bestDistance2, int& bestTrainIdx1, int& bestTrainIdx2, @@ -281,7 +281,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) + __global__ void matchUnrolledCached(const PtrStepSz query, const PtrStepSz train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -314,8 +314,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& distance, + void matchUnrolledCached(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSz& trainIdx, const PtrStepSz& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -331,7 +331,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) + __global__ void matchUnrolledCached(const PtrStepSz query, const PtrStepSz* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -353,7 +353,7 @@ namespace cv { namespace gpu { namespace device for (int imgIdx = 0; imgIdx < n; ++imgIdx) { - const DevMem2D_ train = trains[imgIdx]; + const PtrStepSz train = trains[imgIdx]; m.next(); loopUnrolledCached(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2); } @@ -375,8 +375,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& 
imgIdx, const DevMem2D_& distance, + void matchUnrolledCached(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSz& trainIdx, const PtrStepSz& imgIdx, const PtrStepSz& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -395,7 +395,7 @@ namespace cv { namespace gpu { namespace device // Match Unrolled template - __device__ void loopUnrolled(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, + __device__ void loopUnrolled(int queryIdx, const PtrStepSz& query, int imgIdx, const PtrStepSz& train, const Mask& mask, typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance1, float& bestDistance2, int& bestTrainIdx1, int& bestTrainIdx2, @@ -460,7 +460,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) + __global__ void matchUnrolled(const PtrStepSz query, const PtrStepSz train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -491,8 +491,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& distance, + void matchUnrolled(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSz& trainIdx, const PtrStepSz& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -508,7 +508,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) + __global__ void matchUnrolled(const PtrStepSz query, const PtrStepSz* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -528,7 +528,7 @@ namespace cv { namespace gpu { namespace device for (int imgIdx = 0; imgIdx < n; ++imgIdx) { - const DevMem2D_ train = trains[imgIdx]; + const PtrStepSz train = trains[imgIdx]; m.next(); loopUnrolled(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2); } @@ -550,8 +550,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, + void matchUnrolled(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSz& trainIdx, const PtrStepSz& imgIdx, const PtrStepSz& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -570,7 +570,7 @@ namespace cv { namespace gpu { namespace device // Match template - __device__ void loop(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, + __device__ void loop(int queryIdx, const PtrStepSz& query, int imgIdx, const PtrStepSz& train, const Mask& mask, typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance1, float& bestDistance2, int& bestTrainIdx1, int& bestTrainIdx2, @@ -634,7 +634,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void match(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int2* 
bestTrainIdx, float2* bestDistance) + __global__ void match(const PtrStepSz query, const PtrStepSz train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -665,8 +665,8 @@ namespace cv { namespace gpu { namespace device } template - void match(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& distance, + void match(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSz& trainIdx, const PtrStepSz& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -682,7 +682,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void match(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) + __global__ void match(const PtrStepSz query, const PtrStepSz* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -702,7 +702,7 @@ namespace cv { namespace gpu { namespace device for (int imgIdx = 0; imgIdx < n; ++imgIdx) { - const DevMem2D_ train = trains[imgIdx]; + const PtrStepSz train = trains[imgIdx]; m.next(); loop(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2); } @@ -724,8 +724,8 @@ namespace cv { namespace gpu { namespace device } template - void match(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, + void match(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSz& trainIdx, const PtrStepSz& imgIdx, const PtrStepSz& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -744,66 +744,66 @@ namespace cv { namespace gpu { namespace device // knnMatch 2 dispatcher template - void match2Dispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, + void match2Dispatcher(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream) { (void)cc; if (query.cols <= 64) { - matchUnrolledCached<16, 64, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolledCached<16, 64, Dist>(query, train, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz > (distance), stream); } else if (query.cols <= 128) { - matchUnrolledCached<16, 128, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolledCached<16, 128, Dist>(query, train, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz > (distance), stream); } /*else if (query.cols <= 256) { - matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz > (distance), stream); } else if (query.cols <= 512) { - matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz > (distance), stream); } else if (query.cols <= 
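
// [Sketch] The dispatch rule in match2Dispatcher above, with stub functions
// standing in for the real kernels: the descriptor length (query.cols)
// selects a variant whose inner loop is unrolled for a compile-time bound;
// the 256/512/1024 branches are commented out in this revision, so anything
// wider than 128 falls through to the generic looping kernel.

template <int MAX_DESC_LEN> void unrolledStub() { /* compile-time-unrolled loop */ }
void genericStub()                              { /* runtime loop */ }

void dispatchByWidthSketch(int descriptorCols)
{
    if (descriptorCols <= 64)
        unrolledStub<64>();
    else if (descriptorCols <= 128)
        unrolledStub<128>();
    else
        genericStub();
}
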
1024) { - matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz > (distance), stream); }*/ else { - match<16, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); + match<16, Dist>(query, train, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz > (distance), stream); } } template - void match2Dispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + void match2Dispatcher(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream) { (void)cc; if (query.cols <= 64) { - matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz >(imgIdx), static_cast< PtrStepSz > (distance), stream); } else if (query.cols <= 128) { - matchUnrolledCached<16, 128, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolledCached<16, 128, Dist>(query, trains, n, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz >(imgIdx), static_cast< PtrStepSz > (distance), stream); } /*else if (query.cols <= 256) { - matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz >(imgIdx), static_cast< PtrStepSz > (distance), stream); } else if (query.cols <= 512) { - matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz >(imgIdx), static_cast< PtrStepSz > (distance), stream); } else if (query.cols <= 1024) { - matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); + matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz >(imgIdx), static_cast< PtrStepSz > (distance), stream); }*/ else { - match<16, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); + match<16, Dist>(query, trains, n, mask, static_cast< PtrStepSz >(trainIdx), static_cast< PtrStepSz >(imgIdx), static_cast< PtrStepSz > (distance), stream); } } @@ -811,7 +811,7 @@ namespace cv { namespace gpu { namespace device // Calc distance kernel template - __global__ void calcDistanceUnrolled(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, PtrStepf allDist) + __global__ void calcDistanceUnrolled(const PtrStepSz query, const PtrStepSz train, const Mask mask, PtrStepf allDist) { extern __shared__ int smem[]; @@ -860,7 +860,7 @@ 
namespace cv { namespace gpu { namespace device } template - void calcDistanceUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream) + void calcDistanceUnrolled(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, const PtrStepSzf& allDist, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); @@ -875,7 +875,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void calcDistance(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, PtrStepf allDist) + __global__ void calcDistance(const PtrStepSz query, const PtrStepSz train, const Mask mask, PtrStepf allDist) { extern __shared__ int smem[]; @@ -923,7 +923,7 @@ namespace cv { namespace gpu { namespace device } template - void calcDistance(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream) + void calcDistance(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, const PtrStepSzf& allDist, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); @@ -941,8 +941,8 @@ namespace cv { namespace gpu { namespace device // Calc Distance dispatcher template - void calcDistanceDispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Df& allDist, + void calcDistanceDispatcher(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSzf& allDist, int cc, cudaStream_t stream) { (void)cc; @@ -976,7 +976,7 @@ namespace cv { namespace gpu { namespace device // find knn match kernel template - __global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrStepf distance) + __global__ void findBestMatch(PtrStepSzf allDist, int i, PtrStepi trainIdx, PtrStepf distance) { const int SMEM_SIZE = BLOCK_SIZE > 64 ? 
BLOCK_SIZE : 64; __shared__ float s_dist[SMEM_SIZE]; @@ -1017,7 +1017,7 @@ namespace cv { namespace gpu { namespace device } template - void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream) + void findKnnMatch(int k, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSzf& allDist, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, 1, 1); const dim3 grid(trainIdx.rows, 1, 1); @@ -1032,17 +1032,17 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - void findKnnMatchDispatcher(int k, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream) + void findKnnMatchDispatcher(int k, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream) { - findKnnMatch<256>(k, static_cast(trainIdx), static_cast(distance), allDist, stream); + findKnnMatch<256>(k, static_cast(trainIdx), static_cast(distance), allDist, stream); } /////////////////////////////////////////////////////////////////////////////// // knn match Dispatcher template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, int k, const Mask& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + void matchDispatcher(const PtrStepSz& query, const PtrStepSz& train, int k, const Mask& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream) { if (k == 2) @@ -1059,104 +1059,104 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // knn match caller - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream) { if (mask.data) - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream); + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream); else - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream); + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream); } - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const 
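
// [Sketch] The two-stage k-NN path above first fills allDist (one row of
// query-to-train distances per query), then findBestMatch reduces each row
// to its arg-min; a hedged reading of the surrounding code is that this runs
// once per neighbor, invalidating each winner so the next pass finds the
// next-best. A self-contained block-wide arg-min in the same style:

#include <cfloat>

template <int BLOCK_SIZE>
__global__ void rowArgMinSketch(const float* allDist, int stride, int n,
                                int* bestIdx, float* bestDist)
{
    __shared__ float s_dist[BLOCK_SIZE];
    __shared__ int   s_idx[BLOCK_SIZE];

    const float* row = allDist + blockIdx.x * stride;  // one block per query

    // per-thread strided scan of the row
    float myDist = FLT_MAX;
    int   myIdx  = -1;
    for (int i = threadIdx.x; i < n; i += BLOCK_SIZE)
        if (row[i] < myDist) { myDist = row[i]; myIdx = i; }

    s_dist[threadIdx.x] = myDist;
    s_idx[threadIdx.x]  = myIdx;
    __syncthreads();

    // tree reduction in shared memory
    for (int offset = BLOCK_SIZE / 2; offset > 0; offset /= 2)
    {
        if (threadIdx.x < offset && s_dist[threadIdx.x + offset] < s_dist[threadIdx.x])
        {
            s_dist[threadIdx.x] = s_dist[threadIdx.x + offset];
            s_idx[threadIdx.x]  = s_idx[threadIdx.x + offset];
        }
        __syncthreads();
    }

    if (threadIdx.x == 0)
    {
        bestIdx[blockIdx.x]  = s_idx[0];
        bestDist[blockIdx.x] = s_dist[0];
    }
}
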
DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream) { if (mask.data) - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream); + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream); else - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream); + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream); } - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const 
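
// [Note + sketch] The long `template void matchL1_gpu...` lines here are
// explicit template instantiations, not declarations: the kernels are
// compiled by nvcc inside this translation unit, so every element type the
// C++ wrappers can request must be materialized here or linking fails; the
// commented-out lines are element types the matcher does not expose. The
// same pattern in miniature:

template <typename T> void workSketch(const T* data, int n) { (void)data; (void)n; }

// emit concrete, linkable symbols for the supported element types
template void workSketch<unsigned char>(const unsigned char*, int);
template void workSketch<float>(const float*, int);
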
DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream) { if (mask.data) - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream); + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream); else - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream); + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream); } - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const 
DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream) { if (masks.data) - match2Dispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); + match2Dispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); else - match2Dispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); + match2Dispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); } - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - //template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const 
DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); - template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream) { if (masks.data) - match2Dispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); + match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); else - match2Dispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); + match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); } - //template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - //template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const 
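
// [Hypothetical sketch] How a collection of train images reaches
// match2Dispatcher above: the caller packs one descriptor-matrix header per
// image into a single row of device memory; the dispatcher reinterprets that
// row via the (const PtrStepSz*)trains.ptr() cast, and trains.cols is the
// image count n. HeaderSketch stands in for the real PtrStepSz, and the
// packing below is an assumption about the host side, which this hunk does
// not show:

#include <cuda_runtime.h>
#include <vector>

template <typename T> struct HeaderSketch { T* data; size_t step; int cols, rows; };

template <typename T>
HeaderSketch<T>* uploadTrainHeadersSketch(const std::vector< HeaderSketch<T> >& headers)
{
    HeaderSketch<T>* devArray = 0;
    cudaMalloc((void**)&devArray, headers.size() * sizeof(HeaderSketch<T>));
    cudaMemcpy(devArray, &headers[0], headers.size() * sizeof(HeaderSketch<T>),
               cudaMemcpyHostToDevice);
    return devArray;  // device code then indexes this as trains[imgIdx]
}
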
DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - //template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - //template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - //template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Di& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); + //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream) { if (masks.data) - match2Dispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); + match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); else - match2Dispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); + match2Dispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); } - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, 
cudaStream_t stream); - //template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - //template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); + template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + //template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); + template void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream); } // namespace bf_knnmatch }}} // namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/bf_match.cu b/modules/gpu/src/cuda/bf_match.cu index 3068c81dc4..d8489d933b 100644 --- a/modules/gpu/src/cuda/bf_match.cu +++ b/modules/gpu/src/cuda/bf_match.cu @@ -86,7 +86,7 @@ namespace cv { namespace gpu { namespace device // Match Unrolled Cached template - __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_& query, U* s_query) + __device__ void loadQueryToSmem(int queryIdx, const PtrStepSz& query, U* s_query) { #pragma unroll for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i) @@ -97,7 +97,7 @@ namespace cv { namespace gpu { namespace device } template - __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_& query,volatile int imgIdx, const DevMem2D_& train, const Mask& mask, + __device__ void loopUnrolledCached(int queryIdx, const PtrStepSz& query,volatile int imgIdx, const PtrStepSz& train, const Mask& mask, typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance, int& bestTrainIdx, int& bestImgIdx) { @@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int* bestTrainIdx, float* bestDistance) + __global__ void matchUnrolledCached(const PtrStepSz query, const PtrStepSz train, const Mask mask, int* bestTrainIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -174,8 +174,8 @@ namespace cv { namespace gpu { namespace 
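
// [Sketch] bf_match.cu reuses the "cached" trick from bf_knnmatch.cu: before
// the distance loop, loadQueryToSmem stages the whole query descriptor into
// shared memory so the unrolled inner product rereads it from smem rather
// than global memory. Simplified to essentials (the real code also
// bounds-checks loadX against query.cols):

template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T>
__device__ void cacheQueryRowSketch(const T* queryRow, T* s_query)
{
    #pragma unroll
    for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
    {
        const int loadX = i * BLOCK_SIZE + threadIdx.x;
        // one descriptor per block row, stored contiguously in shared memory
        s_query[threadIdx.y * MAX_DESC_LEN + loadX] = queryRow[loadX];
    }
}
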
device } template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + void matchUnrolledCached(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, + __global__ void matchUnrolledCached(const PtrStepSz query, const PtrStepSz* trains, int n, const Mask mask, int* bestTrainIdx, int* bestImgIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -211,7 +211,7 @@ namespace cv { namespace gpu { namespace device for (int imgIdx = 0; imgIdx < n; ++imgIdx) { - const DevMem2D_ train = trains[imgIdx]; + const PtrStepSz train = trains[imgIdx]; m.next(); loopUnrolledCached(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx); } @@ -233,8 +233,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + void matchUnrolledCached(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -253,7 +253,7 @@ namespace cv { namespace gpu { namespace device // Match Unrolled template - __device__ void loopUnrolled(int queryIdx, const DevMem2D_& query,volatile int imgIdx, const DevMem2D_& train, const Mask& mask, + __device__ void loopUnrolled(int queryIdx, const PtrStepSz& query,volatile int imgIdx, const PtrStepSz& train, const Mask& mask, typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance, int& bestTrainIdx, int& bestImgIdx) { @@ -303,7 +303,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int* bestTrainIdx, float* bestDistance) + __global__ void matchUnrolled(const PtrStepSz query, const PtrStepSz train, const Mask mask, int* bestTrainIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -332,8 +332,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + void matchUnrolled(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, + __global__ void matchUnrolled(const PtrStepSz query, const PtrStepSz* trains, int n, const Mask mask, int* bestTrainIdx, int* bestImgIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -367,7 +367,7 @@ namespace cv { namespace gpu { namespace device for (int imgIdx = 0; imgIdx < n; ++imgIdx) { - const DevMem2D_ train = trains[imgIdx]; + const PtrStepSz train = trains[imgIdx]; m.next(); loopUnrolled(queryIdx, query, imgIdx, train, m, s_query, 
s_train, myBestDistance, myBestTrainIdx, myBestImgIdx); } @@ -389,8 +389,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + void matchUnrolled(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -409,7 +409,7 @@ namespace cv { namespace gpu { namespace device // Match template - __device__ void loop(int queryIdx, const DevMem2D_& query, volatile int imgIdx, const DevMem2D_& train, const Mask& mask, + __device__ void loop(int queryIdx, const PtrStepSz& query, volatile int imgIdx, const PtrStepSz& train, const Mask& mask, typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance, int& bestTrainIdx, int& bestImgIdx) { @@ -458,7 +458,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void match(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int* bestTrainIdx, float* bestDistance) + __global__ void match(const PtrStepSz query, const PtrStepSz train, const Mask mask, int* bestTrainIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -487,8 +487,8 @@ namespace cv { namespace gpu { namespace device } template - void match(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + void match(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -504,7 +504,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void match(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, + __global__ void match(const PtrStepSz query, const PtrStepSz* trains, int n, const Mask mask, int* bestTrainIdx, int* bestImgIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -521,7 +521,7 @@ namespace cv { namespace gpu { namespace device Mask m = mask; for (int imgIdx = 0; imgIdx < n; ++imgIdx) { - const DevMem2D_ train = trains[imgIdx]; + const PtrStepSz train = trains[imgIdx]; m.next(); loop(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx); } @@ -543,8 +543,8 @@ namespace cv { namespace gpu { namespace device } template - void match(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + void match(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -563,8 +563,8 @@ namespace cv { namespace gpu { namespace device // Match dispatcher template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + void matchDispatcher(const PtrStepSz& query, const PtrStepSz& train, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { (void)cc; @@ -595,8 +595,8 @@ namespace cv { namespace gpu { namespace device } template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_* 
trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + void matchDispatcher(const PtrStepSz& query, const PtrStepSz* trains, int n, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { (void)cc; @@ -629,152 +629,152 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match caller - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), SingleMask(mask), trainIdx, distance, cc, stream); } else { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), WithOutMask(), trainIdx, distance, cc, stream); } } - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template 
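
// [Sketch] The callers above pick SingleMask(mask) or WithOutMask() and hand
// it to matchDispatcher as a template argument, so the per-pair "is this
// match allowed?" test is resolved at compile time: a constant true for the
// unmasked path, a byte load for the masked one. Illustrative policy types
// (field layout assumed, not taken from the patch):

struct WithOutMaskSketch
{
    __device__ __forceinline__ bool operator()(int, int) const { return true; }
};

struct SingleMaskSketch
{
    const unsigned char* data;
    size_t step;  // row stride in bytes

    __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
    {
        return data[queryIdx * step + trainIdx] != 0;
    }
};

template <typename Mask>
__device__ __forceinline__ bool allowedSketch(const Mask& m, int q, int t)
{
    return m(q, t);  // inlines to `true` for WithOutMaskSketch
}
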
void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), SingleMask(mask), trainIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), WithOutMask(), trainIdx, distance, cc, stream); } } - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& 
distance, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), SingleMask(mask), trainIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), WithOutMask(), trainIdx, distance, cc, stream); } } - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { if (masks.data) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); } else { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), 
trains.cols, WithOutMask(), + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); } } - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { if (masks.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), + matchDispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), 
trains.cols, WithOutMask(), + matchDispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); } } - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& maskCollection, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream) { if (masks.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), + matchDispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), 
(const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), + matchDispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream); } } - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream); } // namespace bf_match }}} // namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpu/src/cuda/bf_radius_match.cu index c6cba928c1..63dcd01537 100644 --- a/modules/gpu/src/cuda/bf_radius_match.cu +++ b/modules/gpu/src/cuda/bf_radius_match.cu @@ -53,7 +53,7 @@ namespace cv { namespace gpu { namespace device // Match Unrolled template - __global__ void matchUnrolled(const DevMem2D_ query, int imgIdx, const DevMem2D_ train, float maxDistance, const Mask mask, + __global__ void matchUnrolled(const PtrStepSz query, int imgIdx, const PtrStepSz train, float maxDistance, const Mask mask, PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount) { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110) @@ -113,8 +113,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, cudaStream_t stream) + void matchUnrolled(const PtrStepSz& query, const 
PtrStepSz& train, float maxDistance, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); @@ -130,8 +130,8 @@ namespace cv { namespace gpu { namespace device } template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + void matchUnrolled(const PtrStepSz& query, const PtrStepSz* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace device for (int i = 0; i < n; ++i) { - const DevMem2D_ train = trains[i]; + const PtrStepSz train = trains[i]; const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); @@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace device // Match template - __global__ void match(const DevMem2D_ query, int imgIdx, const DevMem2D_ train, float maxDistance, const Mask mask, + __global__ void match(const PtrStepSz query, int imgIdx, const PtrStepSz train, float maxDistance, const Mask mask, PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount) { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110) @@ -224,8 +224,8 @@ namespace cv { namespace gpu { namespace device } template - void match(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + void match(const PtrStepSz& query, const PtrStepSz& train, float maxDistance, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -242,8 +242,8 @@ namespace cv { namespace gpu { namespace device } template - void match(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + void match(const PtrStepSz& query, const PtrStepSz* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -252,7 +252,7 @@ namespace cv { namespace gpu { namespace device for (int i = 0; i < n; ++i) { - const DevMem2D_ train = trains[i]; + const PtrStepSz train = trains[i]; const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE)); @@ -277,8 +277,8 @@ namespace cv { namespace gpu { namespace device // Match dispatcher template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + void matchDispatcher(const PtrStepSz& query, const PtrStepSz& train, float maxDistance, const Mask& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { (void)cc; @@ -309,8 +309,8 @@ namespace cv { namespace gpu { namespace 
device } template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + void matchDispatcher(const PtrStepSz& query, const PtrStepSz* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { (void)cc; @@ -343,125 +343,125 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Radius Match caller - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), maxDistance, SingleMask(mask), trainIdx, distance, nMatches, cc, stream); } else { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), maxDistance, WithOutMask(), trainIdx, distance, nMatches, cc, stream); } } - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& 
mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), maxDistance, SingleMask(mask), trainIdx, distance, nMatches, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), maxDistance, WithOutMask(), trainIdx, distance, nMatches, cc, stream); } } - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); + 
//template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL2_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), maxDistance, SingleMask(mask), trainIdx, distance, nMatches, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), + matchDispatcher(static_cast< PtrStepSz >(query), static_cast< PtrStepSz >(train), maxDistance, WithOutMask(), trainIdx, distance, nMatches, cc, stream); } } - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& 
trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchHamming_gpu(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, + matchDispatcher< L1Dist >(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, cc, stream); } - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const 
DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, + matchDispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, cc, stream); } - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& 
trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + //template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); + template void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, int cc, cudaStream_t stream) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, + matchDispatcher(static_cast< PtrStepSz >(query), (const PtrStepSz*)trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, cc, stream); } - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - //template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int 
cc, cudaStream_t stream);
-    template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
-    //template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
-    template void matchHamming_gpu<int   >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
+    template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
+    //template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
+    template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
+    //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
+    template void matchHamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
 } // namespace bf_radius_match
 }}} // namespace cv { namespace gpu { namespace device
diff --git a/modules/gpu/src/cuda/bgfg_gmg.cu b/modules/gpu/src/cuda/bgfg_gmg.cu
index 76ebb2da09..7fee2d87b3 100644
--- a/modules/gpu/src/cuda/bgfg_gmg.cu
+++ b/modules/gpu/src/cuda/bgfg_gmg.cu
@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device {
     template <typename T> struct Quantization : detail::Quantization<VecTraits<T>::cn> {};
 
     template <typename SrcT>
-    __global__ void update(const PtrStep_<SrcT> frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_,
+    __global__ void update(const PtrStep<SrcT> frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_,
                            const int frameNum, const float learningRate, const bool updateBackgroundModel)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -222,7 +222,7 @@ namespace cv { namespace gpu { namespace device {
     }
 
     template <typename SrcT>
-    void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
+    void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
                     int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream)
     {
         const dim3 block(32, 8);
@@ -230,7 +230,7 @@ namespace cv { namespace gpu { namespace device {
         cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
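The kernel launches below (and throughout the rest of this patch) originally carried an execution configuration of the form <<<grid, block, sharedMemBytes, stream>>>, which the HTML rendering has reduced to "<<>>". For orientation, here is a minimal sketch of the wrapper pattern these files repeat; PtrStepSz_sketch, update_sketch and divUp_sketch are illustrative stand-ins assumed for the example, not the real cv::gpu definitions:

#include <cuda_runtime.h>

// Minimal stand-in for cv::gpu::PtrStepSz<T>: a raw device pointer plus a
// row stride in *bytes* (as in the real header) and the image size.
template <typename T> struct PtrStepSz_sketch
{
    T* data;
    size_t step;    // stride between consecutive rows, in bytes
    int cols, rows;

    __device__ __forceinline__ T* ptr(int y)
    {
        return (T*)((char*)data + y * step); // byte arithmetic, then cast back
    }
};

inline int divUp_sketch(int a, int b) { return (a + b - 1) / b; }

// A trivial kernel in the shape of the update<SrcT> kernels in this patch:
// one thread per pixel, guarded against the partially filled edge blocks.
template <typename SrcT>
__global__ void update_sketch(PtrStepSz_sketch<SrcT> frame)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < frame.cols && y < frame.rows)
        frame.ptr(y)[x] = SrcT(); // touch one pixel per thread
}

// The launch idiom the garbled "update<<>>(...)" lines stand for: an explicit
// template argument, then the <<<grid, block, sharedMemBytes, stream>>>
// execution configuration.
template <typename SrcT>
void update_caller_sketch(PtrStepSz_sketch<SrcT> frame, cudaStream_t stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp_sketch(frame.cols, block.x), divUp_sketch(frame.rows, block.y));

    update_sketch<SrcT><<<grid, block, 0, stream>>>(frame);

    if (stream == 0)
        cudaDeviceSynchronize(); // these callers block only on the default stream
}

The byte-typed step is the reason every ptr() implementation casts through char* before indexing: GpuMat rows are padded for alignment, so the stride generally exceeds cols * sizeof(T).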
-        update<SrcT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
+        update<SrcT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
 
         cudaSafeCall( cudaGetLastError() );
 
@@ -238,16 +238,16 @@ namespace cv { namespace gpu { namespace device {
             cudaSafeCall( cudaDeviceSynchronize() );
     }
 
-    template void update_gpu<uchar  >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    template void update_gpu<uchar3 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    template void update_gpu<uchar4 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<uchar  >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<uchar3 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<uchar4 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
 
-    template void update_gpu<ushort >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    template void update_gpu<ushort3>(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    template void update_gpu<ushort4>(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<ushort >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<ushort3>(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<ushort4>(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
 
-    template void update_gpu<float  >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    template void update_gpu<float3 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
-    template void update_gpu<float4 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
+    template void update_gpu<float  >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float
learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); + template void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream); } }}} diff --git a/modules/gpu/src/cuda/bgfg_mog.cu b/modules/gpu/src/cuda/bgfg_mog.cu index 5b4dfc9ada..dcfd502848 100644 --- a/modules/gpu/src/cuda/bgfg_mog.cu +++ b/modules/gpu/src/cuda/bgfg_mog.cu @@ -121,8 +121,8 @@ namespace cv { namespace gpu { namespace device // MOG without learning template - __global__ void mog_withoutLearning(const DevMem2D_ frame, PtrStepb fgmask, - const PtrStepf gmm_weight, const PtrStep_ gmm_mean, const PtrStep_ gmm_var, + __global__ void mog_withoutLearning(const PtrStepSz frame, PtrStepb fgmask, + const PtrStepf gmm_weight, const PtrStep gmm_mean, const PtrStep gmm_var, const int nmixtures, const float varThreshold, const float backgroundRatio) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -172,7 +172,7 @@ namespace cv { namespace gpu { namespace device } template - void mog_withoutLearning_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Db mean, DevMem2Db var, + void mog_withoutLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream) { dim3 block(32, 8); @@ -180,8 +180,8 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning, cudaFuncCachePreferL1) ); - mog_withoutLearning<<>>((DevMem2D_) frame, fgmask, - weight, (DevMem2D_) mean, (DevMem2D_) var, + mog_withoutLearning<<>>((PtrStepSz) frame, fgmask, + weight, (PtrStepSz) mean, (PtrStepSz) var, nmixtures, varThreshold, backgroundRatio); cudaSafeCall( cudaGetLastError() ); @@ -194,8 +194,8 @@ namespace cv { namespace gpu { namespace device // MOG with learning template - __global__ void mog_withLearning(const DevMem2D_ frame, PtrStepb fgmask, - PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep_ gmm_mean, PtrStep_ gmm_var, + __global__ void mog_withLearning(const PtrStepSz frame, PtrStepb fgmask, + PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep gmm_mean, PtrStep gmm_var, const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar) { const float w0 = 0.05f; @@ -324,7 +324,7 @@ namespace cv { namespace gpu { namespace device } template - void mog_withLearning_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, + void mog_withLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream) { @@ -333,8 +333,8 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning, cudaFuncCachePreferL1) ); - mog_withLearning<<>>((DevMem2D_) frame, fgmask, - weight, sortKey, (DevMem2D_) mean, (DevMem2D_) var, + mog_withLearning<<>>((PtrStepSz) frame, fgmask, + weight, sortKey, (PtrStepSz) mean, (PtrStepSz) var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar); cudaSafeCall( cudaGetLastError() ); @@ -346,10 
+346,10 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////// // MOG - void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream) + void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream) { - typedef void (*withoutLearning_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream); - typedef void (*withLearning_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream); + typedef void (*withoutLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream); + typedef void (*withLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream); static const withoutLearning_t withoutLearning[] = { @@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep_ gmm_mean, DevMem2D_ dst, const int nmixtures, const float backgroundRatio) + __global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStepSz dst, const int nmixtures, const float backgroundRatio) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -399,23 +399,23 @@ namespace cv { namespace gpu { namespace device } template - void getBackgroundImage_caller(DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream) + void getBackgroundImage_caller(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream) { dim3 block(32, 8); dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage, cudaFuncCachePreferL1) ); - getBackgroundImage<<>>(weight, (DevMem2D_) mean, (DevMem2D_) dst, nmixtures, backgroundRatio); + getBackgroundImage<<>>(weight, (PtrStepSz) mean, (PtrStepSz) dst, nmixtures, backgroundRatio); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - void getBackgroundImage_gpu(int cn, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream) + void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream) { - typedef void (*func_t)(DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream); static const func_t funcs[] = { @@ -455,8 +455,8 @@ namespace cv { namespace gpu 
{ namespace device } template - __global__ void mog2(const DevMem2D_ frame, PtrStepb fgmask, PtrStepb modesUsed, - PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep_ gmm_mean, + __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, + PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, const float alphaT, const float alpha1, const float prune) { const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -653,7 +653,7 @@ namespace cv { namespace gpu { namespace device } template - void mog2_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, + void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream) { dim3 block(32, 8); @@ -665,16 +665,16 @@ namespace cv { namespace gpu { namespace device { cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); - mog2<<>>((DevMem2D_) frame, fgmask, modesUsed, - weight, variance, (DevMem2D_) mean, + mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, + weight, variance, (PtrStepSz) mean, alphaT, alpha1, prune); } else { cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); - mog2<<>>((DevMem2D_) frame, fgmask, modesUsed, - weight, variance, (DevMem2D_) mean, + mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, + weight, variance, (PtrStepSz) mean, alphaT, alpha1, prune); } @@ -684,10 +684,10 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, + void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream) { - typedef void (*func_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); static const func_t funcs[] = { @@ -698,7 +698,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void getBackgroundImage2(const DevMem2Db modesUsed, const PtrStepf gmm_weight, const PtrStep_ gmm_mean, PtrStep_ dst) + __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -730,23 +730,23 @@ namespace cv { namespace gpu { namespace device } template - void getBackgroundImage2_caller(DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream) + void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) { dim3 block(32, 8); dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); - getBackgroundImage2<<>>(modesUsed, weight, (DevMem2D_) mean, (DevMem2D_) dst); + getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); cudaSafeCall( cudaGetLastError() ); if 
(stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - void getBackgroundImage2_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream) + void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) { - typedef void (*func_t)(DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[] = { diff --git a/modules/gpu/src/cuda/bgfg_vibe.cu b/modules/gpu/src/cuda/bgfg_vibe.cu index ed95dfef21..2b3b7074ea 100644 --- a/modules/gpu/src/cuda/bgfg_vibe.cu +++ b/modules/gpu/src/cuda/bgfg_vibe.cu @@ -90,7 +90,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void init(const DevMem2D_ frame, PtrStep_ samples, PtrStep_ randStates) + __global__ void init(const PtrStepSz frame, PtrStep samples, PtrStep randStates) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -116,23 +116,23 @@ namespace cv { namespace gpu { namespace device } template - void init_caller(DevMem2Db frame, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream) + void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) { dim3 block(32, 8); dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); cudaSafeCall( cudaFuncSetCacheConfig(init, cudaFuncCachePreferL1) ); - init<<>>((DevMem2D_) frame, (DevMem2D_) samples, randStates); + init<<>>((PtrStepSz) frame, (PtrStepSz) samples, randStates); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - void init_gpu(DevMem2Db frame, int cn, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream) + void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) { - typedef void (*func_t)(DevMem2Db frame, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); static const func_t funcs[] = { 0, init_caller, 0, init_caller, init_caller @@ -155,7 +155,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void update(const DevMem2D_ frame, PtrStepb fgmask, PtrStep_ samples, PtrStep_ randStates) + __global__ void update(const PtrStepSz frame, PtrStepb fgmask, PtrStep samples, PtrStep randStates) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -225,23 +225,23 @@ namespace cv { namespace gpu { namespace device } template - void update_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream) + void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) { dim3 block(32, 8); dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); cudaSafeCall( cudaFuncSetCacheConfig(update, cudaFuncCachePreferL1) ); - update<<>>((DevMem2D_) frame, fgmask, (DevMem2D_) samples, randStates); + update<<>>((PtrStepSz) frame, fgmask, (PtrStepSz) samples, randStates); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - void update_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream) + void 
update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) { - typedef void (*func_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_ randStates, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); static const func_t funcs[] = { 0, update_caller, 0, update_caller, update_caller diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu index d09268ad8d..abae91d2c5 100644 --- a/modules/gpu/src/cuda/bilateral_filter.cu +++ b/modules/gpu/src/cuda/bilateral_filter.cu @@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace device __constant__ short cedge_disc; __constant__ short cmax_disc; - void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc) + void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc) { cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) ); cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) ); @@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device } template - void bilateral_filter_caller(DevMem2D_ disp, DevMem2Db img, int channels, int iters, cudaStream_t stream) + void bilateral_filter_caller(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -213,12 +213,12 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream) + void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream) { bilateral_filter_caller(disp, img, channels, iters, stream); } - void bilateral_filter_gpu(DevMem2D_ disp, DevMem2Db img, int channels, int iters, cudaStream_t stream) + void bilateral_filter_gpu(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream) { bilateral_filter_caller(disp, img, channels, iters, stream); } diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu index 4776c55724..5def4ad245 100644 --- a/modules/gpu/src/cuda/calib3d.cu +++ b/modules/gpu/src/cuda/calib3d.cu @@ -66,8 +66,8 @@ namespace cv { namespace gpu { namespace device } }; - void call(const DevMem2D_ src, const float* rot, - const float* transl, DevMem2D_ dst, + void call(const PtrStepSz src, const float* rot, + const float* transl, PtrStepSz dst, cudaStream_t stream) { cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3)); @@ -103,8 +103,8 @@ namespace cv { namespace gpu { namespace device } }; - void call(const DevMem2D_ src, const float* rot, - const float* transl, const float* proj, DevMem2D_ dst, + void call(const PtrStepSz src, const float* rot, + const float* transl, const float* proj, PtrStepSz dst, cudaStream_t stream) { cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3)); diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 076fdcc125..e92615e5f7 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device template - __global__ void computeConnectivity(const DevMem2D_ image, DevMem2D components, F connected) + __global__ void 
computeConnectivity(const PtrStepSz image, PtrStepSzb components, F connected) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; @@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device } template< typename T> - void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream) + void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream) { dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y)); @@ -210,23 +210,23 @@ namespace cv { namespace gpu { namespace device typedef InInterval::dist_type, IntervalsTraits::ch> Int_t; Int_t inInt(lo, hi); - computeConnectivity<<>>(static_cast >(image), edges, inInt); + computeConnectivity<<>>(static_cast >(image), edges, inInt); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); + template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - __global__ void lableTiles(const DevMem2D edges, DevMem2Di comps) + __global__ void lableTiles(const PtrStepSzb edges, PtrStepSzi comps) { int x = threadIdx.x + blockIdx.x * TILE_COLS; int y = threadIdx.y + blockIdx.y * TILE_ROWS; @@ -360,7 +360,7 @@ namespace cv { namespace gpu { namespace device } } - __device__ __forceinline__ int root(const DevMem2Di& comps, int label) + __device__ __forceinline__ int root(const PtrStepSzi& comps, int label) { while(1) { @@ -376,7 +376,7 @@ namespace cv { namespace gpu 
{ namespace device return label; } - __device__ __forceinline__ void isConnected(DevMem2Di& comps, int l1, int l2, bool& changed) + __device__ __forceinline__ void isConnected(PtrStepSzi& comps, int l1, int l2, bool& changed) { int r1 = root(comps, l1); int r2 = root(comps, l2); @@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace device } __global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX, - const DevMem2D edges, DevMem2Di comps, const int yIncomplete, int xIncomplete) + const PtrStepSzb edges, PtrStepSzi comps, const int yIncomplete, int xIncomplete) { int tid = threadIdx.y * blockDim.x + threadIdx.x; int stride = blockDim.y * blockDim.x; @@ -482,7 +482,7 @@ namespace cv { namespace gpu { namespace device } while (Emulation::syncthreadsOr(changed)); } - __global__ void flatten(const DevMem2D edges, DevMem2Di comps) + __global__ void flatten(const PtrStepSzb edges, PtrStepSzi comps) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; @@ -493,7 +493,7 @@ namespace cv { namespace gpu { namespace device enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1}; - void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream) + void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream) { dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 562f3ff2c5..54925abe62 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -222,12 +222,12 @@ namespace cv { namespace gpu { namespace device }; #define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \ - void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream) \ + void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) \ { \ traits::functor_type functor = traits::create_functor(); \ typedef typename traits::functor_type::argument_type src_t; \ typedef typename traits::functor_type::result_type dst_t; \ - cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, functor, WithOutMask(), stream); \ + cv::gpu::device::transform((PtrStepSz)src, (PtrStepSz)dst, functor, WithOutMask(), stream); \ } #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \ diff --git a/modules/gpu/src/cuda/column_filter.cu b/modules/gpu/src/cuda/column_filter.cu index fb52d6e989..307e87ad34 100644 --- a/modules/gpu/src/cuda/column_filter.cu +++ b/modules/gpu/src/cuda/column_filter.cu @@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void linearColumnFilter(const DevMem2D_ src, PtrStep dst, const int anchor, const B brd) + __global__ void linearColumnFilter(const PtrStepSz src, PtrStep dst, const int anchor, const B brd) { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) const int BLOCK_DIM_X = 16; @@ -125,7 +125,7 @@ namespace cv { namespace gpu { namespace device } template class B> - void linearColumnFilter_caller(DevMem2D_ src, DevMem2D_ dst, int anchor, int cc, cudaStream_t stream) + void linearColumnFilter_caller(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream) { int BLOCK_DIM_X; int BLOCK_DIM_Y; @@ -158,9 +158,9 @@ namespace cv { namespace gpu { namespace device } template - void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream) + void linearColumnFilter_gpu(PtrStepSzb src, 
PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream) { - typedef void (*caller_t)(DevMem2D_ src, DevMem2D_ dst, int anchor, int cc, cudaStream_t stream); + typedef void (*caller_t)(PtrStepSz src, PtrStepSz dst, int anchor, int cc, cudaStream_t stream); static const caller_t callers[5][33] = { @@ -343,13 +343,13 @@ namespace cv { namespace gpu { namespace device loadKernel(kernel, ksize); - callers[brd_type][ksize]((DevMem2D_)src, (DevMem2D_)dst, anchor, cc, stream); + callers[brd_type][ksize]((PtrStepSz)src, (PtrStepSz)dst, anchor, cc, stream); } - template void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); - template void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); - template void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); - template void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); - template void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + template void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + template void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + template void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + template void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + template void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); } // namespace column_filter }}} // namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu index a54a9b7db2..71859f2b77 100644 --- a/modules/gpu/src/cuda/copy_make_border.cu +++ b/modules/gpu/src/cuda/copy_make_border.cu @@ -47,7 +47,7 @@ namespace cv { namespace gpu { namespace device { namespace imgproc { - template __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_ dst, int top, int left) + template __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz dst, int top, int left) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -58,7 +58,7 @@ namespace cv { namespace gpu { namespace device template
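One idiom recurs in nearly every file touched above (mog_gpu, getBackgroundImage2_gpu, init_gpu, update_gpu, linearColumnFilter_gpu): the kernels are written against a compile-time element type, and the exported untyped entry point selects the right explicit instantiation from a static table of function pointers indexed by a runtime value such as channel count, border type, or kernel size, with zero entries marking unsupported combinations. Below is a minimal sketch of that dispatch pattern; the names filter_caller_sketch and filter_gpu_sketch and the particular choice of supported types are invented for the example, not taken from the patch:

#include <cassert>
#include <cstddef>

// Illustrative stand-ins for cv::gpu::PtrStepSzb and cudaStream_t.
struct PtrStepSzb_sketch { unsigned char* data; size_t step; int cols, rows; };
typedef void* stream_sketch_t;

// Typed caller: one explicit instantiation per element type, mirroring the
// "template void linearColumnFilter_gpu<...>" lists in this patch.
template <typename T>
void filter_caller_sketch(PtrStepSzb_sketch src, PtrStepSzb_sketch dst, stream_sketch_t stream)
{
    // ... reinterpret src/dst as PtrStepSz<T> and launch the typed kernel ...
    (void)src; (void)dst; (void)stream;
}

// Untyped entry point: a runtime parameter (here the channel count) indexes
// a static function-pointer table; a 0 entry marks an unsupported
// combination, exactly like the funcs[]/callers[] tables above.
void filter_gpu_sketch(PtrStepSzb_sketch src, int cn, PtrStepSzb_sketch dst, stream_sketch_t stream)
{
    typedef void (*func_t)(PtrStepSzb_sketch src, PtrStepSzb_sketch dst, stream_sketch_t stream);

    static const func_t funcs[] =
    {
        0,                                    // cn == 0: never used
        filter_caller_sketch<unsigned char>,  // 1 channel
        0,                                    // 2 channels: unsupported here
        filter_caller_sketch<unsigned short>, // 3 channels (type invented for the sketch)
        filter_caller_sketch<float>           // 4 channels (type invented for the sketch)
    };

    assert(cn >= 1 && cn <= 4 && funcs[cn] != 0);
    funcs[cn](src, dst, stream);
}

The payoff of the table is that each nvcc-compiled translation unit exports a single untyped symbol, so callers elsewhere in the module need no template machinery; in this sketch the zero entries are guarded with an assert rather than whatever error handling the real entry points perform.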