diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 6c133e8ebb..baa287bca7 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -1,5 +1,5 @@ set(the_description "The Core Functionality") -ocv_add_module(core ${ZLIB_LIBRARIES}) +ocv_add_module(core ${ZLIB_LIBRARIES} OPTIONAL opencv_cudev) ocv_module_include_directories(${ZLIB_INCLUDE_DIR}) if (HAVE_WINRT) @@ -7,7 +7,7 @@ if (HAVE_WINRT) endif() if(HAVE_CUDA) - ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef) + ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function) endif() file(GLOB lib_cuda_hdrs "include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h") diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 637ecdf513..9644f98016 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -498,6 +498,11 @@ namespace gpu class CV_EXPORTS Event; } +namespace cudev +{ + template class GpuMat_; +} + } // cv #endif //__OPENCV_CORE_BASE_HPP__ diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index d5826a9b61..b69946939c 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -96,6 +96,7 @@ public: _InputArray(const gpu::GpuMat& d_mat); _InputArray(const ogl::Buffer& buf); _InputArray(const gpu::CudaMem& cuda_mem); + template _InputArray(const cudev::GpuMat_<_Tp>& m); virtual Mat getMat(int i=-1) const; virtual void getMatVector(std::vector& mv) const; @@ -144,6 +145,7 @@ public: _OutputArray(gpu::GpuMat& d_mat); _OutputArray(ogl::Buffer& buf); _OutputArray(gpu::CudaMem& cuda_mem); + template _OutputArray(cudev::GpuMat_<_Tp>& m); template _OutputArray(std::vector<_Tp>& vec); template _OutputArray(std::vector >& vec); template _OutputArray(std::vector >& vec); @@ -156,6 +158,7 @@ public: _OutputArray(const gpu::GpuMat& d_mat); _OutputArray(const ogl::Buffer& buf); _OutputArray(const gpu::CudaMem& cuda_mem); + template _OutputArray(const cudev::GpuMat_<_Tp>& m); template _OutputArray(const std::vector<_Tp>& vec); template _OutputArray(const std::vector >& vec); template _OutputArray(const std::vector >& vec); diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu new file mode 100644 index 0000000000..0db1584212 --- /dev/null +++ b/modules/core/src/cuda/gpu_mat.cu @@ -0,0 +1,486 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/core/gpu.hpp" +#include "opencv2/cudev.hpp" + +using namespace cv; +using namespace cv::gpu; +using namespace cv::cudev; + +///////////////////////////////////////////////////// +/// create + +void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) +{ + CV_DbgAssert( _rows >= 0 && _cols >= 0 ); + + _type &= Mat::TYPE_MASK; + + if (rows == _rows && cols == _cols && type() == _type && data) + return; + + if (data) + release(); + + if (_rows > 0 && _cols > 0) + { + flags = Mat::MAGIC_VAL + _type; + rows = _rows; + cols = _cols; + + size_t esz = elemSize(); + + void* devPtr; + + if (rows > 1 && cols > 1) + { + CV_CUDEV_SAFE_CALL( cudaMallocPitch(&devPtr, &step, esz * cols, rows) ); + } + else + { + // Single row or single column must be continuous + CV_CUDEV_SAFE_CALL( cudaMalloc(&devPtr, esz * cols * rows) ); + step = esz * cols; + } + + if (esz * cols == step) + flags |= Mat::CONTINUOUS_FLAG; + + int64 _nettosize = static_cast(step) * rows; + size_t nettosize = static_cast(_nettosize); + + datastart = data = static_cast(devPtr); + dataend = data + nettosize; + + refcount = static_cast(fastMalloc(sizeof(*refcount))); + *refcount = 1; + } +} + +///////////////////////////////////////////////////// +/// release + +void cv::gpu::GpuMat::release() +{ + if (refcount && CV_XADD(refcount, -1) == 1) + { + cudaFree(datastart); + fastFree(refcount); + } + + data = datastart = dataend = 0; + step = rows = cols = 0; + refcount = 0; +} + +///////////////////////////////////////////////////// +/// upload + +void cv::gpu::GpuMat::upload(InputArray arr) +{ + Mat mat = arr.getMat(); + + CV_DbgAssert( !mat.empty() ); + + create(mat.size(), mat.type()); + + CV_CUDEV_SAFE_CALL( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); +} + +void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream) +{ + Mat mat = arr.getMat(); + + CV_DbgAssert( !mat.empty() ); + + create(mat.size(), mat.type()); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) ); +} + +///////////////////////////////////////////////////// +/// download + +void cv::gpu::GpuMat::download(OutputArray _dst) const +{ + CV_DbgAssert( !empty() ); + + _dst.create(size(), type()); + Mat dst = _dst.getMat(); + + CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); +} + +void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const +{ + CV_DbgAssert( !empty() ); + + _dst.create(size(), type()); + Mat dst = _dst.getMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) ); +} + +///////////////////////////////////////////////////// +/// copyTo + +void cv::gpu::GpuMat::copyTo(OutputArray _dst) const +{ + CV_DbgAssert( !empty() ); + + _dst.create(size(), type()); + GpuMat dst = _dst.getGpuMat(); + + CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const +{ + CV_DbgAssert( !empty() ); + + _dst.create(size(), type()); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + CV_CUDEV_SAFE_CALL( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) ); +} + +namespace +{ + template struct CopyToPolicy : DefaultTransformPolicy + { + }; + template <> struct CopyToPolicy<4> : DefaultTransformPolicy + { + enum { + shift = 2 + }; + }; + template <> struct CopyToPolicy<8> : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void copyWithMask(const GpuMat& src, const GpuMat& dst, const GpuMat& mask, Stream& stream) + { + gridTransform_< CopyToPolicy::elem_type)> >(globPtr(src), globPtr(dst), identity(), globPtr(mask), stream); + } +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& stream) const +{ + CV_DbgAssert( !empty() ); + CV_DbgAssert( depth() <= CV_64F && channels() <= 4 ); + + GpuMat mask = _mask.getGpuMat(); + CV_DbgAssert( size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == channels()) ); + + _dst.create(size(), type()); + GpuMat dst = _dst.getGpuMat(); + + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, const GpuMat& mask, Stream& stream); + static const func_t funcs[9][4] = + { + {0,0,0,0}, + {copyWithMask, copyWithMask, copyWithMask, copyWithMask}, + {copyWithMask, copyWithMask, copyWithMask, copyWithMask}, + {0,0,0,0}, + {copyWithMask, copyWithMask, copyWithMask, copyWithMask}, + {0,0,0,0}, + {0,0,0,0}, + {0,0,0,0}, + {copyWithMask, copyWithMask, copyWithMask, copyWithMask} + }; + + if (mask.channels() == channels()) + { + const func_t func = funcs[elemSize1()][0]; + CV_DbgAssert( func != 0 ); + func(reshape(1), dst.reshape(1), mask.reshape(1), stream); + } + else + { + const func_t func = funcs[elemSize1()][channels() - 1]; + CV_DbgAssert( func != 0 ); + func(*this, dst, mask, stream); + } +} + +///////////////////////////////////////////////////// +/// setTo + +namespace +{ + template + void setToWithOutMask(const GpuMat& mat, Scalar _scalar, Stream& stream) + { + Scalar_::elem_type> scalar = _scalar; + gridTransform(constantPtr(VecTraits::make(scalar.val), mat.rows, mat.cols), globPtr(mat), identity(), stream); + } + + template + void setToWithMask(const GpuMat& mat, const GpuMat& mask, Scalar _scalar, Stream& stream) + { + Scalar_::elem_type> scalar = _scalar; + gridTransform(constantPtr(VecTraits::make(scalar.val), mat.rows, mat.cols), globPtr(mat), identity(), globPtr(mask), stream); + } +} + +GpuMat& cv::gpu::GpuMat::setTo(Scalar value, Stream& stream) +{ + CV_DbgAssert( !empty() ); + CV_DbgAssert( depth() <= CV_64F && channels() <= 4 ); + + if (value[0] == 0.0 && value[1] == 0.0 && value[2] == 0.0 && value[3] == 0.0) + { + // Zero fill + + if (stream) + CV_CUDEV_SAFE_CALL( cudaMemset2DAsync(data, step, 0, cols * elemSize(), rows, StreamAccessor::getStream(stream)) ); + else + CV_CUDEV_SAFE_CALL( cudaMemset2D(data, step, 0, cols * elemSize(), rows) ); + + return *this; + } + + if (depth() == CV_8U) + { + const int cn = channels(); + + if (cn == 1 + || (cn == 2 && value[0] == value[1]) + || (cn == 3 && value[0] == value[1] && value[0] == value[2]) + || (cn == 4 && value[0] == value[1] && value[0] == value[2] && value[0] == value[3])) + { + const int val = cv::saturate_cast(value[0]); + + if (stream) + CV_CUDEV_SAFE_CALL( cudaMemset2DAsync(data, step, val, cols * elemSize(), rows, StreamAccessor::getStream(stream)) ); + else + CV_CUDEV_SAFE_CALL( cudaMemset2D(data, step, val, cols * elemSize(), rows) ); + + return *this; + } + } + + typedef void (*func_t)(const GpuMat& mat, Scalar scalar, Stream& stream); + static const func_t funcs[7][4] = + { + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask}, + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask}, + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask}, + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask}, + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask}, + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask}, + {setToWithOutMask,setToWithOutMask,setToWithOutMask,setToWithOutMask} + }; + + funcs[depth()][channels() - 1](*this, value, stream); + + return *this; +} + +GpuMat& cv::gpu::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream) +{ + CV_DbgAssert( !empty() ); + CV_DbgAssert( depth() <= CV_64F && channels() <= 4 ); + + GpuMat mask = _mask.getGpuMat(); + + CV_DbgAssert( size() == mask.size() && mask.type() == CV_8UC1 ); + + typedef void (*func_t)(const GpuMat& mat, const GpuMat& mask, Scalar scalar, Stream& stream); + static const func_t funcs[7][4] = + { + {setToWithMask,setToWithMask,setToWithMask,setToWithMask}, + {setToWithMask,setToWithMask,setToWithMask,setToWithMask}, + {setToWithMask,setToWithMask,setToWithMask,setToWithMask}, + {setToWithMask,setToWithMask,setToWithMask,setToWithMask}, + {setToWithMask,setToWithMask,setToWithMask,setToWithMask}, + {setToWithMask,setToWithMask,setToWithMask,setToWithMask}, + {setToWithMask,setToWithMask,setToWithMask,setToWithMask} + }; + + funcs[depth()][channels() - 1](*this, mask, value, stream); + + return *this; +} + +///////////////////////////////////////////////////// +/// convertTo + +namespace +{ + template struct ConvertToPolicy : DefaultTransformPolicy + { + }; + template <> struct ConvertToPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void convertToNoScale(const GpuMat& src, const GpuMat& dst, Stream& stream) + { + typedef typename VecTraits::elem_type src_elem_type; + typedef typename VecTraits::elem_type dst_elem_type; + typedef typename LargerType::type larger_elem_type; + typedef typename LargerType::type scalar_type; + + gridTransform_< ConvertToPolicy >(globPtr(src), globPtr(dst), saturate_cast_func(), stream); + } + + template struct Convertor : unary_function + { + S alpha; + S beta; + + __device__ __forceinline__ D operator ()(typename TypeTraits::parameter_type src) const + { + return cudev::saturate_cast(alpha * src + beta); + } + }; + + template + void convertToScale(const GpuMat& src, const GpuMat& dst, double alpha, double beta, Stream& stream) + { + typedef typename VecTraits::elem_type src_elem_type; + typedef typename VecTraits::elem_type dst_elem_type; + typedef typename LargerType::type larger_elem_type; + typedef typename LargerType::type scalar_type; + + Convertor op; + op.alpha = cv::saturate_cast(alpha); + op.beta = cv::saturate_cast(beta); + + gridTransform_< ConvertToPolicy >(globPtr(src), globPtr(dst), op, stream); + } +} + +void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) const +{ + if (rtype < 0) + rtype = type(); + else + rtype = CV_MAKE_TYPE(CV_MAT_DEPTH(rtype), channels()); + + const int sdepth = depth(); + const int ddepth = CV_MAT_DEPTH(rtype); + if (sdepth == ddepth) + { + if (stream) + copyTo(_dst, stream); + else + copyTo(_dst); + + return; + } + + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F ); + + GpuMat src = *this; + + _dst.create(size(), rtype); + GpuMat dst = _dst.getGpuMat(); + + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream); + static const func_t funcs[7][7] = + { + {0, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale}, + {convertToNoScale, 0, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale}, + {convertToNoScale, convertToNoScale, 0, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale}, + {convertToNoScale, convertToNoScale, convertToNoScale, 0, convertToNoScale, convertToNoScale, convertToNoScale}, + {convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, 0, convertToNoScale, convertToNoScale}, + {convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, 0, convertToNoScale}, + {convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, convertToNoScale, 0} + }; + + funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream); +} + +void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const +{ + if (rtype < 0) + rtype = type(); + else + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); + + const int sdepth = depth(); + const int ddepth = CV_MAT_DEPTH(rtype); + + GpuMat src = *this; + + _dst.create(size(), rtype); + GpuMat dst = _dst.getGpuMat(); + + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, double alpha, double beta, Stream& stream); + static const func_t funcs[7][7] = + { + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale}, + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale}, + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale}, + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale}, + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale}, + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale}, + {convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale, convertToScale} + }; + + funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream); +} + +#endif diff --git a/modules/core/src/cuda/matrix_operations.cu b/modules/core/src/cuda/matrix_operations.cu deleted file mode 100644 index 7de5205ec6..0000000000 --- a/modules/core/src/cuda/matrix_operations.cu +++ /dev/null @@ -1,296 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/type_traits.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" - -#include "matrix_operations.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - /////////////////////////////////////////////////////////////////////////// - // copyWithMask - - template - void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream) - { - if (multiChannelMask) - cv::gpu::cudev::transform((PtrStepSz) src, (PtrStepSz) dst, identity(), SingleMask(mask), stream); - else - cv::gpu::cudev::transform((PtrStepSz) src, (PtrStepSz) dst, identity(), SingleMaskChannels(mask, cn), stream); - } - - void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream); - - static const func_t tab[] = - { - 0, - copyWithMask, - copyWithMask, - 0, - copyWithMask, - 0, - 0, - 0, - copyWithMask - }; - - const func_t func = tab[elemSize1]; - CV_DbgAssert( func != 0 ); - - func(src, dst, cn, mask, multiChannelMask, stream); - } - - /////////////////////////////////////////////////////////////////////////// - // set - - template - __global__ void set(PtrStepSz mat, const Mask mask, const int channels, const typename TypeVec::vec_type value) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= mat.cols * channels || y >= mat.rows) - return; - - const T scalar[4] = {value.x, value.y, value.z, value.w}; - - if (mask(y, x / channels)) - mat(y, x) = scalar[x % channels]; - } - - template - void set(PtrStepSz mat, const T* scalar, int channels, cudaStream_t stream) - { - typedef typename TypeVec::vec_type scalar_t; - - dim3 block(32, 8); - dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y)); - - set<<>>(mat, WithOutMask(), channels, VecTraits::make(scalar)); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall ( cudaDeviceSynchronize() ); - } - - template void set(PtrStepSz mat, const uchar* scalar, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const schar* scalar, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const ushort* scalar, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const short* scalar, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const int* scalar, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const float* scalar, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const double* scalar, int channels, cudaStream_t stream); - - template - void set(PtrStepSz mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream) - { - typedef typename TypeVec::vec_type scalar_t; - - dim3 block(32, 8); - dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y)); - - set<<>>(mat, SingleMask(mask), channels, VecTraits::make(scalar)); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall ( cudaDeviceSynchronize() ); - } - - template void set(PtrStepSz mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - template void set(PtrStepSz mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - - /////////////////////////////////////////////////////////////////////////// - // convert - - template struct Convertor : unary_function - { - Convertor(S alpha_, S beta_) : alpha(alpha_), beta(beta_) {} - - __device__ __forceinline__ D operator()(typename TypeTraits::ParameterType src) const - { - return saturate_cast(alpha * src + beta); - } - - S alpha, beta; - }; - - namespace detail - { - template struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits - { - }; - template struct ConvertTraitsDispatcher<1, 1, F> : DefaultTransformFunctorTraits - { - enum { smart_shift = 8 }; - }; - template struct ConvertTraitsDispatcher<1, 2, F> : DefaultTransformFunctorTraits - { - enum { smart_shift = 4 }; - }; - template struct ConvertTraitsDispatcher<1, 4, F> : DefaultTransformFunctorTraits - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - template struct ConvertTraitsDispatcher<2, 2, F> : DefaultTransformFunctorTraits - { - enum { smart_shift = 4 }; - }; - template struct ConvertTraitsDispatcher<2, 4, F> : DefaultTransformFunctorTraits - { - enum { smart_shift = 2 }; - }; - - template struct ConvertTraitsDispatcher<4, 2, F> : DefaultTransformFunctorTraits - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template struct ConvertTraitsDispatcher<4, 4, F> : DefaultTransformFunctorTraits - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 2 }; - }; - - template struct ConvertTraits : ConvertTraitsDispatcher - { - }; - } - - template struct TransformFunctorTraits< Convertor > : detail::ConvertTraits< Convertor > - { - }; - - template - void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream) - { - Convertor op(static_cast(alpha), static_cast(beta)); - cv::gpu::cudev::transform((PtrStepSz)src, (PtrStepSz)dst, op, WithOutMask(), stream); - } - - void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream) - { - typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream); - - static const caller_t tab[7][7] = - { - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - }, - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - }, - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - }, - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - }, - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - }, - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - }, - { - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_, - cvt_ - } - }; - - const caller_t func = tab[sdepth][ddepth]; - func(src, dst, alpha, beta, stream); - } -}}} // namespace cv { namespace gpu { namespace cudev diff --git a/modules/core/src/cuda/matrix_operations.hpp b/modules/core/src/cuda/matrix_operations.hpp deleted file mode 100644 index 4e451061b8..0000000000 --- a/modules/core/src/cuda/matrix_operations.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "opencv2/core/cuda/common.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream); - - template - void set(PtrStepSz mat, const T* scalar, int channels, cudaStream_t stream); - - template - void set(PtrStepSz mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream); - - void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream); -}}} diff --git a/modules/core/src/gpu_mat.cpp b/modules/core/src/gpu_mat.cpp index a2e8da65a8..33a6046fed 100644 --- a/modules/core/src/gpu_mat.cpp +++ b/modules/core/src/gpu_mat.cpp @@ -46,504 +46,6 @@ using namespace cv; using namespace cv::gpu; -/////////////////////////// matrix operations ///////////////////////// - -#ifdef HAVE_CUDA - -// CUDA implementation - -#include "cuda/matrix_operations.hpp" - -namespace -{ - template void cudaSet_(GpuMat& src, Scalar s, cudaStream_t stream) - { - Scalar_ sf = s; - cudev::set(PtrStepSz(src), sf.val, src.channels(), stream); - } - - void cudaSet(GpuMat& src, Scalar s, cudaStream_t stream) - { - typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream); - static const func_t funcs[] = - { - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_ - }; - - funcs[src.depth()](src, s, stream); - } - - template void cudaSet_(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream) - { - Scalar_ sf = s; - cudev::set(PtrStepSz(src), sf.val, mask, src.channels(), stream); - } - - void cudaSet(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream) - { - typedef void (*func_t)(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream); - static const func_t funcs[] = - { - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_, - cudaSet_ - }; - - funcs[src.depth()](src, s, mask, stream); - } - - void cudaCopyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) - { - cudev::copyWithMask(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream); - } - - void cudaConvert(const GpuMat& src, GpuMat& dst, cudaStream_t stream) - { - cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, stream); - } - - void cudaConvert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream) - { - cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream); - } -} - -// NPP implementation - -namespace -{ - ////////////////////////////////////////////////////////////////////////// - // Convert - - template struct NppConvertFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef typename NPPTypeTraits::npp_type dst_t; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI); - }; - template struct NppConvertFunc - { - typedef typename NPPTypeTraits::npp_type dst_t; - - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode); - }; - - template::func_ptr func> struct NppCvt - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef typename NPPTypeTraits::npp_type dst_t; - - static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppCvt - { - typedef typename NPPTypeTraits::npp_type dst_t; - - static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, NPP_RND_NEAR) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - ////////////////////////////////////////////////////////////////////////// - // Set - - template struct NppSetFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - template struct NppSetFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - template struct NppSetFunc - { - typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - template<> struct NppSetFunc - { - typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - - template::func_ptr func> struct NppSet - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void call(GpuMat& src, Scalar s, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - NppStreamHandler h(stream); - - nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppSet - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void call(GpuMat& src, Scalar s, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - NppStreamHandler h(stream); - - nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - template struct NppSetMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - template struct NppSetMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - - template::func_ptr func> struct NppSetMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - NppStreamHandler h(stream); - - nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppSetMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - NppStreamHandler h(stream); - - nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - ////////////////////////////////////////////////////////////////////////// - // CopyMasked - - template struct NppCopyWithMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - - template::func_ptr func> struct NppCopyWithMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void call(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, mask.ptr(), static_cast(mask.step)) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; -} - -// Dispatcher - -namespace -{ - void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) - { - CV_DbgAssert( src.size() == dst.size() && src.type() == dst.type() ); - - CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 ); - CV_Assert( src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()) ); - - if (src.depth() == CV_64F) - { - CV_Assert( deviceSupports(NATIVE_DOUBLE) ); - } - - typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); - static const func_t funcs[7][4] = - { - /* 8U */ {NppCopyWithMask::call, cudaCopyWithMask, NppCopyWithMask::call, NppCopyWithMask::call}, - /* 8S */ {cudaCopyWithMask , cudaCopyWithMask, cudaCopyWithMask , cudaCopyWithMask }, - /* 16U */ {NppCopyWithMask::call, cudaCopyWithMask, NppCopyWithMask::call, NppCopyWithMask::call}, - /* 16S */ {NppCopyWithMask::call, cudaCopyWithMask, NppCopyWithMask::call, NppCopyWithMask::call}, - /* 32S */ {NppCopyWithMask::call, cudaCopyWithMask, NppCopyWithMask::call, NppCopyWithMask::call}, - /* 32F */ {NppCopyWithMask::call, cudaCopyWithMask, NppCopyWithMask::call, NppCopyWithMask::call}, - /* 64F */ {cudaCopyWithMask , cudaCopyWithMask, cudaCopyWithMask , cudaCopyWithMask } - }; - - const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cudaCopyWithMask; - - func(src, dst, mask, stream); - } - - void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream = 0) - { - CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() ); - - CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 ); - CV_Assert( dst.depth() <= CV_64F ); - - if (src.depth() == CV_64F || dst.depth() == CV_64F) - { - CV_Assert( deviceSupports(NATIVE_DOUBLE) ); - } - - typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[7][7][4] = - { - { - /* 8U -> 8U */ {0, 0, 0, 0}, - /* 8U -> 8S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert }, - /* 8U -> 16U */ {NppCvt::call, cudaConvert, cudaConvert, NppCvt::call}, - /* 8U -> 16S */ {NppCvt::call, cudaConvert, cudaConvert, NppCvt::call}, - /* 8U -> 32S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert }, - /* 8U -> 32F */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert }, - /* 8U -> 64F */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert } - }, - { - /* 8S -> 8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 8S -> 8S */ {0,0,0,0}, - /* 8S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 8S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 8S -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 8S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 8S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert} - }, - { - /* 16U -> 8U */ {NppCvt::call, cudaConvert, cudaConvert, NppCvt::call}, - /* 16U -> 8S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert }, - /* 16U -> 16U */ {0,0,0,0}, - /* 16U -> 16S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert }, - /* 16U -> 32S */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert }, - /* 16U -> 32F */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert }, - /* 16U -> 64F */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert } - }, - { - /* 16S -> 8U */ {NppCvt::call, cudaConvert, cudaConvert, NppCvt::call}, - /* 16S -> 8S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert }, - /* 16S -> 16U */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert }, - /* 16S -> 16S */ {0,0,0,0}, - /* 16S -> 32S */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert }, - /* 16S -> 32F */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert }, - /* 16S -> 64F */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert } - }, - { - /* 32S -> 8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 32S -> 8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 32S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 32S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 32S -> 32S */ {0,0,0,0}, - /* 32S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 32S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert} - }, - { - /* 32F -> 8U */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert}, - /* 32F -> 8S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert}, - /* 32F -> 16U */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert}, - /* 32F -> 16S */ {NppCvt::call, cudaConvert, cudaConvert, cudaConvert}, - /* 32F -> 32S */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert}, - /* 32F -> 32F */ {0,0,0,0}, - /* 32F -> 64F */ {cudaConvert , cudaConvert, cudaConvert, cudaConvert} - }, - { - /* 64F -> 8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 64F -> 8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 64F -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 64F -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 64F -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 64F -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}, - /* 64F -> 64F */ {0,0,0,0} - } - }; - - const bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16); - if (!aligned) - { - cudaConvert(src, dst, stream); - return; - } - - const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1]; - CV_DbgAssert( func != 0 ); - - func(src, dst, stream); - } - - void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0) - { - CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() ); - - CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 ); - CV_Assert( dst.depth() <= CV_64F ); - - if (src.depth() == CV_64F || dst.depth() == CV_64F) - { - CV_Assert( deviceSupports(NATIVE_DOUBLE) ); - } - - cudaConvert(src, dst, alpha, beta, stream); - } - - void set(GpuMat& m, Scalar s, cudaStream_t stream = 0) - { - if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) - { - if (stream) - cudaSafeCall( cudaMemset2DAsync(m.data, m.step, 0, m.cols * m.elemSize(), m.rows, stream) ); - else - cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) ); - return; - } - - if (m.depth() == CV_8U) - { - int cn = m.channels(); - - if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) - { - int val = saturate_cast(s[0]); - if (stream) - cudaSafeCall( cudaMemset2DAsync(m.data, m.step, val, m.cols * m.elemSize(), m.rows, stream) ); - else - cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) ); - return; - } - } - - typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream); - static const func_t funcs[7][4] = - { - {NppSet::call, cudaSet , cudaSet , NppSet::call}, - {NppSet::call, NppSet::call, NppSet::call, NppSet::call}, - {NppSet::call, NppSet::call, cudaSet , NppSet::call}, - {NppSet::call, NppSet::call, cudaSet , NppSet::call}, - {NppSet::call, cudaSet , cudaSet , NppSet::call}, - {NppSet::call, cudaSet , cudaSet , NppSet::call}, - {cudaSet , cudaSet , cudaSet , cudaSet } - }; - - CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 ); - - if (m.depth() == CV_64F) - { - CV_Assert( deviceSupports(NATIVE_DOUBLE) ); - } - - funcs[m.depth()][m.channels() - 1](m, s, stream); - } - - void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0) - { - CV_DbgAssert( !mask.empty() ); - - CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 ); - - if (m.depth() == CV_64F) - { - CV_Assert( deviceSupports(NATIVE_DOUBLE) ); - } - - typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream); - static const func_t funcs[7][4] = - { - {NppSetMask::call, cudaSet, cudaSet, NppSetMask::call}, - {cudaSet , cudaSet, cudaSet, cudaSet }, - {NppSetMask::call, cudaSet, cudaSet, NppSetMask::call}, - {NppSetMask::call, cudaSet, cudaSet, NppSetMask::call}, - {NppSetMask::call, cudaSet, cudaSet, NppSetMask::call}, - {NppSetMask::call, cudaSet, cudaSet, NppSetMask::call}, - {cudaSet , cudaSet, cudaSet, cudaSet } - }; - - funcs[m.depth()][m.channels() - 1](m, s, mask, stream); - } -} - -#endif // HAVE_CUDA - cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) : flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_), step(step_), data((uchar*)data_), refcount(0), @@ -651,288 +153,6 @@ cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) : rows = cols = 0; } -void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) -{ -#ifndef HAVE_CUDA - (void) _rows; - (void) _cols; - (void) _type; - throw_no_cuda(); -#else - _type &= Mat::TYPE_MASK; - - if (rows == _rows && cols == _cols && type() == _type && data) - return; - - if (data) - release(); - - CV_DbgAssert( _rows >= 0 && _cols >= 0 ); - - if (_rows > 0 && _cols > 0) - { - flags = Mat::MAGIC_VAL + _type; - rows = _rows; - cols = _cols; - - size_t esz = elemSize(); - - void* devPtr; - - if (rows > 1 && cols > 1) - { - cudaSafeCall( cudaMallocPitch(&devPtr, &step, esz * cols, rows) ); - } - else - { - // Single row or single column must be continuous - cudaSafeCall( cudaMalloc(&devPtr, esz * cols * rows) ); - step = esz * cols; - } - - if (esz * cols == step) - flags |= Mat::CONTINUOUS_FLAG; - - int64 _nettosize = static_cast(step) * rows; - size_t nettosize = static_cast(_nettosize); - - datastart = data = static_cast(devPtr); - dataend = data + nettosize; - - refcount = static_cast(fastMalloc(sizeof(*refcount))); - *refcount = 1; - } -#endif -} - -void cv::gpu::GpuMat::release() -{ -#ifdef HAVE_CUDA - if (refcount && CV_XADD(refcount, -1) == 1) - { - cudaFree(datastart); - fastFree(refcount); - } - - data = datastart = dataend = 0; - step = rows = cols = 0; - refcount = 0; -#endif -} - -void cv::gpu::GpuMat::upload(InputArray arr) -{ -#ifndef HAVE_CUDA - (void) arr; - throw_no_cuda(); -#else - Mat mat = arr.getMat(); - - CV_DbgAssert( !mat.empty() ); - - create(mat.size(), mat.type()); - - cudaSafeCall( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); -#endif -} - -void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream) -{ -#ifndef HAVE_CUDA - (void) arr; - (void) _stream; - throw_no_cuda(); -#else - Mat mat = arr.getMat(); - - CV_DbgAssert( !mat.empty() ); - - create(mat.size(), mat.type()); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - cudaSafeCall( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) ); -#endif -} - -void cv::gpu::GpuMat::download(OutputArray _dst) const -{ -#ifndef HAVE_CUDA - (void) _dst; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - _dst.create(size(), type()); - Mat dst = _dst.getMat(); - - cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); -#endif -} - -void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const -{ -#ifndef HAVE_CUDA - (void) _dst; - (void) _stream; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - _dst.create(size(), type()); - Mat dst = _dst.getMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) ); -#endif -} - -void cv::gpu::GpuMat::copyTo(OutputArray _dst) const -{ -#ifndef HAVE_CUDA - (void) _dst; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - _dst.create(size(), type()); - GpuMat dst = _dst.getGpuMat(); - - cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); -#endif -} - -void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const -{ -#ifndef HAVE_CUDA - (void) _dst; - (void) _stream; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - _dst.create(size(), type()); - GpuMat dst = _dst.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) ); -#endif -} - -void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const -{ -#ifndef HAVE_CUDA - (void) _dst; - (void) _mask; - (void) _stream; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - _dst.create(size(), type()); - GpuMat dst = _dst.getGpuMat(); - - GpuMat mask = _mask.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - ::copyWithMask(*this, dst, mask, stream); -#endif -} - -GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream) -{ -#ifndef HAVE_CUDA - (void) s; - (void) _stream; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - ::set(*this, s, stream); -#endif - - return *this; -} - -GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream) -{ -#ifndef HAVE_CUDA - (void) s; - (void) _mask; - (void) _stream; - throw_no_cuda(); -#else - CV_DbgAssert( !empty() ); - - GpuMat mask = _mask.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - ::set(*this, s, mask, stream); -#endif - - return *this; -} - -void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const -{ -#ifndef HAVE_CUDA - (void) _dst; - (void) rtype; - (void) _stream; - throw_no_cuda(); -#else - if (rtype < 0) - rtype = type(); - else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); - - const int sdepth = depth(); - const int ddepth = CV_MAT_DEPTH(rtype); - if (sdepth == ddepth) - { - if (_stream) - copyTo(_dst, _stream); - else - copyTo(_dst); - - return; - } - - GpuMat src = *this; - - _dst.create(size(), rtype); - GpuMat dst = _dst.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - ::convert(src, dst, stream); -#endif -} - -void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const -{ -#ifndef HAVE_CUDA - (void) _dst; - (void) rtype; - (void) alpha; - (void) beta; - (void) _stream; - throw_no_cuda(); -#else - if (rtype < 0) - rtype = type(); - else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); - - GpuMat src = *this; - - _dst.create(size(), rtype); - GpuMat dst = _dst.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - ::convert(src, dst, alpha, beta, stream); -#endif -} - GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const { GpuMat hdr = *this; @@ -1124,3 +344,101 @@ GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat) return mat = GpuMat(rows, cols, type); } + +#ifndef HAVE_CUDA + +void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) +{ + (void) _rows; + (void) _cols; + (void) _type; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::release() +{ +} + +void cv::gpu::GpuMat::upload(InputArray arr) +{ + (void) arr; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream) +{ + (void) arr; + (void) _stream; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::download(OutputArray _dst) const +{ + (void) _dst; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const +{ + (void) _dst; + (void) _stream; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst) const +{ + (void) _dst; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const +{ + (void) _dst; + (void) _stream; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const +{ + (void) _dst; + (void) _mask; + (void) _stream; + throw_no_cuda(); +} + +GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream) +{ + (void) s; + (void) _stream; + throw_no_cuda(); + return *this; +} + +GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream) +{ + (void) s; + (void) _mask; + (void) _stream; + throw_no_cuda(); + return *this; +} + +void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const +{ + (void) _dst; + (void) rtype; + (void) _stream; + throw_no_cuda(); +} + +void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const +{ + (void) _dst; + (void) rtype; + (void) alpha; + (void) beta; + (void) _stream; + throw_no_cuda(); +} + +#endif diff --git a/modules/cudev/include/opencv2/cudev/grid/copy.hpp b/modules/cudev/include/opencv2/cudev/grid/copy.hpp index cfbe456333..d7d3ea8343 100644 --- a/modules/cudev/include/opencv2/cudev/grid/copy.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/copy.hpp @@ -50,6 +50,7 @@ #include "../util/tuple.hpp" #include "../ptr2d/traits.hpp" #include "../ptr2d/gpumat.hpp" +#include "../ptr2d/glob.hpp" #include "../ptr2d/mask.hpp" #include "../ptr2d/zip.hpp" #include "detail/copy.hpp" @@ -69,6 +70,18 @@ __host__ void gridCopy_(const SrcPtr& src, GpuMat_& dst, const MaskPtr& grid_copy_detail::copy(shrinkPtr(src), shrinkPtr(dst), shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtr& src, const GlobPtrSz& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(dst) == rows && getCols(dst) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_copy_detail::copy(shrinkPtr(src), shrinkPtr(dst), shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtr& src, GpuMat_& dst, Stream& stream = Stream::Null()) { @@ -80,6 +93,17 @@ __host__ void gridCopy_(const SrcPtr& src, GpuMat_& dst, Stream& stream grid_copy_detail::copy(shrinkPtr(src), shrinkPtr(dst), WithOutMask(), rows, cols, StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtr& src, const GlobPtrSz& dst, Stream& stream = Stream::Null()) +{ + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(dst) == rows && getCols(dst) == cols ); + + grid_copy_detail::copy(shrinkPtr(src), shrinkPtr(dst), WithOutMask(), rows, cols, StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -100,6 +124,25 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 2, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_copy_detail::copy_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))), + shrinkPtr(mask), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_& >& dst, Stream& stream = Stream::Null()) { @@ -118,6 +161,24 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 2, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + + grid_copy_detail::copy_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))), + WithOutMask(), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -139,6 +200,26 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 3, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_copy_detail::copy_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))), + shrinkPtr(mask), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, Stream& stream = Stream::Null()) { @@ -158,6 +239,25 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 3, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + + grid_copy_detail::copy_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))), + WithOutMask(), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -180,6 +280,27 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 4, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_copy_detail::copy_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))), + shrinkPtr(mask), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, Stream& stream = Stream::Null()) { @@ -200,6 +321,26 @@ __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 4, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols ); + + grid_copy_detail::copy_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))), + WithOutMask(), + rows, cols, + StreamAccessor::getStream(stream)); +} + // Default Policy struct DefaultCopyPolicy @@ -216,48 +357,96 @@ __host__ void gridCopy(const SrcPtr& src, GpuMat_& dst, const MaskPtr& gridCopy_(src, dst, mask, stream); } +template +__host__ void gridCopy(const SrcPtr& src, const GlobPtrSz& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, mask, stream); +} + template __host__ void gridCopy(const SrcPtr& src, GpuMat_& dst, Stream& stream = Stream::Null()) { gridCopy_(src, dst, stream); } +template +__host__ void gridCopy(const SrcPtr& src, const GlobPtrSz& dst, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, stream); +} + template __host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridCopy_(src, dst, mask, stream); } +template +__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, mask, stream); +} + template __host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_& >& dst, Stream& stream = Stream::Null()) { gridCopy_(src, dst, stream); } +template +__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, stream); +} + template __host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridCopy_(src, dst, mask, stream); } +template +__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, mask, stream); +} + template __host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, Stream& stream = Stream::Null()) { gridCopy_(src, dst, stream); } +template +__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, stream); +} + template __host__ void gridCopy(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridCopy_(src, dst, mask, stream); } +template +__host__ void gridCopy(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, mask, stream); +} + template __host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, Stream& stream = Stream::Null()) { gridCopy_(src, dst, stream); } +template +__host__ void gridCopy_(const SrcPtrTuple& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, Stream& stream = Stream::Null()) +{ + gridCopy_(src, dst, stream); +} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/grid/transform.hpp b/modules/cudev/include/opencv2/cudev/grid/transform.hpp index 6c57758a6d..0da5e2d8a4 100644 --- a/modules/cudev/include/opencv2/cudev/grid/transform.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/transform.hpp @@ -50,6 +50,7 @@ #include "../util/tuple.hpp" #include "../ptr2d/traits.hpp" #include "../ptr2d/gpumat.hpp" +#include "../ptr2d/glob.hpp" #include "../ptr2d/mask.hpp" #include "../ptr2d/zip.hpp" #include "detail/transform.hpp" @@ -69,6 +70,18 @@ __host__ void gridTransform_(const SrcPtr& src, GpuMat_& dst, const UnO grid_transform_detail::transform(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(dst) == rows && getCols(dst) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_transform_detail::transform(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, GpuMat_& dst, const UnOp& op, Stream& stream = Stream::Null()) { @@ -80,6 +93,17 @@ __host__ void gridTransform_(const SrcPtr& src, GpuMat_& dst, const UnO grid_transform_detail::transform(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz& dst, const UnOp& op, Stream& stream = Stream::Null()) +{ + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(dst) == rows && getCols(dst) == cols ); + + grid_transform_detail::transform(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -94,6 +118,19 @@ __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + const int rows = getRows(src1); + const int cols = getCols(src1); + + CV_Assert( getRows(dst) == rows && getCols(dst) == cols ); + CV_Assert( getRows(src2) == rows && getCols(src2) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_transform_detail::transform(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_& dst, const BinOp& op, Stream& stream = Stream::Null()) { @@ -107,6 +144,18 @@ __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GlobPtrSz& dst, const BinOp& op, Stream& stream = Stream::Null()) +{ + const int rows = getRows(src1); + const int cols = getCols(src1); + + CV_Assert( getRows(dst) == rows && getCols(dst) == cols ); + CV_Assert( getRows(src2) == rows && getCols(src2) == cols ); + + grid_transform_detail::transform(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -128,6 +177,26 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 2, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_transform_detail::transform_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))), + op, + shrinkPtr(mask), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_& >& dst, const OpTuple& op, Stream& stream = Stream::Null()) { @@ -147,6 +216,25 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 2, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + + grid_transform_detail::transform_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst))), + op, + WithOutMask(), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -169,6 +257,27 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 3, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_transform_detail::transform_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))), + op, + shrinkPtr(mask), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, Stream& stream = Stream::Null()) { @@ -189,6 +298,26 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 3, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + + grid_transform_detail::transform_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst))), + op, + WithOutMask(), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { @@ -212,6 +341,28 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 4, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols ); + CV_Assert( getRows(mask) == rows && getCols(mask) == cols ); + + grid_transform_detail::transform_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))), + op, + shrinkPtr(mask), + rows, cols, + StreamAccessor::getStream(stream)); +} + template __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, Stream& stream = Stream::Null()) { @@ -233,6 +384,27 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_&, GpuMa StreamAccessor::getStream(stream)); } +template +__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, Stream& stream = Stream::Null()) +{ + CV_StaticAssert( tuple_size::value == 4, "" ); + + const int rows = getRows(src); + const int cols = getCols(src); + + CV_Assert( getRows(get<0>(dst)) == rows && getCols(get<0>(dst)) == cols ); + CV_Assert( getRows(get<1>(dst)) == rows && getCols(get<1>(dst)) == cols ); + CV_Assert( getRows(get<2>(dst)) == rows && getCols(get<2>(dst)) == cols ); + CV_Assert( getRows(get<3>(dst)) == rows && getCols(get<3>(dst)) == cols ); + + grid_transform_detail::transform_tuple(shrinkPtr(src), + shrinkPtr(zipPtr(get<0>(dst), get<1>(dst), get<2>(dst), get<3>(dst))), + op, + WithOutMask(), + rows, cols, + StreamAccessor::getStream(stream)); +} + // Default Policy struct DefaultTransformPolicy @@ -250,60 +422,120 @@ __host__ void gridTransform(const SrcPtr& src, GpuMat_& dst, const Op& gridTransform_(src, dst, op, mask, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const GlobPtrSz& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, mask, stream); +} + template __host__ void gridTransform(const SrcPtr& src, GpuMat_& dst, const Op& op, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const GlobPtrSz& dst, const Op& op, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, stream); +} + template __host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, GpuMat_& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridTransform_(src1, src2, dst, op, mask, stream); } +template +__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, const GlobPtrSz& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridTransform_(src1, src2, dst, op, mask, stream); +} + template __host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, GpuMat_& dst, const Op& op, Stream& stream = Stream::Null()) { gridTransform_(src1, src2, dst, op, stream); } +template +__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, const GlobPtrSz& dst, const Op& op, Stream& stream = Stream::Null()) +{ + gridTransform_(src1, src2, dst, op, stream); +} + template __host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, mask, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, mask, stream); +} + template __host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_& >& dst, const OpTuple& op, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, stream); +} + template __host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, mask, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, mask, stream); +} + template __host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, stream); +} + template __host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, mask, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, mask, stream); +} + template __host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_&, GpuMat_&, GpuMat_&, GpuMat_& >& dst, const OpTuple& op, Stream& stream = Stream::Null()) { gridTransform_(src, dst, op, stream); } +template +__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz, GlobPtrSz, GlobPtrSz, GlobPtrSz >& dst, const OpTuple& op, Stream& stream = Stream::Null()) +{ + gridTransform_(src, dst, op, stream); +} + }} #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp index 2c7cf7e146..e378c52372 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/detail/gpumat.hpp @@ -335,4 +335,27 @@ __host__ GpuMat_& GpuMat_::assign(const Expr& expr, Stream& stream) }} +// Input / Output Arrays + +namespace cv { + +template +__host__ _InputArray::_InputArray(const cudev::GpuMat_<_Tp>& m) + : flags(FIXED_TYPE + GPU_MAT + DataType<_Tp>::type), obj((void*)&m) +{} + +template +__host__ _OutputArray::_OutputArray(cudev::GpuMat_<_Tp>& m) + : _InputArray(m) +{} + +template +__host__ _OutputArray::_OutputArray(const cudev::GpuMat_<_Tp>& m) + : _InputArray(m) +{ + flags |= FIXED_SIZE; +} + +} + #endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp index c64cafbb12..7304a8c7f5 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp @@ -91,6 +91,17 @@ __host__ GlobPtrSz globPtr(T* data, size_t step, int rows, int cols) return p; } +template +__host__ GlobPtrSz globPtr(const GpuMat& mat) +{ + GlobPtrSz p; + p.data = (T*) mat.data; + p.step = mat.step; + p.rows = mat.rows; + p.cols = mat.cols; + return p; +} + template struct PtrTraits< GlobPtrSz > : PtrTraitsBase, GlobPtr > { }; diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 5a852c9234..14917acc33 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -230,22 +230,22 @@ namespace switch (srcType) { case CV_8UC1: - func_ = cudev::filter2D; + func_ = cv::gpu::cudev::filter2D; break; case CV_8UC4: - func_ = cudev::filter2D; + func_ = cv::gpu::cudev::filter2D; break; case CV_16UC1: - func_ = cudev::filter2D; + func_ = cv::gpu::cudev::filter2D; break; case CV_16UC4: - func_ = cudev::filter2D; + func_ = cv::gpu::cudev::filter2D; break; case CV_32FC1: - func_ = cudev::filter2D; + func_ = cv::gpu::cudev::filter2D; break; case CV_32FC4: - func_ = cudev::filter2D; + func_ = cv::gpu::cudev::filter2D; break; } } diff --git a/modules/gpuimgproc/src/color.cpp b/modules/gpuimgproc/src/color.cpp index 006274742e..3d714b6287 100644 --- a/modules/gpuimgproc/src/color.cpp +++ b/modules/gpuimgproc/src/color.cpp @@ -187,7 +187,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream)); } void bgr_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -200,7 +200,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream)); } void rgb_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -213,7 +213,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream)); } void rgb_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -226,7 +226,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream)); } void bgra_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -239,7 +239,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream)); } void bgra_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -252,7 +252,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream)); } void rgba_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -265,7 +265,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream)); } void rgba_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -278,7 +278,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::rgba_to_bgr565(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::rgba_to_bgr565(src, dst, StreamAccessor::getStream(stream)); } void bgr555_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -291,7 +291,7 @@ namespace _dst.create(src.size(), CV_8UC3); GpuMat dst = _dst.getGpuMat(); - cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream)); } void bgr565_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -304,7 +304,7 @@ namespace _dst.create(src.size(), CV_8UC3); GpuMat dst = _dst.getGpuMat(); - cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream)); } void bgr555_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -317,7 +317,7 @@ namespace _dst.create(src.size(), CV_8UC3); GpuMat dst = _dst.getGpuMat(); - cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream)); } void bgr565_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -330,7 +330,7 @@ namespace _dst.create(src.size(), CV_8UC3); GpuMat dst = _dst.getGpuMat(); - cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream)); } void bgr555_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -343,7 +343,7 @@ namespace _dst.create(src.size(), CV_8UC4); GpuMat dst = _dst.getGpuMat(); - cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream)); } void bgr565_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -356,7 +356,7 @@ namespace _dst.create(src.size(), CV_8UC4); GpuMat dst = _dst.getGpuMat(); - cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream)); } void bgr555_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -369,7 +369,7 @@ namespace _dst.create(src.size(), CV_8UC4); GpuMat dst = _dst.getGpuMat(); - cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream)); } void bgr565_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -382,7 +382,7 @@ namespace _dst.create(src.size(), CV_8UC4); GpuMat dst = _dst.getGpuMat(); - cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream)); } void gray_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -427,7 +427,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream)); } void gray_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -440,7 +440,7 @@ namespace _dst.create(src.size(), CV_8UC2); GpuMat dst = _dst.getGpuMat(); - cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream)); } void bgr555_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -453,7 +453,7 @@ namespace _dst.create(src.size(), CV_8UC1); GpuMat dst = _dst.getGpuMat(); - cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream)); } void bgr565_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -466,7 +466,7 @@ namespace _dst.create(src.size(), CV_8UC1); GpuMat dst = _dst.getGpuMat(); - cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream)); + cv::gpu::cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream)); } void rgb_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream) @@ -2145,9 +2145,9 @@ void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1); if (dcn == 3) - cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + cv::gpu::cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); else - cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + cv::gpu::cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); break; } @@ -2172,7 +2172,7 @@ void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1); - cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + cv::gpu::cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); break; } diff --git a/modules/gpuwarping/src/pyramids.cpp b/modules/gpuwarping/src/pyramids.cpp index 577ed85677..0e8445df2c 100644 --- a/modules/gpuwarping/src/pyramids.cpp +++ b/modules/gpuwarping/src/pyramids.cpp @@ -181,7 +181,7 @@ namespace const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1]; - cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream)); + cv::gpu::cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream)); szLastLayer = szCurLayer; } @@ -222,7 +222,7 @@ namespace lastLayer = curLayer; } - cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream)); + cv::gpu::cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream)); } }