diff --git a/modules/core/include/opencv2/core/cuda/emulation.hpp b/modules/core/include/opencv2/core/cuda/emulation.hpp index 3df26468b2..b484f2378e 100644 --- a/modules/core/include/opencv2/core/cuda/emulation.hpp +++ b/modules/core/include/opencv2/core/cuda/emulation.hpp @@ -43,6 +43,7 @@ #ifndef OPENCV_GPU_EMULATION_HPP_ #define OPENCV_GPU_EMULATION_HPP_ +#include "common.hpp" #include "warp_reduce.hpp" namespace cv { namespace gpu { namespace cudev @@ -131,8 +132,130 @@ namespace cv { namespace gpu { namespace cudev return ::atomicMin(address, val); #endif } + }; // struct cmem + + struct glob + { + static __device__ __forceinline__ int atomicAdd(int* address, int val) + { + return ::atomicAdd(address, val); + } + static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val) + { + return ::atomicAdd(address, val); + } + static __device__ __forceinline__ float atomicAdd(float* address, float val) + { + #if __CUDA_ARCH__ >= 200 + return ::atomicAdd(address, val); + #else + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(val + __int_as_float(assumed))); + } while (assumed != old); + return __int_as_float(old); + #endif + } + static __device__ __forceinline__ double atomicAdd(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } + + static __device__ __forceinline__ int atomicMin(int* address, int val) + { + return ::atomicMin(address, val); + } + static __device__ __forceinline__ float atomicMin(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fminf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + (void) address; + (void) val; + return 0.0f; + #endif + } + static __device__ __forceinline__ double atomicMin(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmin(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } + + static __device__ __forceinline__ int atomicMax(int* address, int val) + { + return ::atomicMax(address, val); + } + static __device__ __forceinline__ float atomicMax(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fmaxf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + (void) address; + (void) val; + return 0.0f; + #endif + } + static __device__ __forceinline__ double atomicMax(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } }; - }; + }; //struct Emulation }}} // namespace cv { namespace gpu { namespace cudev #endif /* OPENCV_GPU_EMULATION_HPP_ */ diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 6f2f1145e8..2f884b3f9e 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -3,7 +3,7 @@ if(ANDROID OR IOS) endif() set(the_description "GPU-accelerated Computer Vision") -ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy) +ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm) ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda") @@ -58,10 +58,6 @@ if(HAVE_CUDA) CUDA_ADD_CUFFT_TO_TARGET(${the_module}) endif() - if(HAVE_CUBLAS) - CUDA_ADD_CUBLAS_TO_TARGET(${the_module}) - endif() - install(FILES src/nvidia/NPP_staging/NPP_staging.hpp src/nvidia/core/NCV.hpp DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2/${name} COMPONENT main) diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst index b21e2abac8..f17ed7079c 100644 --- a/modules/gpu/doc/gpu.rst +++ b/modules/gpu/doc/gpu.rst @@ -8,10 +8,7 @@ gpu. GPU-accelerated Computer Vision introduction initalization_and_information data_structures - operations_on_matrices - per_element_operations image_processing - matrix_reductions object_detection feature_detection_and_description image_filtering diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst index 7b404c832a..69e5003743 100644 --- a/modules/gpu/doc/image_processing.rst +++ b/modules/gpu/doc/image_processing.rst @@ -414,28 +414,6 @@ The methods support arbitrary permutations of the original channels, including r -gpu::threshold ------------------- -Applies a fixed-level threshold to each array element. - -.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()) - - :param src: Source array (single-channel). - - :param dst: Destination array with the same size and type as ``src`` . - - :param thresh: Threshold value. - - :param maxval: Maximum value to use with ``THRESH_BINARY`` and ``THRESH_BINARY_INV`` threshold types. - - :param type: Threshold type. For details, see :ocv:func:`threshold` . The ``THRESH_OTSU`` threshold type is not supported. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`threshold` - - - gpu::resize --------------- Resizes an image. diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp index 0b13fc01d0..cfad81738b 100644 --- a/modules/gpu/include/opencv2/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu.hpp @@ -50,6 +50,7 @@ #endif #include "opencv2/core/gpumat.hpp" +#include "opencv2/gpuarithm.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/objdetect.hpp" #include "opencv2/features2d.hpp" @@ -269,182 +270,8 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& //! supports only ksize = 1 and ksize = 3 CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); +////////////////////////////// Image processing ////////////////////////////// -////////////////////////////// Arithmetics /////////////////////////////////// - -//! implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); - -//! transposes the matrix -//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc) -CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null()); - -//! reverses the order of the rows, columns or both in a matrix -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth -CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null()); - -//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) -//! destination array will have the depth type as lut and the same channels number as source -//! supports CV_8UC1, CV_8UC3 types -CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null()); - -//! makes multi-channel array out of several single-channel arrays -CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null()); - -//! makes multi-channel array out of several single-channel arrays -CV_EXPORTS void merge(const std::vector& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! copies each plane of a multi-channel array to a dedicated array -CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null()); - -//! copies each plane of a multi-channel array to a dedicated array -CV_EXPORTS void split(const GpuMat& src, std::vector& dst, Stream& stream = Stream::Null()); - -//! computes magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type -CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes squared magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type -CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes magnitude of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes squared magnitude of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes angle (angle(i)) of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! converts Cartesian coordinates to polar -//! supports only floating-point source -CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! converts polar coordinates to Cartesian -//! supports only floating-point source -CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values -CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, - int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); -CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b, - int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf); - - -//////////////////////////// Per-element operations //////////////////////////////////// - -//! adds one matrix to another (c = a + b) -CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); -//! adds scalar to a matrix (c = a + s) -CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); - -//! subtracts one matrix from another (c = a - b) -CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); -//! subtracts scalar from a matrix (c = a - s) -CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); - -//! computes element-wise weighted product of the two arrays (c = scale * a * b) -CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! weighted multiplies matrix to a scalar (c = scale * a * s) -CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); - -//! computes element-wise weighted quotient of the two arrays (c = a / b) -CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of matrix and scalar (c = a / s) -CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted reciprocal of an array (dst = scale/src2) -CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null()); - -//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) -CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, - int dtype = -1, Stream& stream = Stream::Null()); - -//! adds scaled array to another one (dst = alpha*src1 + src2) -static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()) -{ - addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); -} - -//! computes element-wise absolute difference of two arrays (c = abs(a - b)) -CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null()); -//! computes element-wise absolute difference of array and scalar (c = abs(a - s)) -CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null()); - -//! computes absolute value of each matrix element -//! supports CV_16S and CV_32F depth -CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes square of each pixel in an image -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes square root of each pixel in an image -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes exponent of each matrix element (b = e**a) -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); - -//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); - -//! computes power of each matrix element: -// (dst(i,j) = pow( src(i,j) , power), if src.type() is integer -// (dst(i,j) = pow(fabs(src(i,j)), power), otherwise -//! supports all, except depth == CV_64F -CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null()); - -//! compares elements of two arrays (c = a b) -CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); -CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); - -//! performs per-elements bit-wise inversion -CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise disjunction of two arrays -CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise disjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise conjunction of two arrays -CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise conjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise "exclusive or" operation -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise "exclusive or" of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! pixel by pixel right shift of an image by a constant value -//! supports 1, 3 and 4 channels images with integers elements -CV_EXPORTS void rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! pixel by pixel left shift of an image by a constant value -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void lshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element minimum of two arrays (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element minimum of array and scalar (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of two arrays (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of array and scalar (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL}; @@ -453,9 +280,6 @@ enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA //! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null()); - -////////////////////////////// Image processing ////////////////////////////// - //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] //! supports only CV_32FC1 map type CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, @@ -521,9 +345,6 @@ CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& strea //! Routines for correcting image color gamma CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null()); -//! applies fixed threshold to the image -CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); - //! resizes the image //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); @@ -794,62 +615,6 @@ private: CannyBuf cannyBuf_; }; -////////////////////////////// Matrix reductions ////////////////////////////// - -//! computes mean value and standard deviation of all or selected array elements -//! supports only CV_8UC1 type -CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev); -//! buffered version -CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf); - -//! computes norm of array -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports all matrices except 64F -CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2); -CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf); -CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf); - -//! computes norm of the difference between two arrays -//! supports NORM_INF, NORM_L1, NORM_L2 -//! supports only CV_8UC1 type -CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2); - -//! computes sum of array elements -//! supports only single channel images -CV_EXPORTS Scalar sum(const GpuMat& src); -CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); - -//! computes sum of array elements absolute values -//! supports only single channel images -CV_EXPORTS Scalar absSum(const GpuMat& src); -CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); - -//! computes squared sum of array elements -//! supports only single channel images -CV_EXPORTS Scalar sqrSum(const GpuMat& src); -CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); - -//! finds global minimum and maximum array elements and returns their values -CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()); -CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf); - -//! finds global minimum and maximum array elements and returns their values with locations -CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, - const GpuMat& mask=GpuMat()); -CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, - const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf); - -//! counts non-zero array elements -CV_EXPORTS int countNonZero(const GpuMat& src); -CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf); - -//! reduces a matrix to a vector -CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); - - ///////////////////////////// Calibration 3D ////////////////////////////////// CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu deleted file mode 100644 index 095d8bac06..0000000000 --- a/modules/gpu/src/cuda/element_operations.cu +++ /dev/null @@ -1,2636 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/limits.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/simd_functions.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace arithm -{ - template struct ArithmFuncTraits - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 1 }; - }; - - template <> struct ArithmFuncTraits<1, 1> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<1, 2> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<1, 4> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - template <> struct ArithmFuncTraits<2, 1> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<2, 2> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<2, 4> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - template <> struct ArithmFuncTraits<4, 1> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<4, 2> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - template <> struct ArithmFuncTraits<4, 4> - { - enum { simple_block_dim_x = 32 }; - enum { simple_block_dim_y = 8 }; - - enum { smart_block_dim_x = 32 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; -} - -////////////////////////////////////////////////////////////////////////// -// addMat - -namespace arithm -{ - struct VAdd4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vadd4(a, b); - } - - __device__ __forceinline__ VAdd4() {} - __device__ __forceinline__ VAdd4(const VAdd4& other) {} - }; - - //////////////////////////////////// - - struct VAdd2 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vadd2(a, b); - } - - __device__ __forceinline__ VAdd2() {} - __device__ __forceinline__ VAdd2(const VAdd2& other) {} - }; - - //////////////////////////////////// - - template struct AddMat : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(a + b); - } - - __device__ __forceinline__ AddMat() {} - __device__ __forceinline__ AddMat(const AddMat& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::AddMat > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void addMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream); - } - - void addMat_v2(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream); - } - - template - void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AddMat(), mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AddMat(), WithOutMask(), stream); - } - - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// addScalar - -namespace arithm -{ - template struct AddScalar : unary_function - { - S val; - - explicit AddScalar(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return saturate_cast(a + val); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::AddScalar > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - AddScalar op(static_cast(val)); - - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// subMat - -namespace arithm -{ - struct VSub4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vsub4(a, b); - } - - __device__ __forceinline__ VSub4() {} - __device__ __forceinline__ VSub4(const VSub4& other) {} - }; - - //////////////////////////////////// - - struct VSub2 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vsub2(a, b); - } - - __device__ __forceinline__ VSub2() {} - __device__ __forceinline__ VSub2(const VSub2& other) {} - }; - - //////////////////////////////////// - - template struct SubMat : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(a - b); - } - - __device__ __forceinline__ SubMat() {} - __device__ __forceinline__ SubMat(const SubMat& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::SubMat > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void subMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream); - } - - void subMat_v2(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream); - } - - template - void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, SubMat(), mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, SubMat(), WithOutMask(), stream); - } - - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// subScalar - -namespace arithm -{ - template - void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) - { - AddScalar op(-static_cast(val)); - - if (mask.data) - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); - else - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// mulMat - -namespace arithm -{ - struct Mul_8uc4_32f : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, float b) const - { - uint res = 0; - - res |= (saturate_cast((0xffu & (a )) * b) ); - res |= (saturate_cast((0xffu & (a >> 8)) * b) << 8); - res |= (saturate_cast((0xffu & (a >> 16)) * b) << 16); - res |= (saturate_cast((0xffu & (a >> 24)) * b) << 24); - - return res; - } - - __device__ __forceinline__ Mul_8uc4_32f() {} - __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} - }; - - struct Mul_16sc4_32f : binary_function - { - __device__ __forceinline__ short4 operator ()(short4 a, float b) const - { - return make_short4(saturate_cast(a.x * b), saturate_cast(a.y * b), - saturate_cast(a.z * b), saturate_cast(a.w * b)); - } - - __device__ __forceinline__ Mul_16sc4_32f() {} - __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} - }; - - template struct Mul : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(a * b); - } - - __device__ __forceinline__ Mul() {} - __device__ __forceinline__ Mul(const Mul& other) {} - }; - - template struct MulScale : binary_function - { - S scale; - - explicit MulScale(S scale_) : scale(scale_) {} - - __device__ __forceinline__ D operator ()(T a, T b) const - { - return saturate_cast(scale * a * b); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::Mul > : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::MulScale > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void mulMat_8uc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream); - } - - void mulMat_16sc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream); - } - - template - void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) - { - if (scale == 1) - { - Mul op; - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - else - { - MulScale op(static_cast(scale)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - } - - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// mulScalar - -namespace arithm -{ - template struct MulScalar : unary_function - { - S val; - - explicit MulScalar(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return saturate_cast(a * val); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::MulScalar > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - MulScalar op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// divMat - -namespace arithm -{ - struct Div_8uc4_32f : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, float b) const - { - uint res = 0; - - if (b != 0) - { - b = 1.0f / b; - res |= (saturate_cast((0xffu & (a )) * b) ); - res |= (saturate_cast((0xffu & (a >> 8)) * b) << 8); - res |= (saturate_cast((0xffu & (a >> 16)) * b) << 16); - res |= (saturate_cast((0xffu & (a >> 24)) * b) << 24); - } - - return res; - } - }; - - struct Div_16sc4_32f : binary_function - { - __device__ __forceinline__ short4 operator ()(short4 a, float b) const - { - return b != 0 ? make_short4(saturate_cast(a.x / b), saturate_cast(a.y / b), - saturate_cast(a.z / b), saturate_cast(a.w / b)) - : make_short4(0,0,0,0); - } - }; - - template struct Div : binary_function - { - __device__ __forceinline__ D operator ()(T a, T b) const - { - return b != 0 ? saturate_cast(a / b) : 0; - } - - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} - }; - template struct Div : binary_function - { - __device__ __forceinline__ float operator ()(T a, T b) const - { - return b != 0 ? static_cast(a) / b : 0; - } - - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} - }; - template struct Div : binary_function - { - __device__ __forceinline__ double operator ()(T a, T b) const - { - return b != 0 ? static_cast(a) / b : 0; - } - - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} - }; - - template struct DivScale : binary_function - { - S scale; - - explicit DivScale(S scale_) : scale(scale_) {} - - __device__ __forceinline__ D operator ()(T a, T b) const - { - return b != 0 ? saturate_cast(scale * a / b) : 0; - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::Div > : arithm::ArithmFuncTraits - { - }; - - template struct TransformFunctorTraits< arithm::DivScale > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void divMat_8uc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream); - } - - void divMat_16sc4_32f(PtrStepSz src1, PtrStepSzf src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream); - } - - template - void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) - { - if (scale == 1) - { - Div op; - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - else - { - DivScale op(static_cast(scale)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, op, WithOutMask(), stream); - } - } - - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - //template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - template void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// divScalar - -namespace arithm -{ - template - void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - MulScalar op(static_cast(1.0 / val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// divInv - -namespace arithm -{ - template struct DivInv : unary_function - { - S val; - - explicit DivInv(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return a != 0 ? saturate_cast(val / a) : 0; - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::DivInv > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - DivInv op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// absDiffMat - -namespace arithm -{ - struct VAbsDiff4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vabsdiff4(a, b); - } - - __device__ __forceinline__ VAbsDiff4() {} - __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} - }; - - //////////////////////////////////// - - struct VAbsDiff2 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vabsdiff2(a, b); - } - - __device__ __forceinline__ VAbsDiff2() {} - __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} - }; - - //////////////////////////////////// - - __device__ __forceinline__ int _abs(int a) - { - return ::abs(a); - } - __device__ __forceinline__ float _abs(float a) - { - return ::fabsf(a); - } - __device__ __forceinline__ double _abs(double a) - { - return ::fabs(a); - } - - template struct AbsDiffMat : binary_function - { - __device__ __forceinline__ T operator ()(T a, T b) const - { - return saturate_cast(_abs(a - b)); - } - - __device__ __forceinline__ AbsDiffMat() {} - __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::AbsDiffMat > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void absDiffMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream); - } - - void absDiffMat_v2(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream); - } - - template - void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src1, (PtrStepSz) src2, (PtrStepSz) dst, AbsDiffMat(), WithOutMask(), stream); - } - - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// absDiffScalar - -namespace arithm -{ - template struct AbsDiffScalar : unary_function - { - S val; - - explicit AbsDiffScalar(S val_) : val(val_) {} - - __device__ __forceinline__ T operator ()(T a) const - { - abs_func f; - return saturate_cast(f(a - val)); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::AbsDiffScalar > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - AbsDiffScalar op(static_cast(val)); - - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - template void absDiffScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// absMat - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< abs_func > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); - } - - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// sqrMat - -namespace arithm -{ - template struct Sqr : unary_function - { - __device__ __forceinline__ T operator ()(T x) const - { - return saturate_cast(x * x); - } - - __device__ __forceinline__ Sqr() {} - __device__ __forceinline__ Sqr(const Sqr& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::Sqr > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), WithOutMask(), stream); - } - - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// sqrtMat - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< sqrt_func > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); - } - - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// logMat - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< log_func > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); - } - - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////// -// expMat - -namespace arithm -{ - template struct Exp : unary_function - { - __device__ __forceinline__ T operator ()(T x) const - { - exp_func f; - return saturate_cast(f(x)); - } - - __device__ __forceinline__ Exp() {} - __device__ __forceinline__ Exp(const Exp& other) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::Exp > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - cudev::transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); - } - - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -////////////////////////////////////////////////////////////////////////////////////// -// cmpMat - -namespace arithm -{ - struct VCmpEq4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmpeq4(a, b); - } - - __device__ __forceinline__ VCmpEq4() {} - __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} - }; - struct VCmpNe4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmpne4(a, b); - } - - __device__ __forceinline__ VCmpNe4() {} - __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} - }; - struct VCmpLt4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmplt4(a, b); - } - - __device__ __forceinline__ VCmpLt4() {} - __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} - }; - struct VCmpLe4 : binary_function - { - __device__ __forceinline__ uint operator ()(uint a, uint b) const - { - return vcmple4(a, b); - } - - __device__ __forceinline__ VCmpLe4() {} - __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} - }; - - //////////////////////////////////// - - template - struct Cmp : binary_function - { - __device__ __forceinline__ uchar operator()(T a, T b) const - { - Op op; - return -op(a, b); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits - { - }; - template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits - { - }; - - //////////////////////////////////// - - template struct TransformFunctorTraits< arithm::Cmp > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - void cmpMatEq_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream); - } - void cmpMatNe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream); - } - void cmpMatLt_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream); - } - void cmpMatLe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream) - { - cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream); - } - - template